In [1]:
# Suppress warnings from libraries to keep the output clean
import warnings
warnings.filterwarnings('ignore')

# Import standard Python libraries
import os  # For operating system dependent functionality
import shutil  # For file operations like copying and removing files
import itertools  # For advanced iteration operations
import random  # For generating random numbers

# Import scientific computing libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations

# Import libraries for plotting and visualization
import matplotlib.pyplot as plt  # For creating static, animated, and interactive visualizations
import matplotlib.image as mpimg  # For reading image files

# Import TensorFlow, a deep learning library
import tensorflow as tf

# Import Keras, a high-level API for building and training deep learning models
import tensorflow.keras
from tensorflow.keras import models
from tensorflow.keras import backend as K  # For accessing the Keras backend
from tensorflow.keras.applications import imagenet_utils  # Utilities for ImageNet data
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # For real-time data augmentation
from tensorflow.keras.preprocessing import image  # For image preprocessing utilities
from tensorflow.keras.metrics import categorical_crossentropy  # Loss function for multi-class classification
from tensorflow.keras.optimizers import Adam  # Optimizer for training models
from tensorflow.keras.models import Sequential, Model, load_model  # For creating and loading models

# Import various Keras layers for building neural networks
from tensorflow.keras.layers import Activation, BatchNormalization, Conv2D
from tensorflow.keras.layers import Activation, Dense, Flatten, Input, MaxPooling2D, Dropout

# Import scikit-learn and SciPy libraries
from sklearn.metrics import confusion_matrix  # For computing confusion matrix
from scipy import misc, ndimage  # For image processing

# Import for plotting model architecture
from tensorflow.keras.utils import plot_model  # For creating a visual representation of the model

# Enable inline plotting in Jupyter notebooks
%matplotlib inline

In [2]:
# Restore np.random.seed to its original state
np.random.seed = np.random.seed

# Ensure np.random.seed is not reassigned
assert isinstance(np.random.seed, type(np.random.RandomState().seed)), "np.random.seed has been reassigned."

# Set up a reproducible environment
# Setting a specific hash seed for Python's hash-based operations
os.environ['PYTHONHASHSEED'] = '0'
# Setting the seed for NumPy's random number generator
np.random.seed(123)
# Setting the seed for Python's random module
random.seed(456)
# Setting the seed for TensorFlow's random number generator
tf.random.set_seed(789)

# Configure TensorFlow session to use a single thread for operations
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)

In [3]:
# Set the path to the main dataset and the path to the train, valid and test folders within the dataset_splits folder
base_path = 'C:/Users/isaac/datasets/eurosat-dataset'
train_path = os.path.join(base_path, 'dataset_splits/train')
valid_path = os.path.join(base_path, 'dataset_splits/valid')
test_path = os.path.join(base_path, 'dataset_splits/test')

In [4]:
# Initialize an empty list to store category names
categories = []

# Walk through the directory tree starting from the specified path
for dirpath, dirnames, filenames in os.walk(base_path):
    # Filter out 'dataset_splits' directory from the list of dirnames
    # This will modify dirnames in-place and prevent os.walk from visiting this directory
    dirnames[:] = [d for d in dirnames if d != 'dataset_splits']
    
    # Add the remaining directory names (categories) to the categories list
    categories.extend(dirnames)

# Print the list of categories (subdirectory names)
print(categories)

['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']


In [5]:
# Iterate through each category within the categories list 
for category in categories:
    # Construct the path to the training images directory for the current category
    tr = os.path.join(train_path, category)
    # Get the number of training images in the current category
    len_tr = len(os.listdir(tr))

    # Construct the path to the validation images directory for the current category
    val = os.path.join(valid_path, category)
    # Get the number of validation images in the current category
    len_val = len(os.listdir(val))

    # Construct the path to the test images directory for the current category
    test = os.path.join(test_path, category)
    # Get the number of test images in the current category
    len_test = len(os.listdir(test))

    # Printing the category names and the training, validation and test images for each category
    print(category + ":")
    print('Train images for ' + category + ': {}'.format(len_tr))
    print('Validation images for ' + category + ': {}'.format(len_val))
    print('Test images for ' + category + ': {}'.format(len_test))
    print('')

AnnualCrop:
Train images for AnnualCrop: 1200
Validation images for AnnualCrop: 150
Test images for AnnualCrop: 150

Forest:
Train images for Forest: 1200
Validation images for Forest: 150
Test images for Forest: 150

HerbaceousVegetation:
Train images for HerbaceousVegetation: 1200
Validation images for HerbaceousVegetation: 150
Test images for HerbaceousVegetation: 150

Highway:
Train images for Highway: 1000
Validation images for Highway: 125
Test images for Highway: 125

Industrial:
Train images for Industrial: 1000
Validation images for Industrial: 125
Test images for Industrial: 125

Pasture:
Train images for Pasture: 800
Validation images for Pasture: 100
Test images for Pasture: 100

PermanentCrop:
Train images for PermanentCrop: 1000
Validation images for PermanentCrop: 125
Test images for PermanentCrop: 125

Residential:
Train images for Residential: 1200
Validation images for Residential: 150
Test images for Residential: 150

River:
Train images for River: 1000
Validation im

In [6]:
# Create a data generator for the training dataset
# This generator will load and preprocess images from the 'train_path' directory
train_batches = ImageDataGenerator().flow_from_directory(
    train_path,
    target_size=(64, 64),
    classes=categories,
    batch_size=20
)
# Create a data generator for the validation dataset
# This generator will load and preprocess images from the 'valid_path' directory
valid_batches = ImageDataGenerator().flow_from_directory(
    valid_path,
    target_size=(64, 64),
    classes=categories,
    batch_size=10
)
# Create a data generator for the test dataset
# This generator will load and preprocess images from the 'test_path' directory
test_batches = ImageDataGenerator().flow_from_directory(
    test_path,
    target_size=(64, 64),
    classes=categories,
    batch_size=10,
    shuffle=False)

Found 10800 images belonging to 10 classes.
Found 1350 images belonging to 10 classes.
Found 1350 images belonging to 10 classes.


In [12]:
# Import VGG16 model from Keras applications module
vgg16_model = tensorflow.keras.applications.vgg16.VGG16(include_top=False, input_shape=(64,64,3))

# Define the input tensor
input_tensor = Input(shape=(64, 64, 3))

# Display summary of the VGG16 model architecture
vgg16_model.summary()

In [16]:
NUM_TRAINABLE_LAYERS = 10
# Freeze the weights of all layers except the last NUM_TRAINABLE_LAYERS layers
for layer in vgg16_model.layers[:-NUM_TRAINABLE_LAYERS]:
    layer.trainable = False  # Exclude layer from future training to retain pre-trained weights

# Optionally, you can check the trainable status of the layers
for i, layer in enumerate(vgg16_model.layers):
    print(f"Layer {i}: {layer.name}, Trainable: {layer.trainable}")

Layer 0: input_layer_3, Trainable: False
Layer 1: block1_conv1, Trainable: False
Layer 2: block1_conv2, Trainable: False
Layer 3: block1_pool, Trainable: False
Layer 4: block2_conv1, Trainable: False
Layer 5: block2_conv2, Trainable: False
Layer 6: block2_pool, Trainable: False
Layer 7: block3_conv1, Trainable: False
Layer 8: block3_conv2, Trainable: False
Layer 9: block3_conv3, Trainable: True
Layer 10: block3_pool, Trainable: True
Layer 11: block4_conv1, Trainable: True
Layer 12: block4_conv2, Trainable: True
Layer 13: block4_conv3, Trainable: True
Layer 14: block4_pool, Trainable: True
Layer 15: block5_conv1, Trainable: True
Layer 16: block5_conv2, Trainable: True
Layer 17: block5_conv3, Trainable: True
Layer 18: block5_pool, Trainable: True


In [17]:
# Pass the input tensor through the pre-trained VGG16 model
vgg16_output = vgg16_model(input_tensor)

# Flatten the output of the VGG16 model
x = Flatten()(vgg16_output)

# Add additional layers for fine-tuning and classification
x = Dense(512, activation='relu')(x)
x = Dense(512, activation='relu')(x)
output_tensor = Dense(len(categories), activation='softmax')(x)

# Create the model by specifying the inputs and outputs
model = Model(inputs=input_tensor, outputs=output_tensor)

In [18]:
# Display summary of the updated model architecture
model.summary()

# Define the optimizer with the desired learning rate
optimizer = Adam(learning_rate=0.0001)

# Compile the model using the defined optimizer
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


In [19]:
# Train the model
history = model.fit(
    train_batches,  # Data generator for the training data
    steps_per_epoch=1080,  # Number of batches to draw from train_batches for each epoch
    validation_data=valid_batches,  # Data generator for the validation data
    validation_steps=270,  # Number of batches to draw from valid_batches for validation after each epoch
    epochs=10,  # Number of times the model will go through the entire training dataset
    verbose=2  # Level of logging verbosity (2 provides one line of output per epoch with loss and accuracy)
)

Epoch 1/10
1080/1080 - 526s - 487ms/step - accuracy: 0.7847 - loss: 0.7161 - val_accuracy: 0.8933 - val_loss: 0.3390
Epoch 2/10
1080/1080 - 553s - 512ms/step - accuracy: 0.9194 - loss: 0.2656 - val_accuracy: 0.9237 - val_loss: 0.2690
Epoch 3/10
1080/1080 - 527s - 488ms/step - accuracy: 0.9366 - loss: 0.2134 - val_accuracy: 0.9119 - val_loss: 0.2872
Epoch 4/10
1080/1080 - 617s - 572ms/step - accuracy: 0.9519 - loss: 0.1557 - val_accuracy: 0.9281 - val_loss: 0.2520
Epoch 5/10
1080/1080 - 578s - 535ms/step - accuracy: 0.9529 - loss: 0.1629 - val_accuracy: 0.9348 - val_loss: 0.2227
Epoch 6/10
1080/1080 - 552s - 511ms/step - accuracy: 0.9669 - loss: 0.1069 - val_accuracy: 0.9259 - val_loss: 0.2841
Epoch 7/10
1080/1080 - 528s - 489ms/step - accuracy: 0.9721 - loss: 0.0971 - val_accuracy: 0.9348 - val_loss: 0.2199
Epoch 8/10
1080/1080 - 531s - 492ms/step - accuracy: 0.9746 - loss: 0.0851 - val_accuracy: 0.9074 - val_loss: 0.3551
Epoch 9/10
1080/1080 - 544s - 504ms/step - accuracy: 0.9769 - lo