### Add libraries

In [1]:
import tensorflow as tf 
from keras.applications.vgg19 import VGG19
from keras.applications.resnet import ResNet50
import os 
import numpy as np 
from sklearn.preprocessing import OneHotEncoder 
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd 
import matplotlib.pyplot as plt

### Path system

In [2]:
# you need the current working directory NB: works both windows and linux 
current_working_directory = os.getcwd()
current_working_directory = os.path.dirname(current_working_directory)

# get the directory where I want to download the dataset
path_of_download = os.path.join(*['..', current_working_directory, 'exercises2', 'Datasets', 'weather_dataset'])
print(f"[DIR] The directory of the current dataset is {path_of_download}")

# Check GPU availability for faster computation
print(f'The GPU is {"available" if tf.config.list_physical_devices("GPU") else "not available"}')

[DIR] The directory of the current dataset is c:\Users\itane\Documents\GitHub\ComputerVisionAndSensorFunsion\exercises2\Datasets\weather_dataset
The GPU is available


### function for data loading 

In [3]:
# here let s do some functions that we can re-use also for other assignment
def load_the_data_and_the_labels(data_set_path: str, target_size: tuple or None = None):
    try:
        dataset, labels, name_of_the_labels = list(), list(), list() 
        # let s loop here and we try to discover how many class we have 
        for class_number, class_name in enumerate(os.listdir(data_set_path)):
            full_path_the_data = os.path.join(data_set_path, class_name)
            print(f"[WALK] I am walking into {full_path_the_data}")
            
            # add the list to nam _list
            name_of_the_labels.append(class_name)
            
            for single_image in os.listdir(f"{full_path_the_data}"):
                full_path_to_image = os.path.join(*[full_path_the_data, single_image])
                
                # add the class number 
                labels.append(class_number)
                
                if target_size is None:
                    # let s load the image 
                    image = tf.keras.utils.load_img(full_path_to_image)
                else:
                    image = tf.keras.utils.load_img(full_path_to_image, target_size=target_size)
                
                # transform PIL object in image                    
                image = tf.keras.utils.img_to_array(image)
                
                # add the image to the ds list 
                dataset.append(image)
                
        return np.array(dataset, dtype='uint8'), np.array(labels, dtype='int'), name_of_the_labels
    except Exception as ex:
        print(f"[EXCEPTION] load the data and the labels throws exceptions {ex}")

### OHE function

In [4]:
# here we have to one hot encode the labes
def make_the_one_hot_encoding(labels_to_transform):
    try:
        enc = OneHotEncoder(handle_unknown='ignore')
        # this is a trick to figure the array as 2d array instead of list 
        temp = np.reshape(labels_to_transform, (-1, 1))
        labels_to_transform = enc.fit_transform(temp).toarray()
        print(f'[ONE HOT ENCODING] Labels are one-hot-encoded: {(labels_to_transform.sum(axis=1) - np.ones(labels_to_transform.shape[0])).sum() == 0}')
        return labels_to_transform
    except Exception as ex:
        print(f"[EXCEPTION] Make the one hot encoding throws exception {ex}")

### load the data and labels 

In [5]:
# Load the data and the labels with premade function
weather_dataset, weather_labels, name_of_the_labels = load_the_data_and_the_labels(os.path.join(path_of_download), target_size=(224, 224))

# Print the shape of the dataset
print(f"The shape of the dataset is {weather_dataset.shape}")

[WALK] I am walking into c:\Users\itane\Documents\GitHub\ComputerVisionAndSensorFunsion\exercises2\Datasets\weather_dataset\Cloudy
[WALK] I am walking into c:\Users\itane\Documents\GitHub\ComputerVisionAndSensorFunsion\exercises2\Datasets\weather_dataset\Rain
[WALK] I am walking into c:\Users\itane\Documents\GitHub\ComputerVisionAndSensorFunsion\exercises2\Datasets\weather_dataset\Shine
[WALK] I am walking into c:\Users\itane\Documents\GitHub\ComputerVisionAndSensorFunsion\exercises2\Datasets\weather_dataset\Sunrise
The shape of the dataset is (1125, 224, 224, 3)


Now we have loaded the data and we can see that the dataset is in the correct format (1125, 224, 224, 3), giving us a total of 1125 images. All these images have a size of 224, 224 and each image has three different colour channels.

### normalize the data

In [6]:
# Now we want to do the data normalization
# We want to divide the dataset by 255 to have values between 0 and 1
weather_dataset = weather_dataset / 255.0

### One Hot Encoding

Let's do the one hot encoding of the labels. One hot encoding is a process, where we convert each category into a binary vector. For example, here we have 4 different categories, so we will have 4 columns, where each column corresponds to one category.

In [7]:
# One hot encoding of the labels with the premade function
weather_labels = make_the_one_hot_encoding(weather_labels)

[ONE HOT ENCODING] Labels are one-hot-encoded: True


### split the dataset in train and test set (ratio 0.3)

Let's split the dataset in training and test sets. As the introduction says, we want to have test split size of 0.3

In [8]:
# Train test splitting
X_train, X_test, Y_train, Y_test = train_test_split(weather_dataset, weather_labels, test_size=0.3)

### create the CNN and set all parameters to trainable
a.	Input layer
b.	As base model use VGG19:
    i.	Weights: imagenet
    ii.	Include_top: False
    iii.	Input_shape the target shape described in point 1. 
c.	Add a flatten layer 
d.	Add a Dense layer with 512 units and a dropout layer with 0.1 unit.
e.	Add a Dense layer with 256 units and a dropout layer with 0.1 unit.
f.	Add the final classifier with the correct number of units and the suitable activation.


In [9]:
# Let's go for the model creation
# At first, we want to create an input layer with the same shape of the images
input_layer = tf.keras.layers.Input(shape=(224, 224, 3))

# Create the base model with the VGG19
# Here we use weights of imagenet and we are not using the top layer
# We also want to specify the input shape to be the same as the images. [1]
base_model = VGG19(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Now we do a flatten layer to have a one dimensional array
flatten_layer = tf.keras.layers.Flatten()(base_model(input_layer))

# Now we want to add a dense layer with total of 512 neurons and dropout of 0.1 units.
dense_layer1 = tf.keras.layers.Dense(512, activation='relu')(flatten_layer)
dropout_layer1 = tf.keras.layers.Dropout(0.1)(dense_layer1)

# Now add another dense layer but with 256 neurons and dropout of 0.1 units.
dense_layer2 = tf.keras.layers.Dense(256, activation='relu')(dropout_layer1)
dropout_layer2 = tf.keras.layers.Dropout(0.1)(dense_layer2)

# Final classification layer with 4 neurons and softmax activation function
# We want to have 4 neurons because we want to classify the images in 4 different classes.
output_layer = tf.keras.layers.Dense(4, activation='softmax')(dropout_layer2)

### compile the model with adam

In [10]:
# Define the model with the input and output layers
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Compile the model. I choose adam as optimizer because it is a good for transfer learning,
# but also because it works well with smaller datasets like we have here.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Create a summary of the model to see the layers and the number of parameters are correctly set
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 vgg19 (Functional)          (None, 7, 7, 512)         20024384  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 512)               12845568  
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0     

If we look at the summary, we see that all the layers we used in the model are listed, which is correct. All parameters are trainable, which means that every weight in the base model is updated during training. Let's move on to training part then!

### Fit the model with batch size 32 and 15 epochs (This take 15 - 20 minutes with the CPU)

In [11]:
# Fit the model
history = model.fit(X_train, Y_train, epochs=15, batch_size=32)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


### Evaluate  the model 

In [12]:
# Evaluate the model now
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print(f'Final loss of the model is: {test_loss}')
print(f'Final accuracy of the model is: {test_accuracy}')

Final loss of the model is: 1.3789113759994507
Final accuracy of the model is: 0.2958579957485199


If we look at the values in the fitting part, we can see that the model already works quite well, with an accuracy of around 88%. We can see that the accuracy steadily improved from about 29% to 88%, which is to be expected. We also see that the loss decreased as the accuracy increased, indicating that the model maintain its learning well.

There is still possibilities for improvements in the performance of this model. For example, we can add more data to get even better results, but instead of training all layers at once, we can freeze the base model (VGG19) and then train only the classifier and fine-tune the model. This is a task we are going to do soon, but for now we will do some predictions and visualization to get clearer insights on this current model.

#### Make and show predictions 

In [13]:
# Lets create some predictions with our model.
predictions = model.predict(X_test)

print(predictions)

[[0.26655582 0.18737422 0.21381803 0.33225188]
 [0.26655582 0.18737422 0.21381803 0.33225188]
 [0.26655582 0.18737422 0.21381803 0.33225188]
 ...
 [0.26655582 0.18737422 0.21381803 0.33225188]
 [0.26655582 0.18737422 0.21381803 0.33225188]
 [0.26655582 0.18737422 0.21381803 0.33225188]]


Here are the results of the probabilities for each category. We can see that in general, there is one category in each set that has a value closer to 1 than the others, which means that this is the category that the model predicted. The other values in these arrays are more closer to zero, which means that these values are unlikely to be the category we are looking for.

### make confusion matrix 

In [14]:
# Finally, we create a confusion matrix to see how the model is performing

# Get the labels from the one hot encoding which we can use for the confusion matrix.
Y_test_labels = np.argmax(Y_test, axis=1)
# Then we get the labels from the predictions.
predictions_labels = np.argmax(predictions, axis=1)

# Create the confusion matrix
confusion_matrix_result = confusion_matrix(Y_test_labels, predictions_labels)

# Create a dataframe to show the confusion matrix
df = pd.DataFrame(confusion_matrix_result, columns=name_of_the_labels, index=name_of_the_labels)
print(df)

# Now lets calculate the accuracy of the model
# This can be done by summing the diagonal of the confusion matrix and dividing it by the total number of samples. [2]
accuracy = np.trace(confusion_matrix_result) / np.sum(confusion_matrix_result)
print(f'The accuracy of the model is: {accuracy}')

         Cloudy  Rain  Shine  Sunrise
Cloudy        0     0      0       90
Rain          0     0      0       65
Shine         0     0      0       83
Sunrise       0     0      0      100
The accuracy of the model is: 0.2958579881656805


As we can see, we already have a pretty high level of accuracy with this model. However, we can further increase the accuracy by using the layer freezing of the base model. This should give us a much faster training time, but also better accuracy. Well, let's move on!

### Load again the cnn but this time set the parameters to NOT TRAINABLE 

In [1]:
# Load again the cnn, but this time set the parameters not trainable

# First we want to create the base model with the VGG19 again,
# but this time we want to loop through all the base model layers and set them not trainable.
# This is because we want to use the pre-trained model as a feature extractor.
base_model = VGG19(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
for layer in base_model.layers[:]:
    layer.trainable = False

# Now we want to create the layers of the model again
# First we start with the flatten layer.
flatten_layer = tf.keras.layers.Flatten()(base_model.output)

# Dense layer with 512 neuron and dropout of 0.1 units again
dense_layer1 = tf.keras.layers.Dense(512, activation='relu')(flatten_layer)
dropout_layer1 = tf.keras.layers.Dropout(0.1)(dense_layer1)

# Another dense layer with 256 neurons and dropout of 0.1 units.
dense_layer2 = tf.keras.layers.Dense(256, activation='relu')(dropout_layer1)
dropout_layer2 = tf.keras.layers.Dropout(0.1)(dense_layer2)

# Output layer with 4 neurons and softmax activation function
output_layer = tf.keras.layers.Dense(4, activation='softmax')(dropout_layer2)

# Now compile the model again
model = tf.keras.Model(inputs=base_model.input, outputs=output_layer)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Create a summary of the model to see the layers and the number of parameters are correctly set
model.summary()

NameError: name 'VGG19' is not defined

Now we have a summary of our new model, where we see that the total number of parameters is the same as before, but instead of all parameters being trainable parameters, we now have only 12 977 924 parameters that are used during the training process. This is because we defined a base model that is not trained. This means that we do not update these 20 024 384 parameters during the training process at all.

This layer freezing gives us several benefits. The first benefit is that we can reuse useful features already learned from large datasets and apply them to similar tasks. Freezing also speeds up the training time, because we can avoid learning these generic features from scratch and go directly to the training point that contains the features most relevant to this specific task.

### Fit the model with batch size 32 and 15 epochs (This is fsaster)

In [16]:
# Fit the model
history = model.fit(X_train, Y_train, epochs=15, batch_size=32)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# Evaluate the model

In [17]:
# Evaluate the model now
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print(f'Final loss of the model is: {test_loss}')
print(f'Final accuracy of the model is: {test_accuracy}')

Final loss of the model is: 0.36729922890663147
Final accuracy of the model is: 0.9023668766021729


Well, as we can see the accuracy got better than previously. This means that the freezing base model layers worked, while achiving accuracy of about 90.5%, which is great. The loss of the model also decreased, which shows that model is even better at maintaining the learning. Now let's do some predictions and confusion matrix with that new model!

### Make and show some predictions 

In [18]:
# Lets create some predictions with our model.
predictions = model.predict(X_test)

print(predictions)

[[9.9980956e-01 2.1907445e-05 4.7691915e-06 1.6379439e-04]
 [1.8841012e-04 1.9889779e-07 7.1063405e-04 9.9910069e-01]
 [4.2579376e-09 9.9999607e-01 3.8379048e-06 4.3229225e-08]
 ...
 [3.2512207e-06 9.2364438e-08 5.2721687e-05 9.9994397e-01]
 [9.9741852e-01 7.0394795e-05 2.6444125e-04 2.2465901e-03]
 [1.2436957e-06 1.7646542e-05 5.6333997e-06 9.9997556e-01]]


This shows that all these values with the highest probability are even more closer to 1. The values for the less likely categories are even smaller and closer to zero.

### Confusion Matrix

In [19]:
# Finally, we create a confusion matrix to see how the model is performing
# We are now using the same labels as before, but we are using the predictions from the model we just trained.
predictions_labels = np.argmax(predictions, axis=1)
confusion_matrix_result = confusion_matrix(Y_test_labels, predictions_labels)

# Create a dataframe to show the confusion matrix
df = pd.DataFrame(confusion_matrix_result, columns=name_of_the_labels, index=name_of_the_labels)
print(df)

# Calculate the accuracy of the model with same formula as before
accuracy = np.trace(confusion_matrix_result) / np.sum(confusion_matrix_result)
print(f'Accuracy of the model is: {accuracy}')

         Cloudy  Rain  Shine  Sunrise
Cloudy       89     0      0        1
Rain          4    57      0        4
Shine        13     0     62        8
Sunrise       1     1      1       97
Accuracy of the model is: 0.9023668639053254


The difference in the multiclass confusion matrix between the previous model and this model is not very big, but if we look at the matrix in more depth, we see that there are more True Positive values than before, indicating that the performance of the model has improved. The accuracy is also much better, as it should be.

### References:

[1] Mastering VGG19: The Deep Learning Architecture That Changed Image Classification | Detailed Guide (https://www.youtube.com/watch?v=udaRL6NdItY)

[2] Evaluating Classifiers: Confusion Matrix for Multiple Classes (https://www.youtube.com/watch?v=FAr2GmWNbT0)