In [None]:
import os
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.applications.resnet import ResNet50
from keras.applications.vgg16 import VGG16
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

### Prepare the dataset folder if it does not exist

In [None]:
# Base folder for the project: the PARENT of the directory the kernel runs in.
# NB: despite its name, `current_working_directory` holds os.path.dirname(os.getcwd());
# the name is kept because the following cells read it. Works on both Windows and Linux.
current_working_directory = os.path.dirname(os.getcwd())

# Create the "Datasets" folder inside that base folder the first time the cell runs.
datasets_directory = f"{current_working_directory}/Datasets"
if os.path.exists(datasets_directory) is False:
    os.makedirs(datasets_directory)

print(f"[DATASET] PUT THE DATASET here: {current_working_directory}/Datasets")


In [None]:
# Folder that must contain the 'Most_Stolen_Cars' dataset.
# `current_working_directory` is derived from os.getcwd() in the cell above, so it is an
# absolute path. os.path.join discards every segment that precedes an absolute one, which
# means the former leading '..' never had any effect; it is dropped to avoid suggesting
# that the path climbs one more level.
path_of_dataset = os.path.join(current_working_directory, 'Datasets', 'Most_Stolen_Cars')
print(f"[DIR] The directory of the current dataset is {path_of_dataset}")

### Data prep

In [None]:
# helper functions defined here can be reused in the other assignments
def load_the_data_and_the_labels(data_set_path: str, target_size: Optional[tuple] = None) -> tuple:
    """
    Load every image found under ``data_set_path`` together with an integer class label.

    The dataset root is expected to contain one sub-folder per class, each holding the
    images of that class. Class folders are visited in alphabetical order, so the class
    index given to a folder is the same on every machine and on every run.
    Hidden entries (names starting with ".") are ignored, as are plain files in the
    dataset root and sub-folders inside a class folder.

    :param data_set_path: (str) put the path created in the previous cell (is the dataset path)
    :param target_size: (tuple or None) size forwarded to ``tf.keras.utils.load_img`` to resize
        every image while loading (Keras documents it as ``(height, width)``); ``None`` keeps
        each image at its original size.
        NOTE: with ``None`` the images must all share the same size, otherwise they cannot be
        stacked into a single array.
    :return:
        - array of images (dtype uint8)
        - array with labels (dtype int), one class index per loaded image
        - list of labels name; position ``i`` is the folder name of class index ``i``
          (this is used for better visualization)
    :raises Exception: any error met while walking the folders or loading an image is
        logged with the ``[EXCEPTION]`` prefix and then re-raised, so the caller sees the
        real cause instead of a failed unpacking of ``None``.
    """
    try:
        dataset, labels, name_of_the_labels = list(), list(), list()

        # os.listdir gives no ordering guarantee: sort so that class indices are reproducible,
        # and keep only real (non-hidden) directories so stray files do not abort the walk.
        class_names = sorted(
            entry for entry in os.listdir(data_set_path)
            if not entry.startswith('.') and os.path.isdir(os.path.join(data_set_path, entry))
        )

        for class_number, class_name in enumerate(class_names):
            full_path_the_data = os.path.join(data_set_path, class_name)
            print(f"[WALK] I am walking into {full_path_the_data}")

            # position in this list == class index stored in `labels`
            name_of_the_labels.append(class_name)

            for single_image in sorted(os.listdir(full_path_the_data)):
                full_path_to_image = os.path.join(full_path_the_data, single_image)

                # skip hidden files (e.g. OS metadata) and anything that is not a regular file
                if single_image.startswith('.') or not os.path.isfile(full_path_to_image):
                    continue

                # target_size=None is load_img's own default, so one call covers both cases
                image = tf.keras.utils.load_img(full_path_to_image, target_size=target_size)

                # transform the PIL object into an array and store it
                dataset.append(tf.keras.utils.img_to_array(image))

                # record the label only once the image is loaded, keeping both lists aligned
                labels.append(class_number)

        return np.array(dataset, dtype='uint8'), np.array(labels, dtype='int'), name_of_the_labels
    except Exception as ex:
        print(f"[EXCEPTION] load the data and the labels throws exceptions {ex}")
        raise

#### Load the data
    a. Target size: (112, 112, 3)
    b. If your PC crashes with an Out of Memory error, halve the target size

In [None]:
# here

### normalize the data here

In [None]:
# do it here

### Convert the data to one hot encoding (use the sklearn function)

In [None]:
# one-hot encode the labels
def make_the_one_hot_encoding(labels_to_transform):
    """
    One-hot encode class labels with scikit-learn's ``OneHotEncoder``.

    :param labels_to_transform: labels to encode (list or array); they are flattened so
        that each label becomes one row of a single-column 2-D array before encoding
    :return: dense 2-D array with one row per label, as produced by
        ``OneHotEncoder.fit_transform(...).toarray()``
    :raises Exception: any error raised while encoding is logged with the ``[EXCEPTION]``
        prefix and then re-raised, so a failure is not silently turned into ``None``
    """
    try:
        enc = OneHotEncoder(handle_unknown='ignore')
        # the encoder needs a 2-D input: reshape to a single column, one label per row
        label_column = np.asarray(labels_to_transform).reshape(-1, 1)
        one_hot_labels = enc.fit_transform(label_column).toarray()
        # sanity check per row: summing the deviations over all rows could cancel out
        # (a row summing to 0 and another to 2), so every row is checked on its own
        is_one_hot = bool(np.all(one_hot_labels.sum(axis=1) == 1))
        print(f'[ONE HOT ENCODING] Labels are one-hot-encoded: {is_one_hot}')
        return one_hot_labels
    except Exception as ex:
        print(f"[EXCEPTION] Make the one hot encoding throws exception {ex}")
        raise

In [None]:
# do it here 

### Split the data into a train set and a test set
    a. use 0.3 as the split factor

### Create a CNN with the following characteristics
        a. Input layer
        b. As base model use VGG16:
            i. Weights: imagenet
            ii. Include_top: False
            iii. Input_shape the target shape described in point 1. 
        c. Add a flatten layer 
        d. Add a Dense layer with 512 units and a dropout layer with a rate of 0.2.
        e. Add a Dense layer with 256 units and a dropout layer with a rate of 0.2.
        f. Add the final classifier with the correct number of units and the suitable activation.

![alt text](assignment_2_tl.png "CNN with tl")

In [None]:
# do it here 

### Set the layers block5_conv2, block5_conv3 and block5_pool as trainable
    Important: you can write a function that creates the CNN with an option to make these layers trainable or not; this is up to you!

In [None]:
#do it here 

### Train the model 
    a. set the batch size to 32 (if your PC runs out of memory, halve this number)
    b. set epochs to 15

In [None]:
# do it here 

### evaluate the model and record the accuracy score.

In [None]:
# do it here 

### Load again the CNN and set all the base model layers to not trainable.

In [None]:
# here

### Repeat the train and evaluation steps

In [None]:
# here 

### What happens? Why?

### Make and visualize some predictions. 

In [None]:
# here 