# Lab CNN

Create a CNN that can classify flower types

## Importing relevant packages for preprocessing

In [None]:
import warnings

# Suppress every Python warning raised from this point on.
warnings.filterwarnings("ignore")

In [None]:

import numpy as np
import PIL
import PIL.Image
import tensorflow as tf
import pathlib

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import kerastuner as kt
import h5py


## Pre processing the data set

In [None]:
# Root folder of the flowers data set; the glob patterns below search one directory level beneath it.
data_dir = pathlib.Path("Data/flowers")


In [None]:
# Count the .jpg files found one directory level below data_dir.
image_count =len(list(data_dir.glob("*/*.jpg")))
image_count

In [None]:
# Show the first two paths returned by the glob as a quick sample of the files.
list(data_dir.glob("*/*.jpg"))[0:2]

In [None]:
# Sanity check that an image from the data set can be opened and displayed.
sunflower = list(data_dir.glob("sunflower/*"))  # every entry in the sunflower folder
PIL.Image.open(str(sunflower[0])) # open the first entry of that listing

### Creating and splitting the data set using keras utilities  

In [None]:
# The function takes a few adjustable parameters: batch size, image width and image height.
# The data is split into a training set (80%) and a test set (20%).

def train_test_split(batch_size, img_width, img_height):
    """Load the flowers images from ``data_dir`` as a training and a test dataset.

    Both datasets are created with the same seed and the same 0.2 split
    fraction; the "training" subset is returned first and the "validation"
    subset second.

    Args:
        batch_size: number of images per batch.
        img_width: width the images are resized to.
        img_height: height the images are resized to.

    Returns:
        A ``(train_ds, test_ds)`` tuple of datasets.
    """
    # Everything except `subset` is identical for the two loader calls.
    shared_options = dict(
        validation_split=0.2,
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size,
    )
    load_images = tf.keras.utils.image_dataset_from_directory

    train_ds = load_images(data_dir, subset="training", **shared_options)
    test_ds = load_images(data_dir, subset="validation", **shared_options)

    return train_ds, test_ds


In [None]:
# Positional arguments are (batch_size, img_width, img_height): batches of 64 images resized to 64x64.
train_ds, test_ds = train_test_split(64,64,64)

In [None]:
# Read the class names from the training dataset; they are used below to turn numeric labels into plot titles.

class_names = train_ds.class_names
print(class_names)

In [None]:
import matplotlib.pyplot as plt

# Preview the first 12 images of one training batch in a 4x3 grid,
# each titled with its class name.
plt.figure(figsize=(12, 14))
for batch_images, batch_labels in train_ds.take(1):
    for position in range(12):
        ax = plt.subplot(4, 3, position + 1)
        # imshow expects integer pixel values for 0-255 images.
        pixels = batch_images[position].numpy().astype("uint8")
        plt.imshow(pixels)
        plt.title(class_names[batch_labels[position]])
        

I also did a visual check through the picture files to see if there were any that stood out as unsuitable to train on. I will show some examples below.

This would not always be possible on huge datasets as it takes some time to go through every picture. However as it was small I thought it was appropriate to show some examples.

Some images seemed to have no flowers in them at all, so these were obvious. Others may have had too many varieties of flowers in one picture.

I decided that this would be the best approach as when training a model you want to have clean relevant data for it to be trained on.

The picture size has been set to 64 x 64 with a batch size of 64. The reason I made the pictures this small was so that the model would train more quickly, because my computer is not the most powerful! You can see, however, that the pictures are still fairly recognizable to the human eye even though they are pixelated. The batch size was set at 64 as this is the extent of the memory for my computer.

In [None]:
# Folder holding the hand-picked examples of unsuitable images.
# It is bound to its own name only, so the flowers `data_dir` used by
# train_test_split is not overwritten.
bad_data_dir = pathlib.Path("Data/bad_data")

bad_data = tf.keras.utils.image_dataset_from_directory(
    bad_data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(64, 64),
    batch_size=64,
)

# Use this dataset's own class list for the titles: its label indices come
# from the sub-folders of bad_data_dir and need not match the flowers
# `class_names`.
bad_class_names = bad_data.class_names

plt.figure(figsize=(12, 14))
for images, labels in bad_data.take(1):
    # Show at most 12 images; the batch may contain fewer than that.
    for i in range(min(12, images.shape[0])):
        plt.subplot(4, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(bad_class_names[labels[i]])



In [None]:
def _stack_batches(dataset):
    """Concatenate every (images, labels) batch of *dataset* into two arrays."""
    image_batches = []
    label_batches = []
    for batch in dataset.as_numpy_iterator():
        image_batches.append(batch[0])
        label_batches.append(batch[1])
    return np.concatenate(image_batches), np.concatenate(label_batches)


def keras_ds_to_numpy_array(train_ds, test_ds):
    """Convert batched training and test datasets into NumPy arrays.

    Each dataset is iterated once with ``as_numpy_iterator()``; the first
    element of every batch is treated as the images and the second as the
    labels. A short shape/size report is printed for both datasets.

    Args:
        train_ds: batched training dataset.
        test_ds: batched test dataset.

    Returns:
        ``(train_images, train_labels, test_images, test_labels)``.

    Raises:
        ValueError: if a dataset yields no batches (nothing to concatenate).
    """
    train_images, train_labels = _stack_batches(train_ds)
    test_images, test_labels = _stack_batches(test_ds)

    # The values are interpolated into the f-strings so the report shows
    # them directly rather than as a separately printed set literal.
    print(f"Train Images shape: {train_images.shape}")
    print(f"Train Labels Shape: {train_labels.shape}")
    print(f"Number of Train Samples: {len(train_images)}")
    print(f"Number of Train labels: {len(train_labels)}")

    print(f"Test Images shape: {test_images.shape}")
    print(f"Test Labels Shape: {test_labels.shape}")
    print(f"Number of Test Samples: {len(test_images)}")
    print(f"Number of Test labels: {len(test_labels)}")

    return train_images, train_labels, test_images, test_labels

In [None]:
# Convert the datasets created above (batch size 64, 64x64 images) into NumPy arrays.
train_images, train_labels, test_images, test_labels =  keras_ds_to_numpy_array(train_ds, test_ds)

## Building the model

I am going to use Keras Tuner to help find the best hyperparameters for my model. First I have to build a function that defines the different parameters the tuner will try in order to build the best model.

In [None]:
# Input image size expected by the network; matches the 64x64 size the
# datasets were created with.
img_height = 64
img_width = 64

def model_builder(hp, num_classes=5):
    """Build and compile a tunable CNN for Keras Tuner.

    The search space covers the filter count of every convolution layer,
    the number of extra convolution blocks (1-4), the number and width of
    the dense layers (1-4 layers) and the Adam learning rate.

    Args:
        hp: hyperparameter object supplied by the tuner.
        num_classes: size of the softmax output layer. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        The compiled model.
    """
    model = tf.keras.Sequential()
    # Scale pixel values from 0-255 down to 0-1; this first layer also fixes the input shape.
    model.add(layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)))

    # First convolution block. No input_shape here: the shape is already
    # defined by the Rescaling layer above.
    model.add(layers.Conv2D(
        filters=hp.Int("input_filters", min_value=32, max_value=256, step=32),
        kernel_size=3,
        activation="relu",
        padding="same",
    ))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Additional convolution blocks: the tuner picks how many (1-4) and the
    # filter count of each one.
    for i in range(hp.Int("n_conv_layers", 1, 4)):
        model.add(layers.Conv2D(
            filters=hp.Int(f"conv_{i}_filters", min_value=32, max_value=256, step=32),
            kernel_size=3,
            activation="relu",
            padding="same",
        ))
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(layers.Flatten())

    # Dense layers: the tuner picks how many (1-4) and the width of each one.
    for i in range(hp.Int("n_dense_layers", 1, 4)):
        model.add(layers.Dense(
            units=hp.Choice(f"n_nodes_{i}", values=[10, 20, 50, 100]),
            activation="relu",
        ))

    # Output layer: one unit per class, turned into probabilities by softmax.
    model.add(layers.Dense(num_classes))
    model.add(layers.Activation("softmax"))

    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        ),
        # Sparse loss: the labels are used as integer class indices.
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    return model

I will now explain why I chose these parameters to search and the reason behind my model architecture: 

- "Sequential" - I chose a sequential model as CNN picture analysis is a sequenced process and based on a one input tensor one output tensor system. 

- "Rescaling" - I rescaled the images within the model. This ensures that the values of the pictures are between 0 and 1. This makes it easier for the deep learning model to make predictions as everything is between 0 and 1.

- "Convolution Layers" - The first convolution layer is the input layer. This is determined by the size of the input. Using the tuner parameter helps me determine how many filters should be applied. The following layers are then put through the tuner to see how many layers and how many filters work best for validation accuracy. The number of these additional layers could be between 1 and 4.

- "kernel  size" is then set to a 3 X 3. This means that every filter passes over the image with a 3 X 3 grid. I chose 3 X 3 as it is very common to use and as the pictures are at 64 X 64 if I had any higher it may miss vital information within the picture. I kept the kernel size the same throughout the model. 

- "Padding" is added so that there are zeros surrounding the input. This helps so that the filter doesn't miss any information on the outer pixels of the image. It is set to "same" so that the output size is the same as the input size.

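- "Activation" - ReLU is used as the activation for the convolution and dense layers, and a softmax activation is applied to the output layer so that the five outputs can be read as class probabilities.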



## Now that I have created my model structure, it is time to run the Keras Tuner to find the best hyperparameters for my model

In [None]:
# Hyperband tuner that builds candidate models with model_builder and ranks
# them by validation accuracy. `directory` and `project_name` select where the
# tuner keeps its results.
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='tuner_results',
                     project_name='flowers')

In [None]:
# Stop a training run once val_loss has gone 3 epochs without improving.
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=3)

In [None]:
# Run the hyperparameter search on the training arrays, with validation_split=0.2 and early stopping.
# NOTE(review): the Hyperband tuner above was created with max_epochs=10 - confirm whether epochs=50 has any effect here.
tuner.search(train_images, train_labels, epochs=50,validation_split=0.2, callbacks=[early_stopping])


In [None]:
# get_best_models returns a list, so the single best model is best_model[0].
best_model = tuner.get_best_models(num_models=1)
# summary() prints the table itself and returns None, so it is called
# directly instead of being wrapped in print() (which added a stray "None").
best_model[0].summary()

best_hyperparameters = tuner.get_best_hyperparameters(1)[0]
print(best_hyperparameters.values)

### Now I will train my model with the best parameters

In [None]:
def train_and_evaluate_model(best_hyperparameters, epochs):
    """Train a model built from the tuned hyperparameters and evaluate it.

    The model is built through the module-level ``tuner`` and trained on
    ``train_images``/``train_labels`` with a 20% validation split. The epoch
    with the best validation accuracy is checkpointed to disk; that
    checkpoint is the model that is evaluated on the test arrays,
    summarised and returned, so the printed test metrics describe the
    returned model.

    Args:
        best_hyperparameters: hyperparameters to build the model from.
        epochs: maximum number of training epochs (early stopping may end
            training sooner).

    Returns:
        The model loaded from the best checkpoint.
    """
    checkpoint_filepath = "../best_model/checkpoint.model.keras"

    model = tuner.hypermodel.build(best_hyperparameters)

    # Stop once val_loss has not improved for 3 epochs.
    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss", mode="min", verbose=1, patience=3
    )
    # Keep only the epoch with the highest validation accuracy on disk.
    model_checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor="val_accuracy",
        mode="max",
        verbose=1,
        save_best_only=True,
    )

    history = model.fit(
        train_images,
        train_labels,
        epochs=epochs,
        validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint],
    )

    # Evaluate the checkpointed model rather than the last-epoch weights.
    best_model = keras.models.load_model(checkpoint_filepath)

    test_loss, test_accuracy = best_model.evaluate(test_images, test_labels, verbose=0)
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    plt.figure(figsize=(8, 8))

    plt.subplot(1, 2, 1)
    plt.plot(history.history["accuracy"], label="Training Accuracy")
    plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history["loss"], label="Training Loss")
    plt.plot(history.history["val_loss"], label="Validation Loss ")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()

    # summary() prints the table itself and returns None, so no print() wrapper.
    best_model.summary()

    return best_model
    

    



In [None]:
# Train with the tuned hyperparameters for at most 25 epochs.
train_and_evaluate_model(best_hyperparameters, 25)

It is noticeable that there is some overfitting within the model. The training accuracy continues to improve consistently, while the validation accuracy levels off at around 70%. This can occur if the data set is too small. I would like to try to improve this. I will first try to use data augmentation. This adds additional training data by creating random variations of the original images, for example by zooming in and rotating. This will hopefully expose the model to more data and help it generalize.

I will now add this into my model function and see if it has any improvements. Then I will take the best model again and train that to see if the results are better.

Now I will test the new model 

In [None]:
def train_and_evaluate_model(epochs, img_width, img_height, num_classes=5):
    """Train a fixed five-block CNN with dropout and evaluate it.

    The model is trained on ``train_images``/``train_labels`` with a 20%
    validation split. The epoch with the best validation accuracy is
    checkpointed to disk; that checkpoint is the model that is evaluated
    on the test arrays, summarised and returned, so the printed test
    metrics describe the returned model.

    Args:
        epochs: maximum number of training epochs (early stopping may end
            training sooner).
        img_width: width of the input images.
        img_height: height of the input images.
        num_classes: size of the softmax output layer. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        The model loaded from the best checkpoint.
    """
    # NOTE(review): the notebook text around this cell talks about data
    # augmentation, but this model contains no augmentation layers - only a
    # Dropout layer was added. Confirm whether that is intended.
    model = keras.Sequential([
        # Images are loaded as (height, width, channels), so the input shape
        # follows that order.
        layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),

        layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(160, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(224, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(224, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        layers.Flatten(),

        layers.Dense(50, activation="relu"),

        # Randomly drop half of the dense activations during training.
        layers.Dropout(0.5),

        layers.Dense(num_classes),

        layers.Activation("softmax"),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Stop once val_loss has not improved for 3 epochs.
    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss", mode="min", verbose=1, patience=3
    )

    checkpoint_filepath = "../best_model/checkpoint.model.keras"

    # Keep only the epoch with the highest validation accuracy on disk.
    model_checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor="val_accuracy",
        mode="max",
        verbose=1,
        save_best_only=True,
    )

    history = model.fit(
        train_images,
        train_labels,
        epochs=epochs,
        validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint],
    )

    # Evaluate the checkpointed model rather than the last-epoch weights.
    best_model = keras.models.load_model(checkpoint_filepath)

    test_loss, test_accuracy = best_model.evaluate(test_images, test_labels, verbose=0)
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    plt.figure(figsize=(8, 8))

    plt.subplot(1, 2, 1)
    plt.plot(history.history["accuracy"], label="Training Accuracy")
    plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history["loss"], label="Training Loss")
    plt.plot(history.history["val_loss"], label="Validation Loss ")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()

    # summary() prints the table itself and returns None, so no print() wrapper.
    best_model.summary()

    return best_model


In [None]:
# Positional arguments are (epochs, img_width, img_height): at most 25 epochs on 64x64 images.
train_and_evaluate_model(25, 64,64)