## 1. Install dependencies and load data

In [1]:
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import cv2
import imghdr
import matplotlib
from PIL import Image
import random
import pickle

In [2]:
# Mount Google Drive at /content/drive (Colab only) so that the pickled
# datasets stored under MyDrive/ml_project can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


We load the data with pickle. We serialized these objects with pickle in the "data_prep" notebook.
I split the work into two notebooks so that I can use 'Run all' more conveniently here.

In [3]:
# Load the train/test images and labels that the "data_prep" notebook
# serialized with pickle.
# NOTE: pickle.load can execute arbitrary code, so only load files that you
# produced yourself.
data_dir = "/content/drive/MyDrive/ml_project/"

# Each file is opened in a `with` block so the handle is closed as soon as
# the object has been read (the bare open() calls leaked four file handles).
with open(data_dir + "img_train.pickle", "rb") as pickle_in:
    img_train = pickle.load(pickle_in)

with open(data_dir + "label_train.pickle", "rb") as pickle_in:
    label_train = pickle.load(pickle_in)

with open(data_dir + "img_test.pickle", "rb") as pickle_in:
    img_test = pickle.load(pickle_in)

with open(data_dir + "label_test.pickle", "rb") as pickle_in:
    label_test = pickle.load(pickle_in)


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/ml_project/img_train.pickle'

In [None]:
# Guard against OutOfMemory errors: enable on-demand memory growth for every
# GPU TensorFlow can see. With no GPU available the list is empty, the loop
# does nothing, and an empty list is printed.
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, True)
print(gpus)

## Check and pre-process data

In [None]:
# Locations of the training and test image directories
train_dir, test_dir = '/content/train', '/content/test'

In [None]:
# Preview up to four MUFFIN training images in a 2x2 grid to get an idea of
# their structure.
path = os.path.join(train_dir, 'muffin')

# List the directory once instead of on every loop iteration, and sort the
# names: os.listdir returns entries in arbitrary order, so sorting keeps the
# preview reproducible between runs.
file_names = sorted(os.listdir(path))[:4]

# cv2.imread returns None for files it cannot decode; skip those so that
# cvtColor below does not fail on them.
images = [cv2.imread(os.path.join(path, file_name)) for file_name in file_names]
images = [image for image in images if image is not None]

f, axarr = plt.subplots(2, 2, figsize=(10, 10))

for ax in axarr.flat:
    ax.axis("off")

for ax, image in zip(axarr.flat, images):
    # OpenCV loads images as BGR, matplotlib expects RGB.
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

plt.show()

In [None]:
# Preview up to four CHIHUAHUA training images in a 2x2 grid to get an idea
# of their structure.
path = os.path.join(train_dir, 'chihuahua')

# List the directory once instead of on every loop iteration, and sort the
# names: os.listdir returns entries in arbitrary order, so sorting keeps the
# preview reproducible between runs.
file_names = sorted(os.listdir(path))[:4]

# cv2.imread returns None for files it cannot decode; skip those so that
# cvtColor below does not fail on them.
images = [cv2.imread(os.path.join(path, file_name)) for file_name in file_names]
images = [image for image in images if image is not None]

f, axarr = plt.subplots(2, 2, figsize=(10, 10))

for ax in axarr.flat:
    ax.axis("off")

for ax, image in zip(axarr.flat, images):
    # OpenCV loads images as BGR, matplotlib expects RGB.
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

plt.show()

## Build the Model

In [None]:
from tensorflow.keras.models import Sequential # good with 1 input 1 output
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout

In [None]:
# Baseline model: three convolution + max-pooling stages followed by a small
# fully connected classifier.
model0 = Sequential([
    # The first layer must declare the input shape: 150x150 pixels with
    # 1 channel (grayscale). Change the channels from 1 to 3 if using RGB.
    # 16 is the number of filters, each filter is 3x3 pixels, 1 is the stride.
    Conv2D(16, (3, 3), 1, activation='relu', input_shape=(150, 150, 1)),
    # For each 2x2 region (pool) of the ReLU output, keep only the highest value.
    MaxPooling2D(),

    # Second stage, now with 32 filters.
    Conv2D(32, (3, 3), 1, activation='relu'),
    MaxPooling2D(),

    Conv2D(16, (3, 3), 1, activation='relu'),
    MaxPooling2D(),

    # After the convolutions the filters form the last (channel) axis while
    # height and width have been condensed. The dense layers do not take a
    # multi-dimensional input, so everything is flattened into one vector.
    Flatten(),

    # Dense layers are fully connected layers.
    Dense(256, activation='relu'),   # 256 values as output
    Dense(1, activation='sigmoid'),  # a single sigmoid output for the binary label
])

# Binary cross-entropy is used because this is a two-class classification
# problem. (See tf.optimizers for the available optimizers.)
model0.compile(optimizer='adam', loss=tf.losses.BinaryCrossentropy(), metrics=['accuracy'])

model0.summary()

## Train the model

In [None]:
# .fit is the training step (.predict is used later to make predictions).
# 15% of the training data is held out for validation.
# Note: this cell takes a long time to run.
hist0 = model0.fit(
    x=img_train,
    y=label_train,
    batch_size=32,
    epochs=20,
    validation_split=0.15,
)

## Check the results of the model


In [None]:
def plot_loss_accuracy(histo):
    """Plot the loss and accuracy curves of a training run side by side.

    Parameters
    ----------
    histo : object with a ``history`` mapping
        Typically the object returned by ``model.fit``. ``histo.history``
        must contain the keys ``'loss'`` and ``'accuracy'``. The keys
        ``'val_loss'`` and ``'val_accuracy'`` are plotted as well when they
        are present, so a run trained without validation data can still be
        plotted instead of raising ``KeyError``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, axs = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: loss, right panel: accuracy. Both panels share one layout.
    for ax, metric in zip(axs, ('loss', 'accuracy')):
        ax.plot(histo.history[metric], color='teal', label=metric)

        val_metric = 'val_' + metric
        if val_metric in histo.history:
            ax.plot(histo.history[val_metric], color='orange', label=val_metric)

        ax.set_title(metric.capitalize(), fontsize=20)
        ax.set_xlabel('Epoch')
        ax.legend(loc='upper left')

    plt.show()

In [None]:
# Loss and accuracy curves (training vs. validation) of the baseline model
plot_loss_accuracy(hist0)

In [None]:
# This cell and the accuracy cell that follows duplicate what
# plot_loss_accuracy(hist0) already draws; they should be unnecessary now.
fig = plt.figure()
ax = plt.gca()
ax.plot(hist0.history['loss'], color='teal', label='loss')
ax.plot(hist0.history['val_loss'], color='orange', label='val_loss')
fig.suptitle('Loss', fontsize=20)
ax.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation accuracy of the baseline model.
# The training history is stored in `hist0`; the name `hist` is never defined
# in this notebook and raised a NameError.
fig = plt.figure()
plt.plot(hist0.history['accuracy'], color='teal', label='accuracy')
plt.plot(hist0.history['val_accuracy'], color='orange', label='val_accuracy')
fig.suptitle('Accuracy', fontsize=20)
plt.legend(loc='upper left')
plt.show()

### Refine the model
The main issue is overfitting. We can try to address it by adding a Dropout or a BatchNormalization layer. After doing this we move on to the next model.

The dropout layer is applied to the dense layer before the output, using a value of p=0.5, following the methodology of Hinton (2012), the paper where dropout was initially proposed.

However, according to (more) recent research (Park and Kwak, Analysis on the Dropout Effect in Convolutional Neural Networks), applying a lower level dropout (p=0.1) to convolutional layers can be beneficial.

However, according to some: "We must not use dropout layer after convolutional layer as we slide the filter over the width and height of the input image we produce a 2-dimensional activation map that gives the responses of that filter at every spatial position. So as dropout layer neutralizes (makes it zero) random neurons there are chances of loosing very important feature in an image in our training process." Pooja Sonkar (https://stats.stackexchange.com/users/215170/pooja-sonkar), Where should I place dropout layers in a neural network?, URL (version: 2018-10-05): https://stats.stackexchange.com/q/370325

In [None]:
# Same architecture as the first model, with a dropout layer added between
# the dense layer and the output.
model0d = Sequential([
    # Change the channels from 1 to 3 if using RGB instead of grayscale.
    Conv2D(16, (3, 3), 1, activation='relu', input_shape=(150, 150, 1)),
    MaxPooling2D(),

    Conv2D(32, (3, 3), 1, activation='relu'),
    MaxPooling2D(),

    Conv2D(16, (3, 3), 1, activation='relu'),
    MaxPooling2D(),

    Flatten(),

    Dense(256, activation='relu'),
    Dropout(0.5),  # dropout (p=0.5) to reduce overfitting
    Dense(1, activation='sigmoid'),
])

model0d.compile(optimizer='adam', loss=tf.losses.BinaryCrossentropy(), metrics=['accuracy'])

model0d.summary()

In [None]:
# Train the dropout model with the same settings as the baseline and keep
# its training history, then plot the loss and accuracy curves.
hist0d = model0d.fit(
    x=img_train,
    y=label_train,
    batch_size=32,
    epochs=20,
    validation_split=0.15,
)

plot_loss_accuracy(hist0d)

## Add L2 regularization
L2 regularization requires the addition of an extra term to the loss function of the network.

## Evaluate Performance

The training loss decreases steadily over time. However, while the validation loss initially decreases, around the 6th epoch it starts to increase. This pattern suggests that the model is overfitting. To improve this result we may need to apply regularization.

In [None]:
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy

In [None]:
# The metrics are stateful objects, so one instance of each is created first
pre, re, acc = Precision(), Recall(), BinaryAccuracy()

In [None]:
# Number of test images. The test images are loaded into `img_test` at the
# top of the notebook; the name `test` is never defined here.
len(img_test)

In [None]:
# Score a trained model on the held-out test data loaded from pickle at the
# top of the notebook. `scaled_iterator_test` and `model` are not defined in
# this notebook, so the test arrays and a trained model are used directly.
# NOTE(review): model0d (the dropout model) is assumed to be the model to
# report; use model0 here to score the baseline instead.
yhat = model0d.predict(img_test)

for metric in (pre, re, acc):
    metric.update_state(label_test, yhat)

In [None]:
# Report the three test metrics on one line, rounded to two decimals
named_metrics = (("Precision", pre), ("Recall", re), ("Accuracy", acc))
print(", ".join(f"{name}: {metric.result():.2f}" for name, metric in named_metrics))

## Second model
A second fancier model must be designed. How do I do it? I don't know.

## Save the model
Maybe this can be done after we get the best model, and only for that model


In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Save the trained model to models/model1.h5.
# h5 (HDF5) is a serialization format, like .zip.
# The target directory is created first because saving fails when it does
# not exist.
# NOTE(review): `model` is never defined in this notebook; model0d (the
# dropout model) is assumed to be the one to keep — confirm.
os.makedirs('models', exist_ok=True)
model0d.save(os.path.join('models', 'model1.h5'))

In [None]:
# Load the saved model back from disk
saved_model_path = os.path.join('models', 'model1.h5')
new_model = load_model(saved_model_path)