# Using Convolutional Neural Networks to analyze Santa Data

## 1.  First things first: Importing images

In [None]:
# Plotting, image and array libraries for the notebook.
# NOTE(review): scipy, PIL.Image and ndimage are not referenced in the cells
# visible in this notebook -- confirm before relying on them.
# import time
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
import numpy as np

# Seed NumPy's global random number generator so NumPy-driven randomness
# is repeatable from run to run.
np.random.seed(123)

In [None]:
import os, shutil
# Root folder of the image data; it is handed to flow_from_directory further
# down, and its 'santa' / 'not_santa' sub-folders are listed in the next cell.
directory = 'data/'

In [None]:
# Report how many entries each class folder contains (santa first, then not_santa).
for class_dir in ('data/santa', 'data/not_santa'):
    print(len(os.listdir(class_dir)))

In [None]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Build a generator over every image found under `directory`: pixel values are
# rescaled by 1/255 and each image is resized to 64 x 64.
# batch_size=924 is meant to be large enough for one batch to hold the whole
# dataset -- TODO confirm against the file counts printed above.
data_all = ImageDataGenerator(rescale=1.0 / 255).flow_from_directory(
    directory=directory,
    target_size=(64, 64),
    batch_size=924,
    seed=123,
)

In [None]:
# Pull one batch from the generator and unpack it into the image array and
# the matching label array.
images, labels = next(data_all)

In [None]:
# Dimensions of the image batch, then of the label array.
print(images.shape)
print(labels.shape)

### 1.1  Looking at the images

In [None]:
# Dimensions of a single image taken from the batch.
images[1].shape

In [None]:
# Render figures inline in the notebook, then display image #4 of the batch.
%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(images[4])
plt.show()

In [None]:
# Slice image #4 along its last axis: indices 0, 1 and 2 are bound to the
# red, green and blue channel variables respectively.
r_img, g_img, b_img = (images[4][:, :, channel] for channel in range(3))

In [None]:
# A single channel is a 2-D slice of the image; show its dimensions.
b_img.shape

In [None]:
# Display the third channel slice on its own using the 'binary' colour map.
plt.imshow(b_img, cmap=plt.cm.binary)
plt.show()

### 1.2 Looking at the labels

The first column represents "not santa"; the second one represents "santa".

In [None]:
# Display the label array returned by the generator (one row per image).
labels

## 2. Building a densely connected network

### 2.1 reshaping the images

In [None]:
# Flatten every image into a single feature row; the row count follows the
# number of images actually present in the batch.
X = images.reshape(images.shape[0], -1)

# Use the first label column as the binary target, shaped as a column vector.
# -1 lets NumPy infer the number of rows instead of hard-coding the dataset
# size (previously 922), so this cell keeps working if the image count changes.
# NOTE(review): the notebook text describes the first column as the
# "no santa" indicator, so y == 1 would mean "not santa" -- confirm intent.
y = labels[:, 0].reshape(-1, 1)

In [None]:
# Dimensions of the flattened feature matrix.
X.shape

In [None]:
# Dimensions of the target column vector.
y.shape

### 2.2 Creating a train, test and validation set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the flattened samples as the test set; the remaining
# "model" portion is split again into training and validation data below.
X_model, X_test, y_model, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [None]:
# Dimensions of the portion kept for training and validation.
X_model.shape

In [None]:
# Dimensions of the held-out test features.
X_test.shape

In [None]:
from sklearn.model_selection import train_test_split

# Carve a 20% validation set out of the non-test portion; the rest is the
# training set.
X_train, X_val, y_train, y_val = train_test_split(
    X_model,
    y_model,
    test_size=0.2,
    random_state=123,
)

In [None]:
# Dimensions of the training features.
X_train.shape

In [None]:
# Dimensions of the training targets.
y_train.shape

In [None]:
# Dimensions of the validation targets.
y_val.shape

- Our training set contains 589 images
- Our test set contains 185 images
- Our validation set contains 148 images

Now, let's build our baseline network. You'll build two dense hidden layers, the first one with 128 hidden nodes, and the second one with 64 nodes. Use the rectified linear unit as an activation function for both. Remember that each image is flattened before it enters the network, so the input shape of the first layer is 64 x 64 x 3 = 12288. The last layer of the network should have a sigmoid activation function (because this is a binary classification problem), and 1 output node.

Let's train the model for 40 epochs in batches of 50.

In [None]:
from keras import models
from keras import layers
from numpy.random import seed
seed(123)
from tensorflow import set_random_seed
set_random_seed(123)

# Baseline fully connected classifier working on the flattened images
# (64 * 64 * 3 = 12288 features per sample).
dense = models.Sequential()
dense.add(layers.Dense(128, activation='relu', input_shape=(12288,)))  # hidden layer 1
dense.add(layers.Dense(64, activation='relu'))  # hidden layer 2
dense.add(layers.Dense(1, activation='sigmoid'))  # single sigmoid output unit

# The metric is registered under the name 'acc' -- the same spelling the two
# convolutional models in this notebook use -- so that the training history
# exposes the 'acc' / 'val_acc' keys read by the accuracy plot, instead of a
# key name that depends on how the installed Keras version abbreviates
# 'accuracy'.
dense.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['acc'])

# Train for 40 epochs in mini-batches of 50, tracking validation metrics
# after every epoch.
dense_fit = dense.fit(X_train,
                    y_train,
                    epochs=40,
                    batch_size=50,
                    validation_data=(X_val, y_val))

In [None]:
# Per-epoch loss curves recorded while fitting the dense network.
hist_dense = dense_fit.history
loss_values, val_loss_values = hist_dense['loss'], hist_dense['val_loss']
epochs = range(1, len(loss_values) + 1)  # 1-based epoch numbers for the x-axis

plt.plot(epochs, loss_values, 'g.', label='Training loss')
plt.plot(epochs, val_loss_values, 'g', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Per-epoch accuracy curves for the dense network; reuses `epochs` from the
# loss-plot cell.
acc_values, val_acc_values = hist_dense['acc'], hist_dense['val_acc']

plt.plot(epochs, acc_values, 'r.', label='Training acc')
plt.plot(epochs, val_acc_values, 'r', label='Validation acc')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.show()

In [None]:
# Score the dense network on the training data; the result is displayed in a later cell.
results_train = dense.evaluate(X_train, y_train)

In [None]:
# Score the dense network on the held-out test data.
results_test = dense.evaluate(X_test, y_test)

In [None]:
# Show the dense network's evaluation result on the training set.
results_train

In [None]:
# Show the dense network's evaluation result on the test set.
results_test

## 3. Convolutional neural network

In [None]:
from sklearn.model_selection import train_test_split

# Redo the train/test split on the un-flattened `images` array, which is what
# the convolutional layers consume. Same test_size and random_state as the
# dense-network split. Note that this rebinds X_model / X_test / y_model /
# y_test.
X_model, X_test, y_model, y_test = train_test_split(
    images,
    y,
    test_size=0.2,
    random_state=123,
)

In [None]:
from sklearn.model_selection import train_test_split

# Split the non-test images into training and validation sets (20% validation).
# This rebinds X_train / X_val / y_train / y_val.
X_train, X_val, y_train, y_val = train_test_split(
    X_model,
    y_model,
    test_size=0.2,
    random_state=123,
)

In [None]:
from numpy.random import seed
seed(123)
from tensorflow import set_random_seed
set_random_seed(123)

# Convolutional classifier: two conv + max-pooling stages, followed by a
# small dense head ending in a single sigmoid unit.
model = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer="sgd",
    metrics=['acc'],
)

# Train for 40 epochs in mini-batches of 50, tracking validation metrics
# after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=40,
    batch_size=50,
    validation_data=(X_val, y_val),
)

In [None]:
# Print the layer-by-layer summary of the convolutional model.
model.summary()

In [None]:
# Parameter count of the first conv layer: each filter is 3 x 3 x 3
# (kernel height x kernel width x input channels), there are 64 of them,
# plus one bias term for each filter.
3*3*3*64+64

In [None]:
# Parameter count of the second conv layer: each of its 32 filters is
# 3 x 3 x 64 (it reads the 64 feature maps produced by the first conv layer),
# plus one bias term for each filter.

32*3*3*64+32

In [None]:
# Per-epoch loss curves recorded while fitting the convolutional model.
hist_cnn = history.history
loss_values, val_loss_values = hist_cnn['loss'], hist_cnn['val_loss']
epochs = range(1, len(loss_values) + 1)  # 1-based epoch numbers for the x-axis

plt.plot(epochs, loss_values, 'g.', label='Training loss')
plt.plot(epochs, val_loss_values, 'g', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Per-epoch accuracy curves for the convolutional model; reuses `epochs` from
# the loss-plot cell.
acc_values, val_acc_values = hist_cnn['acc'], hist_cnn['val_acc']

plt.plot(epochs, acc_values, 'r.', label='Training acc')
plt.plot(epochs, val_acc_values, 'r', label='Validation acc')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.show()

In [None]:
# Score the convolutional model on the training data (rebinds results_train).
results_train = model.evaluate(X_train, y_train)

In [None]:
# Score the convolutional model on the held-out test data (rebinds results_test).
results_test = model.evaluate(X_test, y_test)

In [None]:
# Show the convolutional model's evaluation result on the training set.
results_train

In [None]:
# Show the convolutional model's evaluation result on the test set.
results_test

## 4. Drop-out regularization

In [None]:
from numpy.random import seed
seed(123)
from tensorflow import set_random_seed
set_random_seed(123)

# Same convolutional architecture as the previous model, with a Dropout(0.5)
# layer inserted between Flatten and the dense head.
dropout = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

dropout.compile(
    loss='binary_crossentropy',
    optimizer="sgd",
    metrics=['acc'],
)

# `drop_model` holds the object returned by fit(); the next cell reads its
# `.history` attribute. The network itself is `dropout`.
drop_model = dropout.fit(
    X_train,
    y_train,
    epochs=40,
    batch_size=50,
    validation_data=(X_val, y_val),
)

In [None]:
# Per-epoch loss curves recorded while fitting the dropout model.
hist_cnn_drop = drop_model.history
loss_values, val_loss_values = hist_cnn_drop['loss'], hist_cnn_drop['val_loss']
epochs = range(1, len(loss_values) + 1)  # 1-based epoch numbers for the x-axis

plt.plot(epochs, loss_values, 'g.', label='Training loss')
plt.plot(epochs, val_loss_values, 'g', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Per-epoch accuracy curves for the dropout model; reuses `epochs` from the
# loss-plot cell.
acc_values, val_acc_values = hist_cnn_drop['acc'], hist_cnn_drop['val_acc']

plt.plot(epochs, acc_values, 'r.', label='Training acc')
plt.plot(epochs, val_acc_values, 'r', label='Validation acc')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.show()

In [None]:
# Score the dropout model on the training data (rebinds results_train).
results_train = dropout.evaluate(X_train, y_train)

In [None]:
# Score the dropout model on the held-out test data (rebinds results_test).
results_test = dropout.evaluate(X_test, y_test)

In [None]:
# Show the dropout model's evaluation result on the training set.
results_train

In [None]:
# Show the dropout model's evaluation result on the test set.
results_test

## 5. Try tweaking the model, the possibilities are endless!
- add/remove layers
- In some layers, switch between `padding='valid'` and `padding='same'`
- change activation functions
- change optimizer
- change batch size
- change patch dimensions (from 3 x 3 to 5 x 5)
- If you have more time, try running everything on the bigger training set! Do results improve?