# COMP 551 Project 3 - Eric Liu, Ajay Patel, Aaron Sossin
VGG Net Model


## Install libraries and datasets

In [0]:
!pip install tensorflow

from google.colab import files
files.upload()
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!kaggle competitions download -c modified-mnist
!unzip test_max_x.zip
!unzip train_max_x.zip

## Train model

In [0]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import utils, Sequential, layers, callbacks
from tensorflow.keras.optimizers import Adam, SGD
import pandas as pd
import numpy as np

# Load the training images and their labels.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load pickles obtained from the competition download.
train_images = pd.read_pickle('train_max_x')
# Column 1 of the CSV is used as the label; column 0 is dropped.
train_labels = np.array(pd.read_csv('train_max_y.csv'))[:, 1]

# Fixed seed so the stratified 85/15 split is identical on every run, which
# makes accuracies from different runs comparable.
split_seed = 42

# Despite the names, x_test / y_test are the hold-out set that is passed to
# model.fit() as validation_data, not the Kaggle test set.
x_train, x_test, y_train, y_test = train_test_split(
    train_images,
    train_labels,
    test_size=0.15,
    stratify=train_labels,
    random_state=split_seed,
)

# Add a trailing channel axis and normalize by 255 (assumes 8-bit pixel
# values -- TODO confirm). float32 is used instead of float64 to halve the
# memory footprint of the image arrays.
x_train = x_train.reshape(x_train.shape[0], 128, 128, 1).astype('float32') / 255.0
x_test = x_test.reshape(x_test.shape[0], 128, 128, 1).astype('float32') / 255.0

# One-hot encode the labels into 10 classes.
y_train = utils.to_categorical(y_train, 10)
y_test = utils.to_categorical(y_test, 10)

# HYPERPARAMS
batch_size = 32
epochs = 25
input_shape = (128, 128, 1)
num_classes = 10
learning_rate = 0.001

# Number of filters in each convolutional stage. Every stage is
# (Conv2D -> BatchNormalization) x 2 followed by 2x2 max pooling.
conv_stage_filters = (32, 64, 128, 256)

model = Sequential()
for stage_index, filters in enumerate(conv_stage_filters):
    for conv_index in range(2):
        # Only the very first layer of the network declares the input shape.
        is_first_layer = stage_index == 0 and conv_index == 0
        layer_kwargs = {'input_shape': input_shape} if is_first_layer else {}
        model.add(layers.Conv2D(filters, kernel_size=(3, 3), activation='relu', **layer_kwargs))
        model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Flatten())
# Output width follows num_classes instead of a hard-coded 10.
model.add(layers.Dense(num_classes, activation='softmax'))

optimizer = Adam(learning_rate=learning_rate)
# The accuracy metric is named explicitly: the log key produced by the string
# 'accuracy' differs between Keras versions ('acc' vs 'accuracy'), whereas the
# callbacks in this notebook monitor 'val_acc'. Pinning the name to 'acc'
# makes fit() report 'acc' / 'val_acc' regardless of version.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')],
)
model.summary()

# Both callbacks watch validation accuracy. mode='max' states explicitly that
# a higher value is an improvement instead of relying on Keras inferring the
# direction from the metric name.
# NOTE: the monitored key must match the name model.fit() reports for the
# metric (e.g. 'val_acc' vs 'val_accuracy'); if Keras warns that the metric is
# unavailable, the callbacks are not taking effect.
lr_reducer = callbacks.ReduceLROnPlateau(monitor='val_acc', mode='max', patience=5, verbose=1, factor=0.5)
early_stop = callbacks.EarlyStopping(monitor='val_acc', mode='max', patience=10, verbose=1)
# Use a distinct name for the list: rebinding `callbacks` would shadow the
# imported tensorflow.keras.callbacks module for every later use.
training_callbacks = [lr_reducer, early_stop]

history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    callbacks=training_callbacks,
)

## TRAINING RESULTS (10 epochs unless specified otherwise, results on last epoch)
> Always training on 85% of data, validating on 15% of data.

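CALLBACK CONFIGURATION (note: the training cell above currently uses patience=5 for ReduceLROnPlateau and patience=10 for EarlyStopping):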
- ReduceLROnPlateau(monitor='val_acc',patience=3,verbose=1,factor=0.5)
- EarlyStopping(monitor='val_acc',patience=5,verbose=1)

### Adam Optimizer
- No callbacks, batch size 32: training accuracy of 0.9676 and validation accuracy of 0.9365. Max validation accuracy of 0.9365 on epoch 10
- No callbacks, batch size 64: training accuracy of 0.9735 and validation accuracy of 0.9348. Max validation accuracy of 0.9348 on epoch 10
- No callbacks, batch size 128: training accuracy of 0.9771 and validation accuracy of 0.9185. Max validation accuracy of 0.9377 on epoch 9
- LRReducer and EarlyStop callbacks, batch size 32: training accuracy of 0.9680 and validation accuracy of 0.9208. Max validation accuracy of 0.9347 on epoch 8.
- LRReducer and EarlyStop callbacks, batch size 64: training accuracy of 0.9672 and validation accuracy of 0.9351. Max validation accuracy of 0.9376 on epoch 7.
- LRReducer and EarlyStop callbacks, batch size 128: training accuracy of 0.9776 and validation accuracy of 0.9140. Max validation accuracy of 0.9372 on epoch 9.

25 epochs
- LRReducer and EarlyStop callbacks, batch size 32: training accuracy of 0.9998 and validation accuracy of 0.9563. **Early stopped on epoch 23 (stagnation)**. Max validation accuracy of 0.9571 on epoch 18, but difference is minimal. **SUBMIT THIS TO KAGGLE TONIGHT**

100 epochs
- LRReducer and EarlyStop callbacks, batch size 128: training accuracy of 0.9993 and validation accuracy of 0.9421. **Early stopped on epoch 31**. Max validation accuracy of 0.9483 on epoch 21.
- LRReducer and EarlyStop callbacks, batch size 32: training accuracy of 0.9993 and validation accuracy of 0.9421. **Early stopped on epoch 31 (stagnation)**. Max validation accuracy of 0.9483 on epoch 21.

### SGD Optimizer
> Takes many more epochs to reach a good training and validation accuracy. Reaches a very high training accuracy much faster (0.98 at epoch 12), and peaks at near perfect training accuracy, but validation accuracy is quite low (overfitting). Efforts could be made to reduce overfitting (e.g. regularization).

25 Epochs
- LRReducer and EarlyStop callbacks, batch size 32: training accuracy of 0.9996 and validation accuracy of 0.8676. Max validation accuracy of 0.8776 on epoch 23.
- Example of overfitting: at epoch 10, had training accuracy of 0.9325 yet validation accuracy of 0.3791


## Test model and output predictions

In [0]:
import pandas as pd
import numpy as np
from google.colab import files

import csv

# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load pickles obtained from the competition download.
test_images = pd.read_pickle('test_max_x')

# Same reshape and 1/255 scaling as the training images. float32 is used
# instead of float64 to halve the memory footprint.
test = test_images.reshape(test_images.shape[0], 128, 128, 1).astype('float32') / 255

predictions = model.predict(test, batch_size=batch_size)
# Predicted class = index of the highest score in each prediction row.
preds = np.argmax(predictions, axis=1).tolist()
# Show only a preview; printing every prediction floods the cell output.
print(preds[:20])

# write to csv
# newline='' is what the csv module requires so that row terminators are not
# translated a second time by the file object on some platforms.
with open('predictions.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Id', 'Label'])
    # Id is the row index of the image within the test set.
    writer.writerows(enumerate(preds))

files.download('predictions.csv')