# 60000-level section

Instead of the SVM, let's use a perceptron as the classifier and train it to recognize all 101 Caltech classes (not just 3 or 10). First, use just one fully connected layer (hence a linear classification). Next, following the "sky is the limit" sentence, try a few MLP architectures to see which one works best for your classes. **Remember to change your runtime type to GPU! (in menu above: Runtime -> Change runtime type -> GPU)**

In [None]:
"""
    Student:        Byron Dowling
    Class:          Computer Vision (CSE 60535)
    Term:           University of Notre Dame, Fall 2023
    Assignment:     Practical #4 : Deep Learning-based Object Detection
"""

# The two lines below let you track the time spent on each cell (comment them out if you do not need timing)
!pip install ipython-autotime
%load_ext autotime

import cv2
import os
import sys
import numpy as np
from sklearn import svm
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
import torchvision.datasets as datasets
from torchvision.transforms import Compose, Resize
import tensorflow as tf

In [None]:
# Let's download and extract the Caltech101 dataset.
!wget -c https://notredame.box.com/shared/static/o5hw6ljq7x00smui4ixo9akxwlq2dkib.gz -O caltech101.tar.gz
%mkdir ./caltech101/
!tar -zxf caltech101.tar.gz -C ./caltech101/

# We need to convert PIL images into OpenCV images
class ToCV2:
    """Callable transform that turns a PIL-style image into an OpenCV-style array."""

    def __call__(self, sample):
        """Return ``sample`` as a NumPy array with its channels in B, G, R order.

        ``sample`` must provide ``convert('RGB')`` (as PIL images do); the
        converted image is read into an array and its channel axis reversed.
        """
        rgb = np.asarray(sample.convert('RGB'))
        # Flipping axis 2 swaps R and B; copy() turns the reversed view into
        # a fresh, contiguous array that is independent of the source image.
        bgr = np.flip(rgb, axis=2).copy()
        return bgr

# Build the dataset rooted at the current directory. Every image is resized to
# 224x224 and then converted by ToCV2 before it is handed to the caller.
caltech101_dataset = datasets.Caltech101(
    root='./',
    transform=Compose([
        Resize((224, 224)),
        ToCV2(),
    ]),
)

In [None]:
# First, we split the dataset into train and validation partitions

from sklearn.model_selection import train_test_split
from tqdm import tqdm
from tensorflow.keras.utils import to_categorical

# Collect every (image, label) pair of the dataset into parallel lists.
# Iterating the dataset applies its transform to each image.
X = []
labels = []

for image, label in tqdm(caltech101_dataset):
    X.append(image)
    labels.append(int(label))

X = np.array(X)
labels = np.array(labels)

# One-hot encode the integer class ids, as required by the
# categorical cross-entropy loss used for training.
Y = to_categorical(labels)

# stratify=labels keeps the class proportions equal in both partitions, so
# classes with few images are still represented in training and validation.
# random_state pins the shuffle, so different MLP architectures are compared
# on exactly the same validation images.
# NOTE(review): test_size=0.8 trains on 20% and validates on 80% of the data;
# confirm this is intentional and not an inverted 80/20 split.
x_train, x_valid, y_train, y_valid = train_test_split(
    X,
    Y,
    test_size=0.8,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

In [None]:
# We are utilizing the Keras API to load the ImageNet-pretrained VGG16 model and
# expose the output of its 'fc2' layer as the feature vector for our classifier.
model = VGG16(weights='imagenet')
cnn_codes = 'fc2'
features_model = Model(inputs=model.input, outputs=model.get_layer(cnn_codes).output)

# Next, we can train using our splits and try out different model architectures
# Inform the optimizer that we are not going to fine-tune the VGG (feature extraction) model
features_model.trainable = False

# Create your MLP (instead of SVM) on top of the VGG model
inputs = tf.keras.Input(shape=(224, 224, 3))

# The ImageNet VGG16 weights were trained on BGR pixels that are zero-centred
# per channel (this is what vgg16.preprocess_input produces). The images
# arrive already in BGR order from the ToCV2 transform, so only the channel
# means (B, G, R) still have to be subtracted. A unit variance leaves the
# scale untouched.
centered = tf.keras.layers.Normalization(
    axis=-1,
    mean=[103.939, 116.779, 123.68],
    variance=[1.0, 1.0, 1.0],
)(inputs)

# training=False keeps the frozen feature extractor in inference mode.
x = features_model(centered, training=False)

# *** Task 4a ***
# Add several layers between the VGG model and the output layer.
# These can be fully-connected ("Dense"), convolutional, pooling or dropout layers.
# The syntax always will be x = tf.keras.layers.LAYER(...)(x), where LAYER is the correct name
# you can find in tensorflow documentation: https://www.tensorflow.org/api_docs/python/tf/keras/layers

# Hyperparameters shared by the two identical Dense -> Dropout stages below.
HIDDEN_UNITS = 128
DROPOUT_RATE = 0.25

# Stage 1: fully connected ReLU layer on the VGG features, followed by dropout.
D1 = tf.keras.layers.Dense(units=HIDDEN_UNITS, activation='relu')(x)
drop_out1 = tf.keras.layers.Dropout(DROPOUT_RATE)(D1)

# Stage 2: same structure, stacked on the output of stage 1.
D2 = tf.keras.layers.Dense(units=HIDDEN_UNITS, activation='relu')(drop_out1)
drop_out2 = tf.keras.layers.Dropout(DROPOUT_RATE)(D2)

# *** Task 4b ***
# Use outputs = tf.keras.layers.Dense(...)(x) to define the correct output of your single-layer perceptron,
# that is at least select the correct number of output neurons and the softmax activation function
# One output neuron per class (101 classes), with softmax so the outputs form a
# probability distribution matching the one-hot targets.
outputs = tf.keras.layers.Dense(101, activation='softmax')(drop_out2)

# Tie the image input to the classifier output to obtain the end-to-end model.
full_model = tf.keras.Model(inputs=inputs, outputs=outputs)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
full_model.summary()

# *** Task 4c ***
# Number of epochs (= number of presentations of the full training set to the model while training).
# It is currently set to 15; adjust it while observing the validation (not training) loss.
# When that loss stagnates, it's a good moment to stop training and take these weights to your final model.
epochs = 15

# Compile (= build) the model with the loss, optimizer and metric used for training.
full_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=["accuracy"],
)

# Checkpoint callback: evaluated once per epoch, it writes the full model (not
# just the weights) to caltech101.h5 and keeps only the best one as judged by
# the monitored validation accuracy.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "caltech101.h5",
    monitor="val_accuracy",
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch',
)

# Train on the training partition and evaluate on the validation partition
# after every epoch.
full_model.fit(
    x_train,
    y_train,
    epochs=epochs,
    verbose=1,
    validation_data=(x_valid, y_valid),
    callbacks=[checkpoint],
)