In [1]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
(x_train_full, y_train_full), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

#### using image size 96x96 as MobileNetV2 doesnt work well with smaller images

In [3]:
image_size = 96

x_train_full_resized = tf.image.resize(x_train_full, (image_size, image_size)).numpy().astype('float32')
x_test_resized = tf.image.resize(x_test, (image_size, image_size)).numpy().astype('float32')

In [4]:
x_train_resized, x_val_resized, y_train, y_val = train_test_split(x_train_full_resized, y_train_full, test_size=0.2, random_state=42)

In [5]:
x_train_resized /= 255.0
x_val_resized /= 255.0
x_test_resized /= 255.0

In [6]:
num_classes = 10

y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

#### consistently ran out of memory on my GPU, had to resort to using CPU at the cost of speed

In [7]:
buffer_size = 10000  
batch_size = 64  

# Training Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((x_train_resized, y_train))
train_dataset = train_dataset.shuffle(buffer_size=buffer_size).batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

# Validation Dataset
val_dataset = tf.data.Dataset.from_tensor_slices((x_val_resized, y_val))
val_dataset = val_dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

# Testing Dataset
test_dataset = tf.data.Dataset.from_tensor_slices((x_test_resized, y_test))
test_dataset = test_dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)


In [8]:
base_model = MobileNetV2(input_shape=(image_size, image_size, 3), include_top=False, weights='imagenet')

In [9]:
base_model.trainable = False

In [10]:
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(num_classes, activation='softmax', dtype='float32')(x)

In [11]:
model = Model(inputs=base_model.input, outputs=outputs)

In [12]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [13]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-5)

#### Had the model set for 20 epochs but it started overfitting at around the 15th epoch. 

### An accuracy of 79.7% on the test dataset is still decent without finetuning an uplift of 10% from the 70% accuracy that I got on the basemodel

In [14]:
history = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=20,
                    callbacks=[early_stopping, reduce_lr])

Epoch 1/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 262ms/step - accuracy: 0.5049 - loss: 1.5077 - val_accuracy: 0.7515 - val_loss: 0.7702 - learning_rate: 0.0010
Epoch 2/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 258ms/step - accuracy: 0.6761 - loss: 0.9387 - val_accuracy: 0.7622 - val_loss: 0.6975 - learning_rate: 0.0010
Epoch 3/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 259ms/step - accuracy: 0.6977 - loss: 0.8722 - val_accuracy: 0.7721 - val_loss: 0.6763 - learning_rate: 0.0010
Epoch 4/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 258ms/step - accuracy: 0.7073 - loss: 0.8374 - val_accuracy: 0.7810 - val_loss: 0.6613 - learning_rate: 0.0010
Epoch 5/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 259ms/step - accuracy: 0.7125 - loss: 0.8190 - val_accuracy: 0.7830 - val_loss: 0.6473 - learning_rate: 0.0010
Epoch 6/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━

In [15]:
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 208ms/step - accuracy: 0.7988 - loss: 0.5936
Test Loss: 0.5948, Test Accuracy: 0.7970


#### Unfreezing the last 4 layers of the model and reducing the learning improves the accuracy on the test dataset to 83.8%

In [16]:
for layer in base_model.layers[-4:]:
    layer.trainable = True


model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


finetuned_model = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=10,
                    callbacks=[early_stopping, reduce_lr])

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 254ms/step - accuracy: 0.7292 - loss: 0.9125 - val_accuracy: 0.7884 - val_loss: 0.6436 - learning_rate: 1.0000e-04
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 267ms/step - accuracy: 0.7662 - loss: 0.6798 - val_accuracy: 0.8090 - val_loss: 0.5720 - learning_rate: 1.0000e-04
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 262ms/step - accuracy: 0.7871 - loss: 0.6181 - val_accuracy: 0.8220 - val_loss: 0.5344 - learning_rate: 1.0000e-04
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 277ms/step - accuracy: 0.7998 - loss: 0.5764 - val_accuracy: 0.8272 - val_loss: 0.5158 - learning_rate: 1.0000e-04
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 277ms/step - accuracy: 0.8130 - loss: 0.5431 - val_accuracy: 0.8313 - val_loss: 0.5031 - learning_rate: 1.0000e-04
Epoch 6/10
[1m625/625[0m [3

In [17]:
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 210ms/step - accuracy: 0.8371 - loss: 0.4836
Test Loss: 0.4814, Test Accuracy: 0.8380


#### With more compute power and time, I would unfreeze more layers as well as introduce data augmentation. I believe this would push the accuracy to more than 90%