"""
    
    Image classification model for a specific domain using transfer learning with a pretrained CNN model.
    
    Author: DITI SAI NAGA MAHESH
    
    Date: 8th APR. 2024

    Time : 11:35
"""

#**Image classification model for a specific domain using transfer learning with a pretrained CNN model**

#**IMPORTING THE LIBRARIES**

In [40]:
import pandas as pd
import numpy as np

#**DATA PREPARATION**

In [41]:
# read digit csv file using pandas
train_set = pd.read_csv('train.csv')
test_set = pd.read_csv('test.csv')

In [42]:
# visualize the data
train_set.head(3)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
test_set.head(3)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
trainset_without_labels = train_set.iloc[:, 1:785] # asbtracting train_set from 1 to 785 that is all pixels.. without lables

In [45]:
trainset_without_labels.tail(3)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
33841,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33842,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33843,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,


In [46]:
trainset_labels = train_set.iloc[:, 0] # abstracting out labels from training set

In [47]:
trainset_labels[:4] # visualize the labels

0    1
1    0
2    1
3    4
Name: label, dtype: int64

In [48]:
X_test = test_set.iloc[:, 0:784] # its good: we don't have labels in the test set

In [49]:
X_test.tail()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27999,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
from sklearn.model_selection import train_test_split
# validation set is 20 percent
X_train, X_cv, Y_train, Y_cv = train_test_split(trainset_without_labels, trainset_labels, test_size = 0.2, random_state = 1111)

In [52]:
X_train = np.array(X_train)


In [54]:
total_elements = X_train.size

In [55]:
num_rows = total_elements // 784
num_cols = 784

In [56]:
X_train = X_train.reshape(num_rows, num_cols)

In [57]:
print(X_train.shape)

(27075, 784)


In [60]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [62]:
X_cv = X_cv.to_numpy()

In [64]:
X_cv

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [66]:
X_test

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [68]:
# Data Cleaning and Normalization:
# At first, lets check the pixels intensities range:
print(min(X_train[1]), max(X_train[1]))

0.0 255.0


In [69]:
# so the pixels intensities are currently between the range of 0 and 255, we proceed to normalize the features, using broadcasting,
# Feature Normalization
X_train = X_train.astype('float32')
X_cv = X_cv.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_cv /= 255
X_test /= 255

In [70]:
# import keras
from keras.models import Sequential
from keras.layers import *
from keras.utils import to_categorical

In [71]:
# also we convert our lables from a class vector to binary One Hot Encoded
# Convert lables to ONE HOT ENCODED
num_of_digits = 10
y_train = to_categorical(Y_train, num_classes=num_of_digits)
# same for validation set
y_cv = to_categorical(Y_cv, num_classes=num_of_digits)

In [72]:
# to check!
y_train[0] # is number 7

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], dtype=float32)

#**Model Architecture**

In [73]:
# Model Fitting:
# since its an empirical process so we try different model with different optimizers along with tuning hyperparameters
model = Sequential()
model.add(Dense(units=300, activation='relu', name='1st_Hidden_Layer', input_dim=784)) # input_dim = fetures = columns = 784
model.add(Dense(units=100, activation='relu', name='2nd_Hidden_Layer'))
model.add(Dense(units=100, activation='relu', name='3rd_Hidden_Layer'))
model.add(Dense(units=200, activation='relu', name='4th_Hidden_Layer'))
model.add(Dense(units=num_of_digits, activation='softmax', name='Output_Layer'))

In [74]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 1st_Hidden_Layer (Dense)    (None, 300)               235500    
                                                                 
 2nd_Hidden_Layer (Dense)    (None, 100)               30100     
                                                                 
 3rd_Hidden_Layer (Dense)    (None, 100)               10100     
                                                                 
 4th_Hidden_Layer (Dense)    (None, 200)               20200     
                                                                 
 Output_Layer (Dense)        (None, 10)                2010      
                                                                 
Total params: 297910 (1.14 MB)
Trainable params: 297910 (1.14 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [75]:
# Compiling the model
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
# note: loss function is categorical becoz we have 10 classes to classify for e.g. we'd do binary classification
# we are using Stochastic Gradient Descent as our optimizer
# we are interested in accruacy metrics for now!

In [76]:
# Fitting the Model
model.fit(x=X_train, y=y_train, batch_size=100, epochs=20, verbose=2, validation_data=(X_cv, y_cv))
# we are using 100 batch_size with 20 epochs for fitting the model
# also we are giving validation data too

Epoch 1/20
271/271 - 3s - loss: nan - accuracy: 0.1047 - val_loss: nan - val_accuracy: 0.0971 - 3s/epoch - 10ms/step
Epoch 2/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 3/20
271/271 - 3s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 3s/epoch - 10ms/step
Epoch 4/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 7ms/step
Epoch 5/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 7ms/step
Epoch 6/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 7ms/step
Epoch 7/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 8/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 9/20
271/271 - 3s - loss: nan - accuracy: 0.0975 - val_loss: nan

<keras.src.callbacks.History at 0x78ac7cf77b20>

In [77]:
# We had validation score that is - training score of 95.60%
# We can also check for test score  but we don't have label for that now!
# could submit it in the kaggle and know it or see it..

In [78]:
# Let's use Adam optimizer for building the model since it is said to increase lots of performance
# building different model!
model2 = Sequential()
model2.add(Dense(activation='relu', units=300, name='1st_Hidden_Layer', input_dim=784)) # input layer dim is number of varibales or features
model2.add(Dense(activation='relu', units=100, name='2nd_Hidden_Layer'))
model2.add(Dense(activation='relu', units=100, name='3rd_Hidden_Layer'))
model2.add(Dense(activation='relu', units=200, name='4th_Hidden_Layer'))
model2.add(Dense(activation='softmax', units=num_of_digits, name='Output_Layer'))
# note: name=" should not have space in them"

In [79]:
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 1st_Hidden_Layer (Dense)    (None, 300)               235500    
                                                                 
 2nd_Hidden_Layer (Dense)    (None, 100)               30100     
                                                                 
 3rd_Hidden_Layer (Dense)    (None, 100)               10100     
                                                                 
 4th_Hidden_Layer (Dense)    (None, 200)               20200     
                                                                 
 Output_Layer (Dense)        (None, 10)                2010      
                                                                 
Total params: 297910 (1.14 MB)
Trainable params: 297910 (1.14 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [80]:
# compiling another model
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [81]:
# fitting the model:
model2.fit(x=X_train, y=y_train, batch_size=100, epochs=20, verbose=2, validation_data=(X_cv, y_cv))

Epoch 1/20
271/271 - 4s - loss: nan - accuracy: 0.5613 - val_loss: nan - val_accuracy: 0.0971 - 4s/epoch - 13ms/step
Epoch 2/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 3/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 4/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 5/20
271/271 - 3s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 3s/epoch - 13ms/step
Epoch 6/20
271/271 - 3s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 3s/epoch - 9ms/step
Epoch 7/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 8/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan - val_accuracy: 0.0971 - 2s/epoch - 8ms/step
Epoch 9/20
271/271 - 2s - loss: nan - accuracy: 0.0975 - val_loss: nan

<keras.src.callbacks.History at 0x78ac63b12230>

In [82]:
"""
    As it turns out, it does appear to be the case that the optimizer plays a crucial part in the validation score.
    In particular, the model which relies on 'Adam' as its optimizer tend to perform 1.5 - 2.5% better on average.
    Going forward, we will use 'Adam' as our optimizer of choice.
"""

"\n    As it turns out, it does appear to be the case that the optimizer plays a crucial part in the validation score. \n    In particular, the model which relies on 'Adam' as its optimizer tend to perform 1.5 - 2.5% better on average. \n    Going forward, we will use 'Adam' as our optimizer of choice.\n"

In [83]:
# We now proceed to include dropout (dropout rate of 0.3) in our model to prevent overfitting.
# before that: we'd try adding another hidden layer and see if its make sense! .. did it and it doesn't change performance alot
# so let's stick with our model2 architechture and add drop out in it!
model3 = Sequential()
model3.add(Dense(units=300, activation='relu', name='1st_hidden_layer', input_dim=784))
model3.add(Dropout(0.3))
model3.add(Dense(units=100, activation='relu', name='2nd_hidden_layer'))
model3.add(Dropout(0.3))
model3.add(Dense(units=100, activation='relu', name='3rd_hidden_layer'))
model3.add(Dropout(0.3))
model3.add(Dense(units=200, activation='relu', name='4th_hidden_layer'))
model3.add(Dense(units=num_of_digits, activation='softmax', name='Output_Layer'))

In [84]:
model3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 1st_hidden_layer (Dense)    (None, 300)               235500    
                                                                 
 dropout (Dropout)           (None, 300)               0         
                                                                 
 2nd_hidden_layer (Dense)    (None, 100)               30100     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 3rd_hidden_layer (Dense)    (None, 100)               10100     
                                                                 
 dropout_2 (Dropout)         (None, 100)               0         
                                                                 
 4th_hidden_layer (Dense)    (None, 200)              

In [85]:
# compile the model3
model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [86]:
# fit the model:
model3.fit(x=X_train, y=y_train, batch_size=100, epochs=20, validation_data=(X_cv, y_cv))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x78ac65d1d810>