In [None]:
import pickle
from sklearn.model_selection import train_test_split
import keras
import numpy as np
from keras import Sequential
from keras import callbacks
from keras.utils import to_categorical
from keras.layers import Dropout, BatchNormalization, Flatten, Dense
from google.colab import drive

In [None]:
# Seed every random number generator the notebook relies on.
# Seeding NumPy alone leaves Keras weight initialisation and dropout unseeded,
# so `keras.utils.set_random_seed` is called as well: it seeds Python's
# `random`, NumPy and the Keras backend in one go.
SEED = 42
np.random.seed(SEED)
keras.utils.set_random_seed(SEED)

In [None]:
# Download the pickled image embeddings from Google Drive.
# The file is addressed by URL because gdown's `--id` flag is deprecated, and
# -O pins the destination to the path the notebook reads back later instead of
# relying on the current working directory.
! gdown "https://drive.google.com/uc?id=1lvFeDE7qdwYTR9NVqBnoYGpXcPPaZUf9" -O /content/img_embeddings.pkl

Downloading...
From: https://drive.google.com/uc?id=1lvFeDE7qdwYTR9NVqBnoYGpXcPPaZUf9
To: /content/img_embeddings.pkl
100% 3.54M/3.54M [00:00<00:00, 136MB/s]


In [None]:
# Download the pickled image labels from Google Drive.
# The file is addressed by URL because gdown's `--id` flag is deprecated, and
# -O pins the destination to the path the notebook reads back later instead of
# relying on the current working directory.
! gdown "https://drive.google.com/uc?id=1DJ6tWI6jJxFwNaQv_6NUGfSkGR_BALc8" -O /content/img_labels.pkl

Downloading...
From: https://drive.google.com/uc?id=1DJ6tWI6jJxFwNaQv_6NUGfSkGR_BALc8
To: /content/img_labels.pkl
100% 27.8k/27.8k [00:00<00:00, 22.9MB/s]


In [None]:
# Mount Google Drive inside the Colab filesystem; the trained model is saved
# under /content/drive/ at the end of the notebook.
drive.mount("/content/drive/")

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
def get_data(file_path: str) -> np.ndarray:
    """Load and return the object stored in a pickle file.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The object that was pickled into the file. The signature is annotated
        with ``np.ndarray``, but nothing here checks or converts the type.
    """
    # NOTE(review): unpickling can execute arbitrary code, so only point this
    # at files that come from a trusted source.
    with open(file_path, mode='rb') as pickled:
        payload = pickle.load(pickled)
    return payload

In [None]:
# Load the two downloaded pickle files, embeddings first and labels second.
embeddings, labels = (
    get_data(pickle_path)
    for pickle_path in ('/content/img_embeddings.pkl', '/content/img_labels.pkl')
)

In [None]:
# Hold out 20% of the samples as a test set. The fixed random_state makes the
# shuffled split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    embeddings,
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
# Report how many samples ended up in each split.
n_train, n_test = len(X_train), len(X_test)
print(f'Training sample size: {n_train}')
print(f'Test sample size: {n_test}')

Training sample size: 2768
Test sample size: 692


In [None]:
# One-hot encode the labels over the 233 classes the output layer predicts.
# NOTE: y_train / y_test are rebound to their encoded form, so this cell must
# run exactly once after the split; a second run would feed the already
# encoded arrays back into to_categorical.
NUM_CLASSES = 233
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [None]:
# Fully connected classifier: 128-feature input -> 233-way softmax output.
# The layers are listed in the order they are applied.
model = Sequential([
    # No activation is passed here, so this first layer is a plain linear map.
    Dense(128, input_shape=(128, ), name='input_layer'),
    Dense(150, activation='elu', name='hidden_layer_1'),
    Dropout(0.4, name='dropout_1'),
    Dense(150, activation='elu', name='hidden_layer_2'),
    Dense(150, activation='elu', name='hidden_layer_3'),
    BatchNormalization(name='batch_normalization'),
    Dense(150, activation='elu', name='hidden_layer_4'),
    Dropout(0.4, name='dropout_2'),
    # One output unit per class; softmax turns the outputs into probabilities.
    Dense(233, activation='softmax', name='output_layer'),
])

In [None]:
# Training callbacks, both watching the training loss ('loss'):
#   * EarlyStopping stops training once the loss has not improved by at least
#     0.01 for 13 consecutive epochs.
#   * ReduceLROnPlateau multiplies the learning rate by 0.1 once the loss has
#     not improved by at least 0.01 for 5 epochs, never going below 1e-10.
#
# The classes are reached through `keras.callbacks` on purpose: this cell binds
# the name `callbacks` to the list below, which hides the `callbacks` module
# imported at the top of the notebook. Writing `callbacks.EarlyStopping` here
# would make a second run of the cell fail with AttributeError.
#
# NOTE(review): fit() is given a validation split, yet both callbacks monitor
# the training loss. Consider monitoring 'val_loss' instead - confirm intent.
callbacks = [
    keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.01, patience=13, verbose=1),
    keras.callbacks.ReduceLROnPlateau(
        monitor='loss', factor=0.1, min_delta=0.01, min_lr=1e-10, patience=5, verbose=1, mode='auto'
    ),
]

In [None]:
# Configure training: Adam optimiser, categorical cross-entropy loss (the
# targets are one-hot encoded) and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for at most 70 epochs in mini-batches of 72 samples. 20% of the
# training data is held out for validation, and the callbacks defined above
# may lower the learning rate or stop training early.
model.fit(
    X_train,
    y_train,
    batch_size=72,
    epochs=70,
    validation_split=0.2,
    callbacks=callbacks,
)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 46: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 57: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 64: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 6

<keras.callbacks.History at 0x7f1ab25f8f50>

In [None]:
# evaluate() returns the loss followed by the compiled metric (accuracy).
# The test set is scored first, then the training set for comparison.
test_metrics = model.evaluate(X_test, y_test)
print(f'Metric on test: {test_metrics}')
train_metrics = model.evaluate(X_train, y_train)
print(f'Metric on train: {train_metrics}')

Metric on test: [0.6174319386482239, 0.8800578117370605]
Metric on train: [0.11070176213979721, 0.97398841381073]


In [235]:
# Print the layer-by-layer architecture of the model with output shapes and
# parameter counts.
model.summary()

Model: "sequential_23"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (Dense)         (None, 128)               16512     
                                                                 
 hidden_layer_1 (Dense)      (None, 150)               19350     
                                                                 
 dropout_1 (Dropout)         (None, 150)               0         
                                                                 
 hidden_layer_2 (Dense)      (None, 150)               22650     
                                                                 
 hidden_layer_3 (Dense)      (None, 150)               22650     
                                                                 
 batch_normalization (BatchN  (None, 150)              600       
 ormalization)                                                   
                                                     

In [None]:
# Persist the trained model to the mounted Google Drive folder.
# NOTE(review): the target has no file extension; newer Keras releases may
# require `.keras` or `.h5` here - confirm against the installed version.
model_file_path = '/content/drive/My Drive/Colab Notebooks/Data/dl_model'
model.save(filepath=model_file_path)