### Importing Necessary Libraries

In [74]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

import xgboost as xgb
from xgboost import XGBClassifier

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAvgPool2D, BatchNormalization
# from tensorflow.keras.applications import EfficientNetV2L
from tensorflow.keras.regularizers import l2

from tensorflow.keras.optimizers import Adam

from sklearn.metrics import precision_recall_fscore_support as score

## Data

A detailed discussion and visualization of the data can be found [here](Data-Overview.ipynb).

In [4]:
# Load the label table and attach the on-disk path of every training image.
labels = pd.read_csv("./Data/train_labels.csv")
images = ['./Data/train/train/' + str(image_id) + '.tif' for image_id in labels['id']]
# Keep only the three columns used downstream, in a fixed order.
labels = labels.assign(images=images)[['id', 'images', 'label']]

In [5]:
labels

Unnamed: 0,id,images,label
0,0,./Data/train/train/0.tif,1
1,1,./Data/train/train/1.tif,13
2,2,./Data/train/train/2.tif,13
3,3,./Data/train/train/3.tif,14
4,4,./Data/train/train/4.tif,6
...,...,...,...
15995,15995,./Data/train/train/15995.tif,2
15996,15996,./Data/train/train/15996.tif,15
15997,15997,./Data/train/train/15997.tif,3
15998,15998,./Data/train/train/15998.tif,9


In [6]:
# Pre-computed split of the labelled data: the shapes displayed further down show
# 15200 rows in the train split and 800 rows in the held-out test split.
train_labels = pd.read_csv('./Data/train_labels_1.csv')
test_labels = pd.read_csv('./Data/test_labels_1.csv')

In [7]:
train_labels

Unnamed: 0,id,images,label
0,0,./Data/train/train/0.tif,1
1,1,./Data/train/train/1.tif,13
2,2,./Data/train/train/2.tif,13
3,3,./Data/train/train/3.tif,14
4,4,./Data/train/train/4.tif,6
...,...,...,...
15195,15995,./Data/train/train/15995.tif,2
15196,15996,./Data/train/train/15996.tif,15
15197,15997,./Data/train/train/15997.tif,3
15198,15998,./Data/train/train/15998.tif,9


In [8]:
test_labels

Unnamed: 0,id,images,label
0,18,./Data/train/train/18.tif,11
1,25,./Data/train/train/25.tif,8
2,33,./Data/train/train/33.tif,6
3,41,./Data/train/train/41.tif,1
4,65,./Data/train/train/65.tif,14
...,...,...,...
795,15816,./Data/train/train/15816.tif,2
796,15824,./Data/train/train/15824.tif,2
797,15832,./Data/train/train/15832.tif,11
798,15863,./Data/train/train/15863.tif,15


In [9]:
# Sort the distinct label ids explicitly: iterating a bare `set` has no guaranteed
# order, and the ids are paired positionally with `class_names` below.
class_labels = sorted(set(labels['label']))
# Human-readable class names, listed in label-id order.
# NOTE(review): presumably the RVL-CDIP class ordering (0 = letter ... 15 = memo) — confirm against the dataset.
class_names = [
    'letter', 'form', 'email', 'handwritten', 'advertisement', 'scientific report', 'scientific publication',
    'specification', 'file folder', 'news article', 'budget', 'invoice', 'presentation', 'questionnaire', 'resume',
    'memo'
]
# Lookup table from numeric label id to class name.
label_names = pd.DataFrame({
    'labels': class_labels,
    'names': class_names
})

## Data Preparation

The feature vectors extracted by the ResNet, VGG16 and Inception-ResNet models have already been precomputed, so we load them here and train on them directly.

In [10]:
# Precomputed ResNet feature vectors for the whole labelled set; displayed shape is (16000, 5, 2048).
# NOTE(review): axis 1 presumably indexes the full image plus 4 patches ("4P") — confirm.
fine_tune_data_resnet = np.load('./Data/ResNet-4P-train-precompute/whole_train_data_precomp.npy')
fine_tune_data_resnet.shape

(16000, 5, 2048)

In [11]:
# Number of PCA components kept per backbone. The trailing numbers are alternative
# values left by the author (presumably from earlier experiments).
N_COMPS_1 = 1300  # ResNet features; alternative: 1600
N_COMPS_2 = 300  # VGG16 features; alternative: 400
N_COMPS_3 = 900  # Inception-ResNet features; alternative: 1000
random_state = 43  # seed passed to every PCA instance below

In [12]:

# One independent PCA per slice along axis 1 of the ResNet feature array.
# NOTE(review): these are fitted on all 16000 rows, i.e. including the held-out split — confirm this is acceptable.
pcas_resnet = [PCA(N_COMPS_1, random_state=random_state) for _ in range(5)]

In [13]:
# Fit each PCA on its own slice of the ResNet features.
for patch_idx, patch_pca in enumerate(pcas_resnet):
    patch_pca.fit(fine_tune_data_resnet[:, patch_idx, :])

In [14]:
# Project every slice through its fitted PCA into a pre-allocated buffer, then
# rebind the original name to the reduced array.
# NOTE: this overwrites the raw features, so the cell cannot be re-run without reloading them.
n_resnet_rows = fine_tune_data_resnet.shape[0]
reduced_resnet = np.zeros((n_resnet_rows, 5, N_COMPS_1))
for patch_idx, patch_pca in enumerate(pcas_resnet):
    reduced_resnet[:, patch_idx, :] = patch_pca.transform(fine_tune_data_resnet[:, patch_idx, :])
fine_tune_data_resnet = reduced_resnet
del reduced_resnet

In [15]:
fine_tune_data_resnet.shape

(16000, 5, 1300)

In [16]:
# Precomputed VGG16 feature vectors for the whole labelled set; displayed shape is (16000, 5, 512).
fine_tune_data_vgg = np.load('./Data/VGG16-4P-train-precompute/whole_train_data_precomp.npy')
fine_tune_data_vgg.shape

(16000, 5, 512)

In [17]:

# One independent PCA per slice along axis 1 of the VGG16 feature array.
pcas_vgg = [PCA(N_COMPS_2, random_state=random_state) for _ in range(5)]

In [18]:
# Fit each PCA on its own slice of the VGG16 features.
for patch_idx, patch_pca in enumerate(pcas_vgg):
    patch_pca.fit(fine_tune_data_vgg[:, patch_idx, :])

In [19]:
# Project every VGG16 slice through its fitted PCA, then rebind the original name
# to the reduced array (the raw features are not kept).
n_vgg_rows = fine_tune_data_vgg.shape[0]
reduced_vgg = np.zeros((n_vgg_rows, 5, N_COMPS_2))
for patch_idx, patch_pca in enumerate(pcas_vgg):
    reduced_vgg[:, patch_idx, :] = patch_pca.transform(fine_tune_data_vgg[:, patch_idx, :])
fine_tune_data_vgg = reduced_vgg
del reduced_vgg
fine_tune_data_vgg.shape

(16000, 5, 300)

In [20]:
# Precomputed Inception-ResNet feature vectors for the whole labelled set; displayed shape is (16000, 5, 1536).
fine_tune_data_inception_resnet = np.load('./Data/Inception-ResNet-4P-train-precompute/whole_train_data_precomp.npy')
fine_tune_data_inception_resnet.shape

(16000, 5, 1536)

In [21]:

# One independent PCA per slice along axis 1 of the Inception-ResNet feature array.
pcas_inception_resnet = [PCA(N_COMPS_3, random_state=random_state) for _ in range(5)]

In [22]:
# Fit each PCA on its own slice of the Inception-ResNet features.
for patch_idx, patch_pca in enumerate(pcas_inception_resnet):
    patch_pca.fit(fine_tune_data_inception_resnet[:, patch_idx, :])

In [23]:
# Project every Inception-ResNet slice through its fitted PCA, then rebind the
# original name to the reduced array (the raw features are not kept).
n_inception_rows = fine_tune_data_inception_resnet.shape[0]
reduced_inception_resnet = np.zeros((n_inception_rows, 5, N_COMPS_3))
for patch_idx, patch_pca in enumerate(pcas_inception_resnet):
    reduced_inception_resnet[:, patch_idx, :] = patch_pca.transform(
        fine_tune_data_inception_resnet[:, patch_idx, :]
    )
fine_tune_data_inception_resnet = reduced_inception_resnet
del reduced_inception_resnet
fine_tune_data_inception_resnet.shape

(16000, 5, 900)

In [24]:
# Join the three PCA-reduced feature sets along the last axis
# (1300 + 300 + 900 = 2500 features per slice, matching the displayed shape).
fine_tune_data = np.concatenate([fine_tune_data_resnet, fine_tune_data_vgg, fine_tune_data_inception_resnet], axis=-1)
fine_tune_data.shape

(16000, 5, 2500)

In [25]:
# Label ids for all rows of the full label table, as a NumPy array.
fine_tune_labels = labels['label'].to_numpy()
fine_tune_labels.shape

(16000,)

In [26]:
# Select the training rows; the `id` column is used directly as a row position
# into the feature array.
train_row_ids = train_labels['id'].to_numpy()
fine_tune_data_train = fine_tune_data[train_row_ids]
fine_tune_data_train.shape

(15200, 5, 2500)

In [27]:
# Training-split labels, in the same row order as `train_labels`.
fine_tune_labels_train = train_labels['label'].to_numpy()
fine_tune_labels_train.shape

(15200,)

In [28]:
# Select the held-out rows; the `id` column is used directly as a row position
# into the feature array.
test_row_ids = test_labels['id'].to_numpy()
fine_tune_data_test = fine_tune_data[test_row_ids]
fine_tune_data_test.shape

(800, 5, 2500)

In [29]:
# Held-out-split labels, in the same row order as `test_labels`.
fine_tune_labels_test = test_labels['label'].to_numpy()
fine_tune_labels_test.shape

(800,)

Loading the trained CNN-based model and generating the input data for XGBoost.

In [30]:
def ftmodel():
    """Build the dense classification head over the concatenated PCA features.

    The network takes an input of shape ``(5, N_COMPS_1 + N_COMPS_2 + N_COMPS_3)``,
    flattens it, and applies: batch-norm -> dropout(0.5) -> Dense(512, relu) ->
    batch-norm -> dropout(0.5) -> Dense(16, softmax). Both dense layers use an
    L2 kernel regularizer of 5e-5.

    Returns:
        An uncompiled ``tf.keras.Model`` named 'RestNet-VGG-Inception-ResNet-FineTune'.
    """
    dropout_rate = 0.5
    feature_dim = N_COMPS_1 + N_COMPS_2 + N_COMPS_3

    inputs = Input(shape=(5, feature_dim))
    hidden = Flatten()(inputs)
    hidden = BatchNormalization()(hidden)

    # Single hidden block: dropout -> dense -> batch-norm -> dropout.
    hidden = Dropout(dropout_rate, name='top_dropout_2')(hidden)
    hidden = Dense(
        512,
        name='top_dense_2',
        kernel_initializer="he_normal",
        kernel_regularizer=l2(5e-5),
        activation='relu',
    )(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(dropout_rate, name='top_dropout_3')(hidden)

    # 16-way softmax classifier.
    outputs = Dense(16, activation='softmax', name='pred', kernel_regularizer=l2(5e-5))(hidden)

    return tf.keras.Model(inputs, outputs, name='RestNet-VGG-Inception-ResNet-FineTune')

In [31]:
# Instantiate the classification head and print its layer table.
model1 = ftmodel()
model1.summary()

Metal device set to: Apple M1 Max
Model: "RestNet-VGG-Inception-ResNet-FineTune"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5, 2500)]         0         
                                                                 
 flatten (Flatten)           (None, 12500)             0         
                                                                 
 batch_normalization (BatchN  (None, 12500)            50000     
 ormalization)                                                   
                                                                 
 top_dropout_2 (Dropout)     (None, 12500)             0         
                                                                 
 top_dense_2 (Dense)         (None, 512)               6400512   
                                                                 
 batch_normalization_1 (Batc  (None, 512)              2048  

2022-10-09 16:00:05.238639: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-10-09 16:00:05.238754: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [60]:
# Configure loss/metric/optimizer for the head. Integer class ids are used as targets,
# hence the sparse categorical cross-entropy loss. The fit call is commented out further down.
model1.compile(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimizer = Adam(learning_rate = 0.0003))

In [68]:
# Restore previously trained weights for the head from a checkpoint on disk.
# NOTE(review): this cell's execution count (68) is later than the cells below that build
# `model2` (65-67); on a fresh Restart & Run All the weights are loaded first, as intended.
model1.load_weights('./Models/ResNet-VGG-Inception-ResNet-4P/auto-ResNet-VGG-Inception-ResNet-FT-model-weight')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2956aab20>

In [65]:
model1.get_layer(index=len(model1.layers)-3).output

<KerasTensor: shape=(None, 512) dtype=float32 (created by layer 'batch_normalization_1')>

In [66]:
# Feature extractor: same input as `model1`, output taken from the third-from-last
# layer (the 512-D batch-norm output shown in the previous cell).
feature_layer = model1.layers[-3]
model2 = tf.keras.Model(model1.input, feature_layer.output)

In [67]:
model2.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5, 2500)]         0         
                                                                 
 flatten (Flatten)           (None, 12500)             0         
                                                                 
 batch_normalization (BatchN  (None, 12500)            50000     
 ormalization)                                                   
                                                                 
 top_dropout_2 (Dropout)     (None, 12500)             0         
                                                                 
 top_dense_2 (Dense)         (None, 512)               6400512   
                                                                 
 batch_normalization_1 (Batc  (None, 512)              2048      
 hNormalization)                                             

In [72]:
# Run the training split through the feature extractor to get the 512-D inputs for XGBoost.
xgboost_data_train = model2.predict(fine_tune_data_train)
xgboost_data_train.shape

 82/475 [====>.........................] - ETA: 0s

2022-10-09 16:17:32.048148: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




(15200, 512)

In [73]:
# Run the held-out split through the same feature extractor.
xgboost_data_test = model2.predict(fine_tune_data_test)
xgboost_data_test.shape



(800, 512)

### Model Training

The fine-tuning is done on the PCA-reduced $1300D+300D+900D$ (ResNet + VGG + Inception-ResNet) representation vectors; XGBoost is then trained on the $512D$ features extracted from the fine-tuned network.

In [33]:
# EPOCHS = 1000
# checkpoint_filepaths = ['./Models/ResNet-VGG-Inception-ResNet-4P/auto-ResNet-VGG-Inception-ResNet-FT-model-weight']
# model_checkpoint_callback = [tf.keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_filepath,
#     save_weights_only=True,
#     monitor = "loss",
#     mode='min',
#     save_best_only=True) for checkpoint_filepath in checkpoint_filepaths]

In [34]:
# history = model1.fit(fine_tune_data_train, fine_tune_labels_train, epochs=EPOCHS, batch_size=800, validation_data=(fine_tune_data_test, fine_tune_labels_test), validation_batch_size=800, callbacks=[model_checkpoint_callback])
# model1.save_weights("./Models/ResNet-VGG-Inception-ResNet-4P/resnet-vgg-inception-resnet-4p-model-weights")
# model1.load_weights('./Models/ResNet-VGG-Inception-ResNet-4P/resnet-vgg-inception-resnet-4p-model-weights')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2d90a4d90>

In [107]:
path = "./Models/ResNet-VGG-Inception-ResNet-4P-XGBoost/resnet_vgg_inception_xgboost_final.pickle.dat"
def xgb_model(X_train, y_train, X_test, y_test, path):
    """Train a 16-class XGBoost booster and pickle it to ``path``.

    Args:
        X_train: Training feature matrix.
        y_train: Integer class labels for ``X_train``.
        X_test: Feature matrix used as the evaluation set during training.
        y_test: Integer class labels for ``X_test``.
        path: Destination file for the pickled booster. The parent directory is
            created if it does not exist.

    Returns:
        The trained ``xgb.Booster``.

    Notes:
        * ``(X_test, y_test)`` drives early stopping, so error rates measured on
          that same split afterwards are optimistic.
        * Per the XGBoost documentation, ``xgb.train`` returns the booster from the
          last boosting round, not the best one; ``model.best_iteration`` is set
          when early stopping triggers.
    """
    # Create the output directory up front so a missing folder fails (or is fixed)
    # before the expensive training run rather than after it.
    model_dir = os.path.dirname(path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {
        'max_depth': 10,
        'eta': 0.005,
        'objective': 'multi:softprob',  # per-class probabilities; callers take argmax
        'num_class': 16,
        'eval_metric': 'merror'
    }

    # The last entry of the watchlist is the one monitored for early stopping.
    watchlist = [(dtrain, 'train'), (dtest, 'eval')]
    n_round = 400

    model = xgb.train(params, dtrain, n_round, evals=watchlist, early_stopping_rounds=20)

    # Context manager guarantees the file handle is flushed and closed.
    with open(path, "wb") as model_file:
        pickle.dump(model, model_file)

    return model

In [108]:
# Train XGBoost on the 512-D extracted features; the held-out split serves as the evaluation set.
xgbmodel = xgb_model(xgboost_data_train, fine_tune_labels_train, xgboost_data_test, fine_tune_labels_test, path)

[0]	train-merror:0.02901	eval-merror:0.41875
[1]	train-merror:0.01954	eval-merror:0.39875
[2]	train-merror:0.01770	eval-merror:0.39125
[3]	train-merror:0.01592	eval-merror:0.39000
[4]	train-merror:0.01441	eval-merror:0.38750
[5]	train-merror:0.01362	eval-merror:0.38500
[6]	train-merror:0.01257	eval-merror:0.38750
[7]	train-merror:0.01178	eval-merror:0.38000
[8]	train-merror:0.01007	eval-merror:0.37500
[9]	train-merror:0.00875	eval-merror:0.36375
[10]	train-merror:0.00836	eval-merror:0.36250
[11]	train-merror:0.00816	eval-merror:0.35875
[12]	train-merror:0.00704	eval-merror:0.35500
[13]	train-merror:0.00625	eval-merror:0.34875
[14]	train-merror:0.00572	eval-merror:0.34750
[15]	train-merror:0.00539	eval-merror:0.34750
[16]	train-merror:0.00513	eval-merror:0.35000
[17]	train-merror:0.00507	eval-merror:0.35250
[18]	train-merror:0.00480	eval-merror:0.35250
[19]	train-merror:0.00447	eval-merror:0.35125
[20]	train-merror:0.00447	eval-merror:0.35000
[21]	train-merror:0.00447	eval-merror:0.3525

### Model Evaluation

#### On Train Data

Now that the model has been trained, we look at its performance on the training set.

In [109]:
# Class-probability predictions for the training split (objective is 'multi:softprob').
# The label passed to the DMatrix is not needed for prediction.
train_preds = xgbmodel.predict(xgb.DMatrix(xgboost_data_train, label=fine_tune_labels_train))

In [110]:
# Predicted class id = index of the highest probability along the last axis.
train_pred_labels = np.argmax(train_preds, axis=-1)

In [111]:
train_pred_labels

array([ 1, 13, 13, ...,  3,  9,  9])

In [112]:
# Number of correctly classified training rows.
np.sum(train_pred_labels == train_labels['label'])

15189

Looking at the misclassified images to get a better idea about what features the model is unable to capture.

In [113]:
# Rows of the training split whose predicted label differs from the true label.
train_mismatch_mask = train_pred_labels != train_labels['label']
mis_train_data = train_labels.loc[train_mismatch_mask]
mis_train_data

Unnamed: 0,id,images,label
3251,3423,./Data/train/train/3423.tif,4
3597,3781,./Data/train/train/3781.tif,0
5495,5784,./Data/train/train/5784.tif,7
5647,5943,./Data/train/train/5943.tif,4
8464,8915,./Data/train/train/8915.tif,9
11937,12562,./Data/train/train/12562.tif,4
12255,12896,./Data/train/train/12896.tif,9
12561,13221,./Data/train/train/13221.tif,4
12793,13468,./Data/train/train/13468.tif,1
14026,14763,./Data/train/train/14763.tif,4


In [114]:
# Per-class precision / recall / F-score / support on the training split,
# shown as one table with a row per class.
train_metrics = score(train_labels['label'], train_pred_labels)
precision, recall, fscore, support = train_metrics
pd.DataFrame(dict(zip(['precision', 'recall', 'fscore', 'support'], train_metrics)))

Unnamed: 0,precision,recall,fscore,support
0,1.0,0.998978,0.999488,978
1,1.0,0.998919,0.999459,925
2,1.0,1.0,1.0,925
3,1.0,1.0,1.0,947
4,0.99892,0.994624,0.996767,930
5,0.997925,1.0,0.998962,962
6,0.998999,1.0,0.999499,998
7,1.0,0.997809,0.998904,913
8,0.999006,1.0,0.999503,1005
9,1.0,0.997901,0.99895,953


#### On Train-Test Data

In [115]:
# Class-probability predictions for the held-out split.
# The label passed to the DMatrix is not needed for prediction.
test_preds = xgbmodel.predict(xgb.DMatrix(xgboost_data_test, label=fine_tune_labels_test))

In [116]:
# Predicted class id = index of the highest probability along the last axis.
test_pred_labels = np.argmax(test_preds, axis=-1)

In [117]:
test_pred_labels

array([11,  8,  0,  7,  7,  6, 13,  8, 14, 15,  6, 15, 13, 14,  1, 10,  0,
        9, 11,  1,  7, 14,  3, 12,  6,  0,  3,  5,  3,  3, 15,  1,  6,  2,
        6,  8,  4, 15, 11,  9,  6, 12, 14,  2,  5, 12,  5,  1, 13,  6,  0,
       10,  8, 15, 11, 14,  9,  9,  9,  2, 15, 13, 10, 11,  6, 10,  8,  0,
        6,  3,  8,  4, 12, 12,  8,  6,  7,  0, 13, 11, 11,  0,  4, 12,  9,
       12,  1, 13, 13, 13, 10, 11,  4, 10,  7, 15, 11,  8, 13, 10,  3, 10,
       12,  9,  7, 14,  8,  2,  5, 15,  7, 10,  9,  4, 15, 12, 14, 13, 11,
        6, 13,  1,  0, 12,  1,  5, 11,  5, 15, 13,  3, 11,  6,  1,  6,  9,
       14, 10,  7, 14, 12,  8, 13, 12,  8,  6,  8, 13,  2, 13, 15, 14,  4,
       14,  4,  9,  4, 14,  7,  7,  8,  4,  5,  3, 13,  1,  2,  3, 12,  8,
       13,  6,  3, 15,  7, 12, 10, 14, 14,  8,  6, 10, 13, 11,  6, 13,  4,
       14, 11,  7, 15,  0,  3,  0, 10, 11, 13, 10, 11, 11, 11, 13,  9,  1,
        9, 11, 13, 14, 12,  1, 12, 13,  7,  6, 13, 10,  4, 12,  8,  0,  9,
        1, 15,  1,  7, 10

In [118]:
# Number of correctly classified held-out rows.
np.sum(test_pred_labels == test_labels['label'])

553

In [119]:
# Rows of the held-out split whose predicted label differs from the true label.
# Stored under its own name so the training-set table in `mis_train_data` is not overwritten.
mis_test_data = test_labels[test_pred_labels != test_labels['label']]
mis_test_data

Unnamed: 0,id,images,label
2,33,./Data/train/train/33.tif,6
3,41,./Data/train/train/41.tif,1
4,65,./Data/train/train/65.tif,14
5,67,./Data/train/train/67.tif,9
7,100,./Data/train/train/100.tif,10
...,...,...,...
783,15589,./Data/train/train/15589.tif,5
784,15596,./Data/train/train/15596.tif,8
786,15660,./Data/train/train/15660.tif,4
788,15687,./Data/train/train/15687.tif,12


In [57]:
# Per-class precision / recall / F-score / support on the held-out split,
# shown as one table with a row per class.
test_metrics = score(test_labels['label'], test_pred_labels)
precision, recall, fscore, support = test_metrics
pd.DataFrame(dict(zip(['precision', 'recall', 'fscore', 'support'], test_metrics)))

Unnamed: 0,precision,recall,fscore,support
0,0.77193,0.814815,0.792793,54
1,0.653061,0.727273,0.688172,44
2,0.886364,0.906977,0.896552,43
3,0.954545,0.913043,0.933333,46
4,0.885714,0.861111,0.873239,36
5,0.717391,0.66,0.6875,50
6,0.928571,0.847826,0.886364,46
7,0.854167,0.788462,0.82,52
8,0.844828,0.907407,0.875,54
9,0.8125,0.829787,0.821053,47


In [58]:
# Unweighted mean of the per-class F-scores on the held-out split.
# NOTE(review): the execution counts of this cell and the one above (57-58) are lower than
# those of the XGBoost cells (107+), so the displayed value may predate the XGBoost model — re-run to confirm.
np.mean(fscore)

0.8141554511573338

#### On Validation Data

Generating Predictions for the Validation Set

In [59]:
# List the validation directory once so the `images` and `id` columns are built
# from the same directory scan and therefore line up row by row.
VALIDATION_DIR = './Data/validation/validation/'
validation_files = os.listdir(VALIDATION_DIR)
validation_data = pd.DataFrame({'images': [VALIDATION_DIR + name for name in validation_files]})
validation_data['id'] = [name.split('.')[0] for name in validation_files]
validation_data['label'] = -1 # Simply added to prevent re-writing code
# `id` is a string column, so this is a lexicographic sort; it matches numeric order
# only while every id has the same number of digits.
validation_data = validation_data.sort_values(by=['id'])
validation_data

Unnamed: 0,images,id,label
850,./Data/validation/validation/17801.tif,17801,-1
898,./Data/validation/validation/17802.tif,17802,-1
852,./Data/validation/validation/17803.tif,17803,-1
798,./Data/validation/validation/17804.tif,17804,-1
750,./Data/validation/validation/17805.tif,17805,-1
...,...,...,...
246,./Data/validation/validation/18696.tif,18696,-1
211,./Data/validation/validation/18697.tif,18697,-1
760,./Data/validation/validation/18698.tif,18698,-1
787,./Data/validation/validation/18699.tif,18699,-1


In [202]:
# Precomputed ResNet features for the validation images; displayed shape is (900, 5, 2048).
fine_tune_data_validation_resnet = np.load('./Data/ResNet-4P-validation-precompute/whole_validation_data_precomp.npy')
fine_tune_data_validation_resnet.shape

(900, 5, 2048)

In [203]:
# Precomputed VGG16 features for the validation images; displayed shape is (900, 5, 512).
fine_tune_data_validation_vgg = np.load('./Data/VGG16-4P-validation-precompute/whole_validation_data_precomp.npy')
fine_tune_data_validation_vgg.shape

(900, 5, 512)

In [204]:
# Precomputed Inception-ResNet features for the validation images; displayed shape is (900, 5, 1536).
fine_tune_data_validation_inception_resnet = np.load('./Data/Inception-ResNet-4P-validation-precompute/whole_validation_data_precomp.npy')
fine_tune_data_validation_inception_resnet.shape

(900, 5, 1536)

In [205]:
# Apply the PCAs fitted on the labelled data to the validation ResNet features
# (transform only, no refit), then rebind the original name to the reduced array.
n_validation_resnet_rows = fine_tune_data_validation_resnet.shape[0]
reduced_validation_resnet = np.zeros((n_validation_resnet_rows, 5, N_COMPS_1))
for patch_idx, patch_pca in enumerate(pcas_resnet):
    reduced_validation_resnet[:, patch_idx, :] = patch_pca.transform(
        fine_tune_data_validation_resnet[:, patch_idx, :]
    )
fine_tune_data_validation_resnet = reduced_validation_resnet
del reduced_validation_resnet
fine_tune_data_validation_resnet.shape

(900, 5, 1300)

In [206]:
# Apply the PCAs fitted on the labelled data to the validation VGG16 features
# (transform only, no refit), then rebind the original name to the reduced array.
n_validation_vgg_rows = fine_tune_data_validation_vgg.shape[0]
reduced_validation_vgg = np.zeros((n_validation_vgg_rows, 5, N_COMPS_2))
for patch_idx, patch_pca in enumerate(pcas_vgg):
    reduced_validation_vgg[:, patch_idx, :] = patch_pca.transform(
        fine_tune_data_validation_vgg[:, patch_idx, :]
    )
fine_tune_data_validation_vgg = reduced_validation_vgg
del reduced_validation_vgg
fine_tune_data_validation_vgg.shape

(900, 5, 300)

In [207]:
# Apply the PCAs fitted on the labelled data to the validation Inception-ResNet features
# (transform only, no refit), then rebind the original name to the reduced array.
n_validation_inception_rows = fine_tune_data_validation_inception_resnet.shape[0]
reduced_validation_inception_resnet = np.zeros((n_validation_inception_rows, 5, N_COMPS_3))
for patch_idx, patch_pca in enumerate(pcas_inception_resnet):
    reduced_validation_inception_resnet[:, patch_idx, :] = patch_pca.transform(
        fine_tune_data_validation_inception_resnet[:, patch_idx, :]
    )
fine_tune_data_validation_inception_resnet = reduced_validation_inception_resnet
del reduced_validation_inception_resnet
fine_tune_data_validation_inception_resnet.shape

(900, 5, 900)

In [208]:
# Join the three PCA-reduced validation feature sets along the last axis, in the
# same ResNet / VGG16 / Inception-ResNet order used for the training features.
fine_tune_data_validation = np.concatenate([fine_tune_data_validation_resnet, fine_tune_data_validation_vgg, fine_tune_data_validation_inception_resnet], axis=-1)
fine_tune_data_validation.shape

(900, 5, 2500)

In [209]:
# Class probabilities for the validation images from the CNN head.
# NOTE(review): this uses `model1` directly, not the XGBoost model (`xgbmodel`) trained
# earlier in this notebook — confirm that the submission is meant to come from the CNN head.
valid_preds = model1.predict(fine_tune_data_validation)



2022-10-09 14:57:12.599361: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [210]:
# Predicted class id = index of the highest probability along the last axis.
valid_pred_labels = np.argmax(valid_preds, axis=-1)

### Submission

For Kaggle Submission

In [211]:
# Build the submission table: ids 17801..18700 (900 values) paired positionally with the predictions.
# NOTE(review): assumes the rows of the precomputed validation features are in ascending id order — confirm.
submission_data = pd.DataFrame({'id':list(range(17801, 18701))})
submission_data['label'] = valid_pred_labels
submission_data

Unnamed: 0,id,label
0,17801,4
1,17802,6
2,17803,4
3,17804,8
4,17805,3
...,...,...
895,18696,7
896,18697,12
897,18698,8
898,18699,4


In [212]:
# Write the submission file without the DataFrame index column.
submission_data.to_csv('./Submissions/submission-resnet-vgg-inception-4p.csv', index=False)