# Loading packages

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import random
import gc

from os import path
from matplotlib import pyplot as plt
from tensorflow.keras import layers, losses, optimizers, metrics, callbacks, Model, Input, regularizers

In [2]:
# Cap how much memory TensorFlow may allocate on the first GPU.
GPU_MEMORY_LIMIT = 9048  # passed as `memory_limit` (documented by TensorFlow as MB)

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT)]
        )
    except RuntimeError as err:
        # Logical devices can only be configured before the GPU is initialised,
        # so re-running this cell in a live kernel raises. The configuration
        # applied earlier stays in effect, so report the error and continue.
        print("GPU memory limit not (re)applied:", err)

logical_gpus = tf.config.list_logical_devices('GPU')
print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")

1 Physical GPU, 1 Logical GPUs


In [3]:
SEED = 123  # global seed applied to `random`, NumPy and TensorFlow in the next cell
N_CLASS = 10  # depth of the one-hot label vectors and width of the softmax output layer
IMG_SIZE = 32  # NOTE(review): not referenced in the visible cells; presumably the image side length, confirm

In [4]:
# Seed Python's `random`, NumPy and TensorFlow (in that order) with the same value.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

# Loading datasets

In [5]:
# Load the feature matrix from disk; its rows are selected further down via
# the `real_index` column of the label frames.
dataset = np.load("resnet_output.npy")
dataset.shape

(50000, 2048)

In [6]:
# Map every training image file name to its row position (`real_index`).
#
# The directory is listed directly and sorted explicitly: the order in which
# the operating system returns directory entries is not guaranteed, and
# `real_index` is only meaningful if it is reproducible.
# NOTE(review): assumes the rows of `resnet_output.npy` were written in this
# same lexicographic file-name order (1.png, 10.png, 100.png, ...) — confirm
# against the script that produced the features.
TRAIN_IMAGES_DIR = path.join(os.getcwd(), '..', 'cifar-10', 'train', 'train_images')
image_names = sorted(
    entry for entry in os.listdir(TRAIN_IMAGES_DIR)
    if path.isfile(path.join(TRAIN_IMAGES_DIR, entry))  # skip sub-directories
)
filenames = pd.DataFrame(image_names, columns=["name"]).reset_index(names="real_index")
filenames

Unnamed: 0,real_index,name
0,0,1.png
1,1,10.png
2,2,100.png
3,3,1000.png
4,4,10000.png
...,...,...
49995,49995,9995.png
49996,49996,9996.png
49997,49997,9997.png
49998,49998,9998.png


In [7]:
# Training labels: read the split file, drop the stray index column written
# by a previous `to_csv`, and attach each image's row position.
train_labels_df = (
    pd.read_csv(path.join(os.getcwd(), '..', 'train_val_test', 'train.csv'))
    .drop(columns=["Unnamed: 0"])
    .merge(filenames, on="name")
)
train_labels_df

Unnamed: 0,name,codes,real_index
0,1.png,6,0
1,100.png,1,2
2,10001.png,1,5
3,10002.png,6,6
4,10003.png,6,7
...,...,...,...
34995,9994.png,1,49994
34996,9996.png,3,49996
34997,9997.png,9,49997
34998,9998.png,1,49998


In [8]:
# Validation labels: read the split file, drop the stray index column written
# by a previous `to_csv`, and attach each image's row position.
val_labels_df = (
    pd.read_csv(path.join(os.getcwd(), '..', 'train_val_test', 'val.csv'))
    .drop(columns=["Unnamed: 0"])
    .merge(filenames, on="name")
)
val_labels_df

Unnamed: 0,name,codes,real_index
0,10.png,3,1
1,1000.png,5,3
2,10000.png,5,4
3,10014.png,6,19
4,10029.png,7,35
...,...,...,...
4995,9961.png,8,49958
4996,9977.png,3,49975
4997,998.png,1,49978
4998,9986.png,0,49985


In [9]:
# Test labels: read the split file, drop the stray index column written
# by a previous `to_csv`, and attach each image's row position.
test_labels_df = (
    pd.read_csv(path.join(os.getcwd(), '..', 'train_val_test', 'test.csv'))
    .drop(columns=["Unnamed: 0"])
    .merge(filenames, on="name")
)
test_labels_df

Unnamed: 0,name,codes,real_index
0,10008.png,6,12
1,1001.png,9,14
2,10012.png,3,17
3,10015.png,5,20
4,10021.png,0,27
...,...,...,...
9995,9971.png,3,49969
9996,9981.png,6,49980
9997,9983.png,3,49982
9998,9984.png,0,49983


In [10]:
# Leakage check: no feature row may belong to more than one split.
# The comparison must be made against the other split's `real_index` column.
# Passing the whole DataFrame to `isin` matches against its column labels
# ("name", "codes", "real_index"), so such a check could never fail.
assert not train_labels_df.real_index.isin(test_labels_df.real_index).any(), \
    "train and test splits share rows"
assert not train_labels_df.real_index.isin(val_labels_df.real_index).any(), \
    "train and validation splits share rows"
assert not test_labels_df.real_index.isin(val_labels_df.real_index).any(), \
    "test and validation splits share rows"

In [11]:
def _features_and_targets(labels_df):
    """Return (feature rows, one-hot targets) for the images listed in `labels_df`."""
    features = dataset[labels_df.real_index, :]
    targets = tf.one_hot(labels_df.codes, N_CLASS)
    return features, targets


train_x, train_y = _features_and_targets(train_labels_df)
val_x, val_y = _features_and_targets(val_labels_df)
test_x, test_y = _features_and_targets(test_labels_df)
train_x.shape, val_x.shape, test_x.shape

((35000, 2048), (5000, 2048), (10000, 2048))

# Resnet FC

# Regularization

Training is repeated 10 times for each regularizer, each run using a different random seed (and therefore a different weight initialization).

In [12]:
def create_model(regularizer=None):
    """Build the fully connected classification head.

    Architecture: Input(2048) -> Dense(2048, relu) -> BatchNormalization
    -> Dense(N_CLASS, softmax).

    Args:
        regularizer: optional Keras regularizer applied to the kernel of the
            hidden Dense layer; `None` disables weight regularization.

    Returns:
        An uncompiled `Model`.
    """
    # `shape` must be a tuple: `(2048)` is just the integer 2048.
    input_layer = Input(shape=(2048,))
    x = layers.Dense(2048, activation="relu", kernel_regularizer=regularizer)(input_layer)
    x = layers.BatchNormalization()(x)
    output = layers.Dense(N_CLASS, activation="softmax")(x)
    model = Model(inputs=input_layer, outputs=output)
    return model

# Build one instance with an L1 kernel regularizer and print its layer summary.
model = create_model(regularizer=regularizers.L1())
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2048)]            0         
                                                                 
 dense (Dense)               (None, 2048)              4196352   
                                                                 
 batch_normalization (BatchN  (None, 2048)             8192      
 ormalization)                                                   
                                                                 
 dense_1 (Dense)             (None, 10)                20490     
                                                                 
Total params: 4,225,034
Trainable params: 4,220,938
Non-trainable params: 4,096
_________________________________________________________________


In [13]:
# Penalty strength shared by all three configurations. 0.01 is the Keras
# default for `L1` and `L2`, so those two entries behave as before.
REG_STRENGTH = 0.01

# Kernel regularizers compared in the experiment.
# `L1L2()` defaults to l1=0 and l2=0, i.e. no penalty at all, so its
# coefficients have to be given explicitly for the comparison to mean anything.
# NOTE(review): the training loop's variable `regularizer` stays in the kernel
# namespace after the loop; any later cell reading it sees the last entry here.
regularizers_ = [
    regularizers.L1(l1=REG_STRENGTH),
    regularizers.L2(l2=REG_STRENGTH),
    regularizers.L1L2(l1=REG_STRENGTH, l2=REG_STRENGTH)
]

# Stop once validation accuracy has not improved for 10 epochs and roll the
# model back to the weights of its best epoch.
early_stopping = callbacks.EarlyStopping(
    monitor='val_categorical_accuracy',
    min_delta=0,
    patience=10,
    verbose=0,
    mode='max',
    baseline=None,
    restore_best_weights=True
)

# Halve the learning rate after 5 epochs without improvement, down to 1e-5.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_categorical_accuracy',
    factor=0.5,
    patience=5,
    min_lr=0.00001,
    verbose=1
)

In [14]:
# Train one model per (regularizer, seed) pair, evaluate it on the test split
# and write the collected metrics to `l1_l2_results.csv`.
TRAINING_SEEDS = list(range(10))
run_records = []
for regularizer in regularizers_:
    for seed in TRAINING_SEEDS:
        # Release the global Keras state left behind by the previous model;
        # without this, memory use grows with every model built in the loop.
        # Done before seeding so the seeds below are the last thing set.
        tf.keras.backend.clear_session()

        # Seed all three generators with the per-run seed.
        random.seed(seed)
        np.random.seed(seed)
        tf.random.set_seed(seed)

        model = create_model(regularizer=regularizer)

        model.compile(
            optimizer=optimizers.Adam(learning_rate=0.001),
            loss=losses.CategoricalCrossentropy(),
            metrics=[metrics.CategoricalAccuracy(), metrics.CategoricalCrossentropy()]
        )

        history = model.fit(
            x=train_x, y=train_y,
            epochs=200,
            batch_size=512,
            validation_data=(val_x, val_y),
            shuffle=True,
            callbacks=[early_stopping, reduce_lr],
            verbose=0
        )
        eval_results = model.evaluate(x=test_x, batch_size=512, y=test_y)

        # One flat record per run: identifiers first, then one key per metric.
        run_records.append({
            'seed': seed,
            'regularizer': regularizer.__class__.__name__,
            **dict(zip(model.metrics_names, eval_results))
        })
        gc.collect()

results = pd.DataFrame(run_records)
results.to_csv('l1_l2_results.csv')


Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 25: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 34: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 55: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.

Epoch 64: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 74: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.

Epoch 80: ReduceLROnPlateau reducing learning rate to 1e-05.

Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 25: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 31: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 41: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.

Epoch 49: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 61: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.

E

In [15]:
# Display the per-run test metrics collected by the regularization loop.
results

Unnamed: 0,seed,regularizer,loss,categorical_accuracy,categorical_crossentropy
0,0,L1,0.408998,0.9113,0.265904
1,1,L1,0.456403,0.9081,0.276166
2,2,L1,0.432935,0.9103,0.261698
3,3,L1,0.397876,0.9112,0.26162
4,4,L1,0.44048,0.9114,0.264133
5,5,L1,0.431367,0.9141,0.262097
6,6,L1,0.463652,0.9075,0.277602
7,7,L1,0.400478,0.9127,0.263367
8,8,L1,0.445249,0.9082,0.269513
9,9,L1,3.191068,0.8748,0.363493


# Dropout

In [16]:
def create_model(dropout_val=None, regularizer=None):
    """Build the fully connected classification head with optional dropout.

    Architecture: Input(2048) -> Dense(2048, relu) -> [Dropout]
    -> BatchNormalization -> Dense(N_CLASS, softmax).

    This redefines the `create_model` of the regularization section. The
    regularizer is now an explicit argument: the function used to read a
    global `regularizer` left over from the earlier training loop, which
    fails with a NameError on a fresh kernel and makes the model depend on
    whatever that loop ran last.

    Args:
        dropout_val: dropout rate applied after the hidden layer; `None`
            omits the Dropout layer.
        regularizer: optional Keras regularizer for the kernel of the hidden
            Dense layer; `None` (default) disables weight regularization.

    Returns:
        An uncompiled `Model`.
    """
    # `shape` must be a tuple: `(2048)` is just the integer 2048.
    input_layer = Input(shape=(2048,))
    x = layers.Dense(2048, activation="relu", kernel_regularizer=regularizer)(input_layer)
    if dropout_val is not None:
        x = layers.Dropout(dropout_val)(x)
    x = layers.BatchNormalization()(x)
    output = layers.Dense(N_CLASS, activation="softmax")(x)
    model = Model(inputs=input_layer, outputs=output)
    return model

# Build one instance with a dropout rate of 0.1 and print its layer summary.
model = create_model(0.1)
model.summary()

Model: "model_31"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_32 (InputLayer)       [(None, 2048)]            0         
                                                                 
 dense_62 (Dense)            (None, 2048)              4196352   
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 batch_normalization_31 (Bat  (None, 2048)             8192      
 chNormalization)                                                
                                                                 
 dense_63 (Dense)            (None, 10)                20490     
                                                                 
Total params: 4,225,034
Trainable params: 4,220,938
Non-trainable params: 4,096
____________________________________________

In [17]:
# Dropout rates compared in the experiment; each is trained once per seed below.
dropout_values = [0.1, 0.2, 0.3, 0.5]

In [18]:
# Train one model per (dropout rate, seed) pair, evaluate it on the test split
# and write the collected metrics to `dropout_results.csv`.
TRAINING_SEEDS = list(range(10))
run_records = []

for rate in dropout_values:
    for seed in TRAINING_SEEDS:
        # Release the global Keras state left behind by the previous model;
        # without this, memory use grows with every model built in the loop.
        # Done before seeding so the seeds below are the last thing set.
        tf.keras.backend.clear_session()

        # Seed all three generators with the per-run seed.
        random.seed(seed)
        np.random.seed(seed)
        tf.random.set_seed(seed)

        model = create_model(rate)

        model.compile(
            optimizer=optimizers.Adam(learning_rate=0.001),
            loss=losses.CategoricalCrossentropy(),
            metrics=[metrics.CategoricalAccuracy(), metrics.CategoricalCrossentropy()]
        )

        history = model.fit(
            x=train_x, y=train_y,
            epochs=200,
            batch_size=512,
            validation_data=(val_x, val_y),
            shuffle=True,
            callbacks=[early_stopping, reduce_lr],
            verbose=0
        )

        eval_results = model.evaluate(x=test_x, batch_size=512, y=test_y)

        # One flat record per run: identifiers first, then one key per metric.
        run_records.append({
            'seed': seed,
            'rate': rate,
            **dict(zip(model.metrics_names, eval_results))
        })

        gc.collect()

results = pd.DataFrame(run_records)
results.to_csv('dropout_results.csv')


Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 20: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 28: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 33: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.

Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 35: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 40: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 28: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 33: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 31: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 39: ReduceLROnPlateau reducing learning rate to 0.00012500

In [19]:
# Display the per-run test metrics collected by the dropout loop.
results

Unnamed: 0,seed,rate,loss,categorical_accuracy,categorical_crossentropy
0,0,0.1,0.32092,0.9262,0.32092
1,1,0.1,0.339523,0.9253,0.339523
2,2,0.1,0.323525,0.9264,0.323525
3,3,0.1,0.32426,0.9265,0.32426
4,4,0.1,0.332857,0.9257,0.332857
5,5,0.1,0.299719,0.9263,0.299719
6,6,0.1,0.332097,0.927,0.332097
7,7,0.1,0.307026,0.9278,0.307026
8,8,0.1,0.30705,0.9257,0.30705
9,9,0.1,0.323201,0.9238,0.323201
