In [1]:
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.python.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from keras import backend as K
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import random
import time



In [2]:
def import_resnet50v2():
    # importing resnet50v2 from keras
    model = keras.applications.ResNet50V2(
        include_top=False,
        weights="imagenet",
        input_shape=(32,32,3),
        pooling='avg'
    )
    
    print("\nResnet50V2 has been imported")
    
    return model

In [3]:
# resnet50v2_pretrained.summary()

In [4]:
# 15TH BATCH TRAINING AND ONWARDS, LAYERS FROZE TILL: conv5_block2_preact_bn
# 12TH till 14th BATCH TRAINING, LAYERS FROZE TILL: conv5_block3_preact_bn
# 1ST TILL 11TH BATCH TRAINING, ALL LAYERS FROZE

# freezing resnet50v2 layers after conv5_block2

def freeze_layers(model, layer='none'):
    
    if layer == 'none':

        # freezing models all layers
        for layer in model.layers:
            layer.trainable = False
            
        # Assuming your model is named 'model'
        trainable_count = np.sum([np.prod(w.shape) for w in model.trainable_weights])
            
        print("All layers froze")
        print(f"Trainable parameters: {trainable_count}\n")
    
    else:

        # Find the index of the layer you want to stop freezing at
        stop_layer_name = layer
        stop_layer_index = None
        for i, layer in enumerate(model.layers):
            if stop_layer_name in layer.name:
                stop_layer_index = i
                break

        # Freeze layers up to the stop layer
        for layer in model.layers[:stop_layer_index + 1]:
            layer.trainable = False

        # Unfreeze layers after the stop layer
        for layer in model.layers[stop_layer_index + 1:]:
            layer.trainable = True

        # Assuming your model is named 'model'
        trainable_count = np.sum([np.prod(w.shape) for w in model.trainable_weights])

        print(f"All layers froze prior to '{stop_layer_name}'")
        print(f"Trainable parameters: {trainable_count}\n")
    
    return model

In [5]:
# transofrming data for training

def transform_data_for_training(file_path):
    
    # importing training data
    train_data = pd.read_csv(file_path)

    # specifying size of the image
    img_size_org = (28,28,1)

    # separating labels
    train_targets = train_data['label']
    train_features = train_data.drop('label', axis=1)

    # one-hot-encoding targets
    train_targets = pd.get_dummies(train_targets, columns=['label'])

    # normalizing features
    train_features = train_features / 255

    # converting pandas dataframe to numpy array
    train_targets = train_targets.values
    train_features = train_features.values

    # resizing orignal images dimension and number of channels to the required dimensions and number of channels
    train_features = train_features.reshape((42000,28,28))
    train_features = np.expand_dims(train_features, axis=-1)
    train_features = tf.image.grayscale_to_rgb(tf.convert_to_tensor(train_features))
    train_features = tf.image.resize(train_features, (32,32))
    train_features = train_features.numpy()
    
    return train_features, train_targets

In [6]:
# splitting training data

def train_val_split(train_features, train_targets, training_ratio, random_seed):
    
    # setting a fixed seed for reproducibility
    np.random.seed(random_seed)
    
    # splitting data between train, dev, and test sets
    training_ratio = 0.9
    X_train, X_val, y_train, y_val = train_test_split(train_features, train_targets, test_size=(1-training_ratio))
    
    return X_train, X_val, y_train, y_val

In [7]:
# creating resnet50v2 with added layers

def resnet50v2_custom_create(resnet50v2_pretrained, model_name, random_seed, learningrate, momentum, dropoutrate, L2_regularizer):
        
    # building the model
    model = Sequential(name=model_name)
    
    # adding final layers to train
    model.add(resnet50v2_pretrained)
    model.add(Flatten())
    model.add(Dense(1024, activation='relu', kernel_initializer=tf.keras.initializers.GlorotNormal(seed=random_seed), use_bias=True, bias_initializer='zeros'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropoutrate))
    model.add(Dense(512, activation='relu', kernel_initializer=tf.keras.initializers.GlorotNormal(seed=random_seed), use_bias=True, bias_initializer='zeros'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropoutrate))
    model.add(Dense(256, activation='relu', kernel_initializer=tf.keras.initializers.GlorotNormal(seed=random_seed), use_bias=True, bias_initializer='zeros'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropoutrate))
    model.add(Dense(128, activation='relu', kernel_initializer=tf.keras.initializers.GlorotNormal(seed=random_seed), use_bias=True, bias_initializer='zeros'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropoutrate))
    model.add(Dense(64, activation='relu', kernel_initializer=tf.keras.initializers.GlorotNormal(seed=random_seed), use_bias=True, bias_initializer='zeros'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropoutrate))
    model.add(Dense(32, activation='relu', kernel_initializer=tf.keras.initializers.GlorotNormal(seed=random_seed), use_bias=True, bias_initializer='zeros'))
    model.add(Dense(10, activation='softmax'))

    # printing models summary
    model.summary()

    # setting up learning rate decay
    # lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(learningrate, decay_steps=X_train.shape[0]/batchsize, decay_rate=lrdecay, staircase=False)

    # compiling model and setting hyperparameters
    model.compile(optimizer=Adam(learning_rate=learningrate, beta_1=momentum), loss='categorical_crossentropy', metrics=['accuracy'])
    
    trainable_count = np.sum([K.count_params(w) for w in model.trainable_weights])
    non_trainable_count = np.sum([K.count_params(w) for w in model.non_trainable_weights])
    total_params = trainable_count + non_trainable_count
    
    return model, total_params, trainable_count, non_trainable_count

In [8]:
# training resnet50v2 with added layers

def resnet50v2_custom_train(model, X_train, y_train, X_val, y_val, batchsize, epoch):
        
    checkpoint_path = f"/kaggle/working/{model.name}.h5"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    # Create a callback that saves the model's weights
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)

    # Record the start time
    start_time = time.time()

    # training model
    history = model.fit(X_train, y_train, epochs=epoch, batch_size=batchsize, validation_data=(X_val, y_val), callbacks=[cp_callback])

    # Record the end time
    end_time = time.time()

    # Calculate the elapsed time
    training_time = end_time - start_time

    # Format the time as hours, minutes, and seconds
    training_time_formatted = time.strftime("%H:%M:%S", time.gmtime(training_time))
        
    return history, training_time_formatted

In [9]:
def plot_graph(history, model_name, training_time_formatted):
    
    # Plotting/Saving Graphs
    # Extract the training history
    train_loss = history.history['loss']
    train_accuracy = history.history['accuracy']
    val_loss = history.history['val_loss']
    val_accuracy = history.history['val_accuracy']

    # Plot training and validation loss
    plt.figure(figsize=(16, 12))
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(train_loss) + 1), train_loss, label='Training Loss')
    plt.plot(range(1, len(val_loss) + 1), val_loss, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    # Plot training and validation accuracy
    plt.subplot(1, 2, 2)
    plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, label='Training Accuracy')
    plt.plot(range(1, len(val_accuracy) + 1), val_accuracy, label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    legend_text = f'Model Name: {model_name}\nLearning Rate: {learningrate:.5f}\nMomentum: {momentum:.3f}\nDropout Rate: {dropoutrate:.3f}\nBatch Size: {batchsize}\nEpochs: {epoch}\nTraining Time: {training_time_formatted}'
    plt.figtext(0.01, 0.01, legend_text, fontsize=10, va="bottom", ha="left")

    plt.tight_layout()

    # Save the figures to the specified directory
    figure_name = f"{model_name}_lowest_val_loss_{min(history.history['val_loss']):.4f}_15th_batch_training.png"  # Replace with your desired file name
    save_dir = "/kaggle/working"
    figure_path = os.path.join(save_dir, figure_name)
    plt.savefig(figure_path)
    plt.close()  # Close the figure to release resources

In [10]:
# LAST RAN: 14th Batch

# specifying a fixed seed for reproducibility
random_seed = 13

# file path of training data
file_path = "/kaggle/input/digit-recognizer/train.csv"

# training ratio to split data on
training_ratio = 0.9

# hyperparameter values
learningrates = [0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.003, 0.003, 0.003]
momentums = [0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95]
dropoutrates = [0.5, 0.6, 0.7, 0.5, 0.6, 0.7, 0.5, 0.6, 0.7]
L2_regularizers = [0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001]
batchsizes = [256, 256, 256, 256, 256, 256, 256, 256, 256]
epoch = 1

# transforming raw data for training
train_features, train_targets = transform_data_for_training(file_path)
        
# splitting data into trainning and validation sets
X_train, X_val, y_train, y_val = train_val_split(train_features, train_targets, training_ratio, random_seed)

# printing hyperparameter values and data distribution
print("Random Seed: ", random_seed)
print("\nHyperparameter Values:")
print("Learning Rates: ", learningrates)
print("Momentums: ", momentums)
print("Dropout Rates: ", dropoutrates)
print("Batch Sizes: ", batchsizes)
print("Epochs: ", epoch)
print("\nData Distribution:")
print(f'training features = {X_train.shape}')
print(f'training targets = {y_train.shape}')
print(f'validation features = {X_val.shape}')
print(f'validation targets = {y_val.shape}')
# print(f'test features = {X_test.shape}')
# print(f'test targets = {y_test.shape}')
print(f'X_train = {(X_train.shape[0]/train_features.shape[0])*100:.2f}%')
print(f'X_val = {(X_val.shape[0]/train_features.shape[0])*100:.2f}%')
# print(f'X_test = {(X_test.shape[0]/train_features.shape[0])*100:.2f}%')
print(f'y_train = {(y_train.shape[0]/train_targets.shape[0])*100:.2f}%')
print(f'y_val = {(y_val.shape[0]/train_targets.shape[0])*100:.2f}%')
# print(f'y_test = {(y_test.shape[0]/train_targets.shape[0])*100:.2f}%')

Random Seed:  13

Hyperparameter Values:
Learning Rates:  [0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.003, 0.003, 0.003]
Momentums:  [0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95]
Dropout Rates:  [0.5, 0.6, 0.7, 0.5, 0.6, 0.7, 0.5, 0.6, 0.7]
Batch Sizes:  [256, 256, 256, 256, 256, 256, 256, 256, 256]
Epochs:  1

Data Distribution:
training features = (37800, 32, 32, 3)
training targets = (37800, 10)
validation features = (4200, 32, 32, 3)
validation targets = (4200, 10)
X_train = 90.00%
X_val = 10.00%
y_train = 90.00%
y_val = 10.00%


In [11]:
# run this code cell to train and save models stats

modelnames = []
totalparameters = []
trainableparameters = []
nontrainableparameters = []

for i in range (0, len(learningrates)):
    
    modelname = f"resnet50v2_{i+1}"
    modelnames.append(modelname)
    
    # hyperparameter values
    learningrate = learningrates[i]
    momentum = momentums[i]
    dropoutrate = dropoutrates[i]
    L2_regularizer = L2_regularizers[i]
    batchsize = batchsizes[i]
    
    # importing renset50v2 pretrained 
    resnet50v2_pretrained = import_resnet50v2()
        
    # freezing layers of resnet50v2
    first_trainable_layer = 'conv5_block2_preact_bn'
    resnet50v2_pretrained = freeze_layers(resnet50v2_pretrained, 'conv5_block2_preact_bn')
    
    # create resnet50v2 with added layers
    model, totalparams, trainableparams, nontrainableparams = resnet50v2_custom_create(resnet50v2_pretrained, modelname, random_seed, learningrate, momentum, dropoutrate, L2_regularizer)        
    
    # appending parameters for record keeping
    totalparameters.append(totalparams)
    trainableparameters.append(trainableparams)
    nontrainableparameters.append(nontrainableparams)
    
    # train the model
    history, training_time_formatted = resnet50v2_custom_train(model, X_train, y_train, X_val, y_val, batchsize, epoch)

    # plotting graph
    plot_graph(history, model.name, training_time_formatted)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5

Resnet50V2 has been imported
All layers froze prior to 'conv5_block2_preact_bn'
Trainable parameters: 8929280

Model: "resnet50v2_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50v2 (Functional)     (None, 2048)              23564800  
                                                                 
 module_wrapper (ModuleWrap  (None, 2048)              0         
 per)                                                            
                                                                 
 module_wrapper_1 (ModuleWr  (None, 1024)              2098176   
 apper)                                                          
                                                                 
 batch_normalization (Batch  (None, 1024)              4096  

In [12]:
for i in range (0, len(learningrates)):    
    
    # loads the weights
    model.load_weights(f"/kaggle/working/resnet50v2_{i+1}.h5")
    
    result = model.evaluate(X_val, y_val)
    print(f"Evaluation of model_{i+1} on validation data")
    print(f"Test loss, Test acc: {result[0]:.4f} {result[1]*100:.2f}%\n")

Evaluation of model_1 on validation data
Test loss, Test acc: 2.0379 25.45%

Evaluation of model_2 on validation data
Test loss, Test acc: 2.1741 12.69%

Evaluation of model_3 on validation data
Test loss, Test acc: 2.0880 17.76%

Evaluation of model_4 on validation data
Test loss, Test acc: 2.0355 30.76%

Evaluation of model_5 on validation data
Test loss, Test acc: 2.0643 22.86%

Evaluation of model_6 on validation data
Test loss, Test acc: 2.1294 19.83%

Evaluation of model_7 on validation data
Test loss, Test acc: 2.7224 29.67%

Evaluation of model_8 on validation data
Test loss, Test acc: 2.1946 20.21%

Evaluation of model_9 on validation data
Test loss, Test acc: 2.0733 20.48%



In [13]:
# Check lengths of lists
print("Lengths:")
print("Model Names:", len(modelnames))
print("Learning Rates:", len(learningrates))
print("Momentums:", len(momentums))
print("Dropout Rates:", len(dropoutrates))
print("Batch Sizes:", len(batchsizes))
print("Epochs:", len([epoch] * len(modelnames)))
print("Total params:", len([totalparameters] * len(modelnames)))
print("Trainable params:", len([trainableparameters] * len(modelnames)))
print("Non-trainable params:", len([nontrainableparameters] * len(modelnames)))

data_distribution = {
    'training features': [X_train.shape] * len(modelnames),
    'training targets': [y_train.shape] * len(modelnames),
    'validation features': [X_val.shape] * len(modelnames),
    'validation targets': [y_val.shape] * len(modelnames),
    'X_train': [f'{(X_train.shape[0]/train_features.shape[0])*100:.2f}%'] * len(modelnames),
    'X_val': [f'{(X_val.shape[0]/train_features.shape[0])*100:.2f}%'] * len(modelnames),
    'y_train': [f'{(y_train.shape[0]/train_targets.shape[0])*100:.2f}%'] * len(modelnames),
    'y_val': [f'{(y_val.shape[0]/train_targets.shape[0])*100:.2f}%'] * len(modelnames)
}
model_params = {
    'Total params': totalparameters,
    'Trainable params': trainableparameters,
    'Non-trainable params': nontrainableparameters
}

# Create a DataFrame
df = pd.DataFrame({
    'Model Names': modelnames,
    'Random Seed': [random_seed] * len(modelnames),
    'Learning Rates': learningrates,
    'Momentums': momentums,
    'Dropout Rates': dropoutrates,
    'Batch Sizes': batchsizes,
    'Epochs': [epoch] * len(modelnames),
    'First Training Layer': [first_trainable_layer] * len(modelnames),
    **model_params,
    **data_distribution
})

# Save to Excel file
excel_filename = f'/kaggle/working/training_record.xlsx'
df.to_excel(excel_filename, index=False)

Lengths:
Model Names: 9
Learning Rates: 9
Momentums: 9
Dropout Rates: 9
Batch Sizes: 9
Epochs: 9
Total params: 9
Trainable params: 9
Non-trainable params: 9
