In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
import tensorflow_datasets as tfds

# Weights and Biases related imports
import wandb
from wandb.keras import WandbMetricsLogger

In [2]:
import pickle
import numpy as np
from tensorflow.keras.utils import to_categorical

def load_cifar10_batch(file_path):
    with open(file_path, 'rb') as file:
        batch = pickle.load(file, encoding='bytes')
    return batch

def load_cifar10_data(folder_path):
    train_data = []
    train_labels = []

    for i in range(1, 6):
        batch_file = f"{folder_path}/data_batch_{i}"
        batch = load_cifar10_batch(batch_file)
        train_data.append(batch[b'data'])
        train_labels.extend(batch[b'labels'])

    test_batch_file = f"{folder_path}/test_batch"
    test_batch = load_cifar10_batch(test_batch_file)
    test_data = test_batch[b'data']
    test_labels = test_batch[b'labels']

    train_data = np.vstack(train_data)
    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)

    return train_data, train_labels, test_data, test_labels

def preprocess_data(train_data, train_labels, test_data, test_labels):
    train_data = train_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    test_data = test_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

    train_labels_onehot = to_categorical(train_labels)
    test_labels_onehot = to_categorical(test_labels)

    return train_data, train_labels_onehot, test_data, test_labels_onehot

cifar10_folder = 'cifar-10-batches-py'

train_data, train_labels, test_data, test_labels = load_cifar10_data(cifar10_folder)

x_train, y_train, x_test, y_test = preprocess_data(
    train_data, train_labels, test_data, test_labels
)

print("Train Data Shape:", x_train.shape)
print("Train Labels Shape:", y_train.shape)
print("Test Data Shape:", x_test.shape)
print("Test Labels Shape:", y_test.shape)

Train Data Shape: (50000, 32, 32, 3)
Train Labels Shape: (50000, 10)
Test Data Shape: (10000, 32, 32, 3)
Test Labels Shape: (10000, 10)


In [3]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msta314[0m ([33mtakim[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
sweep_config = {
    'method': 'grid'
    }

metric = {
    'name': 'val_loss',
    'goal': 'minimize'   
    }

sweep_config['metric'] = metric

parameters_dict = {
    'augmentation': {
          'values': ['none', 'light', 'heavy']
        }
    }

sweep_config['parameters'] = parameters_dict

parameters_dict.update({
    'earlystopping_patience': {
        'value': 10},
    'epochs': {
        'value': 100},
    'learning_rate': {
        'value': 0.001},
    'batch_size': {
        'value': 64},
    'kernel_size': {
          'value': '5x5'},
    'net_filter_size': {
          'value': 32},
    'net_n': {
          'value': 3},
    'reg_alpha': {
          'value': 0.0001},
    'normalization': {
          'value': False}
    })

In [5]:
import pprint

pprint.pprint(sweep_config)

{'method': 'grid',
 'metric': {'goal': 'minimize', 'name': 'val_loss'},
 'parameters': {'augmentation': {'values': ['none', 'light', 'heavy']},
                'batch_size': {'value': 64},
                'earlystopping_patience': {'value': 10},
                'epochs': {'value': 100},
                'kernel_size': {'value': '5x5'},
                'learning_rate': {'value': 0.001},
                'net_filter_size': {'value': 32},
                'net_n': {'value': 3},
                'normalization': {'value': False},
                'reg_alpha': {'value': 0.0001}}}


In [6]:
# sweep_id = wandb.sweep(sweep_config, project="zayif-test")
sweep_id = wandb.sweep(sweep_config, project="CIFAR-10_Classification")

Create sweep with ID: zt6k7al0
Sweep URL: https://wandb.ai/takim/CIFAR-10_Classification/sweeps/zt6k7al0


In [7]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, Lambda, Add, Input, GlobalAveragePooling2D, Flatten, Dense, Softmax
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2

def ResidualBlock(x, filter_size, is_switch_block, kernel_size, reg_alpha):

    # note that if is_switch_block true, it means that output will not be the same as the input
    # so while merging the residual connection, we need to adapt to it
    # this adaptation could be with a conv layer, or a simple downsampling + padding is enough.

    x_skip = x # save original input to the block

    if not is_switch_block:
        x = Conv2D(filter_size, kernel_size=kernel_size, strides=(1, 1), padding='same', kernel_regularizer=l2(reg_alpha))(x)
    else:
        x = Conv2D(filter_size, kernel_size=kernel_size, strides=(2, 2), padding='same', kernel_regularizer=l2(reg_alpha))(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(filter_size, kernel_size=kernel_size, strides=(1, 1), padding='same', kernel_regularizer=l2(reg_alpha))(x)
    x = BatchNormalization()(x)

    if is_switch_block: # takes every second element to half(v) spatial dimension and then adds padding to each side for matching filter (last) dimension
        x_skip = Lambda(lambda x: tf.pad(x[:, ::2, ::2, :], tf.constant([[0, 0,], [0, 0], [0, 0], [filter_size//4, filter_size//4]]), mode="CONSTANT"))(x_skip)

    x = Add()([x, x_skip])
    x = ReLU()(x)

    return x

def ResidualBlocks(x, filter_size, n, kernel_size, reg_alpha):
    for group in range(3): # a stack of 6n layers, 3×3 convolutions, feature maps of sizes {4fs, 2fs, fs}, 2n layers for each size
        for block in range(n):
            if group > 0 and block == 0: # double filter size
                filter_size *= 2
                is_switch_block = True
            else:
                is_switch_block = False
                
            x = ResidualBlock(x, filter_size, is_switch_block, kernel_size, reg_alpha)

    return x

def create_model(config):

    filter_size = config['net_filter_size']
    n = config['net_n']
    kernel_size = (3, 3) if config['kernel_size'] == '3x3' else (5, 5)

    reg_alpha = config['reg_alpha']

    inputs = Input(shape=(32, 32, 3))
    x = Conv2D(filter_size, kernel_size=kernel_size, strides=(1, 1), padding='same', kernel_regularizer=l2(reg_alpha))(inputs)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = ResidualBlocks(x, filter_size, n, kernel_size, reg_alpha)
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Dense(10)(x)
    outputs = Softmax()(x)

    model = Model(inputs, outputs, name=f"ResNet-{n*6+2}")
    return model

In [8]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

def train(config = None):
    with wandb.init(config=config):

        config = wandb.config

        do_normalization = config['normalization']
        do_augmentation = config['augmentation'] != 'none'

        x_train_to_use = (x_train.astype('float32') / 255) if do_normalization else x_train
        x_test_to_use = (x_test.astype('float32') / 255) if do_normalization else x_test

        tf.keras.backend.clear_session()
        model = create_model(config)
        model.compile(
            optimizer = Adam(learning_rate=config["learning_rate"]),
            loss = "categorical_crossentropy",
            metrics = ["accuracy", tf.keras.metrics.TopKCategoricalAccuracy(k=3, name='top@3_accuracy')]
        )

        early_stopping = EarlyStopping(monitor='val_loss',
                                    patience=config["earlystopping_patience"],
                                    restore_best_weights=True)

        if not do_augmentation:
            history = model.fit(x_train_to_use, y_train,
                                epochs=config["epochs"],
                                batch_size=config["batch_size"],
                                validation_split=0.1,
                                callbacks=[
                                    WandbMetricsLogger(log_freq='epoch'),
                                    early_stopping
                                ], verbose=1
                                )
        else:
            if config['augmentation'] == 'light':
                datagen = ImageDataGenerator(
                    rotation_range=20,
                    horizontal_flip=True,
                    width_shift_range=0.1,
                    height_shift_range=0.1,
                    fill_mode='nearest'
                )
            else:
                datagen = ImageDataGenerator(
                    rotation_range=40,
                    horizontal_flip=True,
                    width_shift_range=0.2,
                    height_shift_range=0.2,
                    shear_range=0.1,
                    zoom_range=0.1,
                    fill_mode='nearest'
                )

            x_tr, x_vl, y_tr, y_vl = train_test_split(x_train_to_use, y_train, test_size=0.1, random_state=42)

            train_datagen = datagen.flow(x_tr, y_tr, batch_size=config["batch_size"])
            history = model.fit(train_datagen,
                                epochs=config["epochs"],
                                batch_size=config["batch_size"],
                                validation_data=(x_vl, y_vl),
                                callbacks=[
                                    WandbMetricsLogger(log_freq='epoch'),
                                    early_stopping
                                ], verbose=1
                                )
            
        
        test_stats = model.evaluate(x_test_to_use, y_test)
        wandb.log({"test_loss": test_stats[0]})
        wandb.log({"test_acc": test_stats[1]})

        val_loss_history = history.history['val_loss']
        val_acc_history = history.history['val_accuracy']

        best_epoch_num = -1 if (len(val_loss_history) == 100 or len(val_loss_history) <= 10) else (len(val_loss_history) - 11)

        wandb.log({"best_val_loss": val_loss_history[best_epoch_num]})
        wandb.log({"best_val_acc": val_acc_history[best_epoch_num]})

In [9]:
# wandb.agent(sweep_id, train, count=1)
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: p6hhejtu with config:
[34m[1mwandb[0m: 	augmentation: none
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	earlystopping_patience: 10
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	kernel_size: 5x5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	net_filter_size: 32
[34m[1mwandb[0m: 	net_n: 3
[34m[1mwandb[0m: 	normalization: False
[34m[1mwandb[0m: 	reg_alpha: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100


VBox(children=(Label(value='0.001 MB of 0.030 MB uploaded\r'), FloatProgress(value=0.038395656248990016, max=1…

0,1
best_val_acc,▁
best_val_loss,▁
epoch/accuracy,▁▄▅▅▆▆▆▆▇▇▇▇▇▇▇█████████████
epoch/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/top@3_accuracy,▁▅▆▆▇▇▇▇▇▇██████████████████
epoch/val_accuracy,▂▁▄▃▆▇▆▄▇▆▅▆▇▅▆▇▆█▆▇▇█▇▆▇▇█▅
epoch/val_loss,▅█▅▅▂▂▃▇▂▂▄▃▃▇▃▃▄▁▄▃▄▂▄▅▄▄▂█
epoch/val_top@3_accuracy,▃▁▂▄▆▇▇▃▇▇▆▆▇▄▆█▆█▆▇▇█▇▆▇▇█▆

0,1
best_val_acc,0.8254
best_val_loss,0.83526
epoch/accuracy,0.95831
epoch/epoch,27.0
epoch/learning_rate,0.001
epoch/loss,0.41562
epoch/top@3_accuracy,0.99804
epoch/val_accuracy,0.6914
epoch/val_loss,1.70614
epoch/val_top@3_accuracy,0.9286


[34m[1mwandb[0m: Agent Starting Run: n11y8zac with config:
[34m[1mwandb[0m: 	augmentation: light
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	earlystopping_patience: 10
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	kernel_size: 5x5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	net_filter_size: 32
[34m[1mwandb[0m: 	net_n: 3
[34m[1mwandb[0m: 	normalization: False
[34m[1mwandb[0m: 	reg_alpha: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
best_val_acc,▁
best_val_loss,▁
epoch/accuracy,▁▃▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇█▇███████████████████
epoch/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/top@3_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇▇▇█████████████████████████
epoch/val_accuracy,▁▃▄▆▅▅▄▆▇▆▆▆▇▆▅▇▇▇▇█▆▇▇▇▇█▇████▇▇█▇▇▇▆█
epoch/val_loss,█▇▆▃▄▄▆▃▂▃▃▄▂▃▇▂▂▂▂▁▃▂▂▂▂▂▂▁▁▁▁▂▂▁▃▂▂▃▂
epoch/val_top@3_accuracy,▁▄▆▆▆▇▆▇▇▇▇▆▇▇▄█▇▇▇█▇█▇█▇█▇███████▇▇▇▇█

0,1
best_val_acc,0.852
best_val_loss,0.67473
epoch/accuracy,0.88267
epoch/epoch,38.0
epoch/learning_rate,0.001
epoch/loss,0.58068
epoch/top@3_accuracy,0.98198
epoch/val_accuracy,0.8256
epoch/val_loss,0.76311
epoch/val_top@3_accuracy,0.9648


[34m[1mwandb[0m: Agent Starting Run: d7n93sxp with config:
[34m[1mwandb[0m: 	augmentation: heavy
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	earlystopping_patience: 10
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	kernel_size: 5x5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	net_filter_size: 32
[34m[1mwandb[0m: 	net_n: 3
[34m[1mwandb[0m: 	normalization: False
[34m[1mwandb[0m: 	reg_alpha: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
best_val_acc,▁
best_val_loss,▁
epoch/accuracy,▁▃▄▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████████████
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▆▅▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/top@3_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇▇▇▇█████████████████████████
epoch/val_accuracy,▁▃▄▄▆▆▆▅▆▂▇▇▇▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇██▇▇█▇▇▇██
epoch/val_loss,▆▅▄▄▃▂▂▃▂█▂▂▂▂▃▂▁▂▂▁▂▂▁▁▁▁▂▂▁▁▁▁▂▂▁▁▂▂▁▁
epoch/val_top@3_accuracy,▁▄▄▄▅▇▇▆▇▅▇▇█▇▇▇██▇▇▇▇████▇████▇▇▇██▇▇██

0,1
best_val_acc,0.8436
best_val_loss,0.71046
epoch/accuracy,0.84707
epoch/epoch,49.0
epoch/learning_rate,0.001
epoch/loss,0.69981
epoch/top@3_accuracy,0.97271
epoch/val_accuracy,0.8364
epoch/val_loss,0.74274
epoch/val_top@3_accuracy,0.9678


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
