Reference: **[Residual Networks (ResNet) - Deep Learning](https://www.geeksforgeeks.org/deep-learning/residual-networks-resnet-deep-learning/)**

Modified by the help of ChatGPT

## 1. Importing keras APIs and modules.
These APIs help in building the architecture of the ResNet Module

In [None]:
import keras
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
import numpy as np
import os
import json
from pathlib import Path
import cv2
from sklearn.preprocessing import LabelEncoder

: 

## 2. Hyper parameters and pre-processing

In [None]:
batch_size = 32
epochs = 200
data_augmentation = True
subtract_pixel_mean = True
n = 3
version = 1

if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2

model_type = 'ResNet % dv % d' % (depth, version)

# Load COCO annotations
dataset_path = Path('dashcam 2.v1i.coco/train')
annotations_file = dataset_path / '_annotations.coco.json'

with open(annotations_file, 'r') as f:
    coco_data = json.load(f)

# Parse COCO format
images_info = {img['id']: img for img in coco_data['images']}
categories = {cat['id']: cat['name'] for cat in coco_data['categories']}

# Load images and annotations
x_train = []
y_train = []

for annotation in coco_data['annotations']:
    img_id = annotation['image_id']
    img_info = images_info[img_id]
    img_path = dataset_path / img_info['file_name']
    
    # Read and preprocess image
    img = cv2.imread(str(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (32, 32))  # Resize to match CIFAR-10 size
    x_train.append(img)
    
    # Get category label
    category_id = annotation['category_id']
    category_name = categories[category_id]
    y_train.append(category_name)

x_train = np.array(x_train, dtype='float32') / 255
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train).reshape(-1, 1)

num_classes = len(label_encoder.classes_)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print('y_train shape:', y_train.shape)
print('Classes:', label_encoder.classes_)

x_train shape: (5631, 32, 32, 3)
5631 train samples
y_train shape: (5631, 1)
Classes: ['Mobil' 'Motor' 'Orang' 'Truk']


## 3. Learning Rate Schedule
Used to adjust learning rate based on the amount of epochs.

In [4]:
def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

## 4. Basic ResNet Building Block

In [None]:
def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x

## 5. Defining ResNet Architecture

In [6]:
def resnet_v1(input_shape, depth, num_classes=10): 
    if (depth - 2) % 6 != 0: 
        raise ValueError('depth should be 6n + 2 (eg 20, 32, 44 in [a])') 

    num_filters = 16
    num_res_blocks = int((depth - 2) / 6) 

    inputs = Input(shape=input_shape) 
    x = resnet_layer(inputs=inputs) 

    for stack in range(3): 
        for res_block in range(num_res_blocks): 
            strides = 1
            if stack > 0 and res_block == 0: 
                strides = 2 
            y = resnet_layer(inputs=x, 
                             num_filters=num_filters, 
                             strides=strides) 
            y = resnet_layer(inputs=y, 
                             num_filters=num_filters, 
                             activation=None) 
            if stack > 0 and res_block == 0: 
                x = resnet_layer(inputs=x, 
                                 num_filters=num_filters, 
                                 kernel_size=1, 
                                 strides=strides, 
                                 activation=None, 
                                 batch_normalization=False) 
            x = keras.layers.add([x, y]) 
            x = Activation('relu')(x) 
        num_filters *= 2

    x = AveragePooling2D(pool_size=8)(x) 
    y = Flatten()(x) 
    outputs = Dense(num_classes, 
                    activation='softmax', 
                    kernel_initializer='he_normal')(y) 

    model = Model(inputs=inputs, outputs=outputs) 
    return model

## 6. Train and test the model

In [None]:
model = resnet_v1(input_shape=x_train.shape[1:], depth=depth, num_classes=num_classes)

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()
print(model_type)

save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'cifar10_%s_model.{epoch:03d}.keras' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

checkpoint = ModelCheckpoint(filepath=filepath,
                              monitor='val_acc',
                              verbose=1,
                              mode='max',
                              save_best_only=True)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=10,
                               min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]

if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, keras.utils.to_categorical(y_train, num_classes),
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              shuffle=True,
              callbacks=callbacks)
else:
    print('Using real-time data augmentation.')
    # Complete the ImageDataGenerator
    datagen = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        zca_whitening=False,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    # Fit the generator on the training data
    datagen.fit(x_train)

    # Convert labels to categorical
    y_train_cat = keras.utils.to_categorical(y_train, num_classes)
    
    # Split data into train and validation sets manually
    split_idx = int(0.8 * len(x_train))
    x_train_split = x_train[:split_idx]
    y_train_split = y_train_cat[:split_idx]
    x_val = x_train[split_idx:]
    y_val = y_train_cat[split_idx:]

    # Use the generator for training
    model.fit(datagen.flow(x_train_split, y_train_split, batch_size=batch_size),
              steps_per_epoch=x_train_split.shape[0] // batch_size,
              epochs=epochs,
              validation_data=(x_val, y_val),
              callbacks=callbacks)

Learning rate:  0.001


ResNet  20v  1
Using real-time data augmentation.
Learning rate:  0.001
Epoch 1/200
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.3389 - loss: 1.5838

  if self._should_save_model(epoch, batch, logs, filepath):


[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 81ms/step - accuracy: 0.3853 - loss: 1.4713 - val_accuracy: 0.4046 - val_loss: 1.4192 - learning_rate: 0.0010
Learning rate:  0.001
Epoch 2/200
Learning rate:  0.001
Epoch 2/200
[1m  1/140[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9s[0m 71ms/step - accuracy: 0.4375 - loss: 1.4324



[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.4375 - loss: 1.4324 - val_accuracy: 0.3931 - val_loss: 1.4399 - learning_rate: 0.0010
Learning rate:  0.001
Epoch 3/200
Learning rate:  0.001
Epoch 3/200
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 74ms/step - accuracy: 0.4725 - loss: 1.3231 - val_accuracy: 0.4969 - val_loss: 1.3354 - learning_rate: 0.0010
Learning rate:  0.001
Epoch 4/200
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 74ms/step - accuracy: 0.4725 - loss: 1.3231 - val_accuracy: 0.4969 - val_loss: 1.3354 - learning_rate: 0.0010
Learning rate:  0.001
Epoch 4/200
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6250 - loss: 1.0910 - val_accuracy: 0.4880 - val_loss: 1.3305 - learning_rate: 0.0010
Learning rate:  0.001
Epoch 5/200
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6250 - loss: 1.0910 - val_accuracy: 0