# Assignment 4: Graphs & ResNets

### Assignment question is available here: https://ovgu-ailab.github.io/idl2023/assignment4.html

# ResNet on CIFAR-10

### Code-1

In [6]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Add

# Define the residual block
def residual_block(x, filters, kernel_size=3, stride=1):
    # Shortcut connection
    shortcut = x

    # First convolution layer
    x = Conv2D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = tf.keras.layers.ReLU()(x)

    # Second convolution layer
    x = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(x)

    # Add the shortcut to the output
    x = Add()([x, shortcut])

    return x

# Define the ResNet model
def build_resnet(input_shape, num_classes, num_blocks=3, num_filters=16):
    input_layer = Input(shape=input_shape)
    x = Conv2D(num_filters, kernel_size=3, strides=1, padding='same')(input_layer)

    # Create residual blocks
    for _ in range(num_blocks):
        x = residual_block(x, num_filters)

    # Add a Global Average Pooling layer
    x = GlobalAveragePooling2D()(x)

    # Output layer
    output_layer = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    return model

# Define CIFAR-10 data preprocessing
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Create and compile the model
input_shape = x_train.shape[1:]
model = build_resnet(input_shape, num_classes=10, num_blocks=3, num_filters=16)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, batch_size=64, epochs=10, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
313/313 - 11s - loss: 1.1124 - accuracy: 0.6034 - 11s/epoch - 35ms/step
Test accuracy: 0.6033999919891357


## Experimenting with different (hyper)parameters

### Code - 2

In [7]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, Add, GlobalAveragePooling2D, Dense
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical

# Define the residual block
def residual_block(x, filters, kernel_size=3, strides=1):
    shortcut = x
    x = Conv2D(filters, kernel_size, strides=strides, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(filters, kernel_size, padding='same')(x)
    x = BatchNormalization()(x)

    # Check if the input and output shapes match
    if shortcut.shape[-1] != x.shape[-1]:
        shortcut = Conv2D(filters, kernel_size=1, strides=strides, padding='same')(shortcut)

    x = Add()([x, shortcut])
    x = ReLU()(x)
    return x

# Build the simplified ResNet model
def build_resnet(input_shape, num_classes):
    input_layer = Input(shape=input_shape)

    # Initial convolution layer
    x = Conv2D(32, 3, padding='same')(input_layer)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    # Residual blocks
    num_blocks = 3  # experiment with this number
    filters = 32  # experiment with this number

    for _ in range(num_blocks):
        x = residual_block(x, filters)

    # Global average pooling and final classification layer
    x = GlobalAveragePooling2D()(x)
    output_layer = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    return model

# Load CIFAR-10 data and preprocess
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Define hyperparameters
batch_size = 128
learning_rate = 0.001
num_epochs = 10  # experiment with this number

# Build and compile the simplified ResNet model
input_shape = x_train[0].shape
num_classes = 10
model = build_resnet(input_shape, num_classes)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(x_train, y_train, epochs=num_epochs, batch_size=batch_size, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.6467999815940857


###  Comparing performance of Code-1 with and without tf.function

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Add

# Define the residual block
def residual_block(x, filters, kernel_size=3, stride=1):
    # Shortcut connection
    shortcut = x

    # First convolution layer
    x = Conv2D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = tf.keras.layers.ReLU()(x)

    # Second convolution layer
    x = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(x)

    # Add the shortcut to the output
    x = Add()([x, shortcut])

    return x

# Define the ResNet model
def build_resnet(input_shape, num_classes, num_blocks=3, num_filters=16):
    input_layer = Input(shape=input_shape)
    x = Conv2D(num_filters, kernel_size=3, strides=1, padding='same')(input_layer)

    # Create residual blocks
    for _ in range(num_blocks):
        x = residual_block(x, num_filters)

    # Add a Global Average Pooling layer
    x = GlobalAveragePooling2D()(x)

    # Output layer
    output_layer = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    return model

# Define CIFAR-10 data preprocessing
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Create and compile the model
input_shape = x_train.shape[1:]
model = build_resnet(input_shape, num_classes=10, num_blocks=3, num_filters=16)

# Without tf.function
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=64, epochs=2, validation_data=(x_test, y_test))
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy without tf.function: {test_acc}")

# With tf.function
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(labels, predictions))
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

optimizer = tf.keras.optimizers.Adam()
for epoch in range(2):
    for i in range(0, len(x_train), 64):
        x_batch = x_train[i:i+64]
        y_batch = y_train[i:i+64]
        loss = train_step(x_batch, y_batch)
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
    print(f'Epoch {epoch+1}/10 - Test accuracy with tf.function: {test_acc}')


Epoch 1/2
Epoch 2/2
313/313 - 10s - loss: 1.5340 - accuracy: 0.4374 - 10s/epoch - 33ms/step
Test accuracy without tf.function: 0.4374000132083893
Epoch 1/10 - Test accuracy with tf.function: 0.4952999949455261
Epoch 2/10 - Test accuracy with tf.function: 0.5332000255584717


#### Test accuracy with tf.function is much higher than Test accuracy without tf.function