# Author:
- Name: Quan Hoang Ngoc
- ID: 22521178
- Time: 28/10/2024
- Lab2

In [1]:
import os
import sys
import torch

print(torch.__version__)
print(torch.cuda.is_available())
print(torch.cuda.device_count())

2.5.0+cu121
True
1


# Content

## Clone corpus

In [2]:
import torchvision as tv
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [3]:
torch_ds = datasets.Caltech101(root='./data', download=True)
torch_ds, torch_ds[0], torch_ds[10]

Files already downloaded and verified


(Dataset Caltech101
     Number of datapoints: 8677
     Root location: ./data/caltech101
     Target type: ['category'],
 (<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=510x337>, 0),
 (<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=479x316>, 0))

## Valid

In [4]:
from imutils import paths # NEW library
import cv2
import numpy as np
from tqdm import tqdm

In [5]:
image_paths = list(paths.list_images('./data/caltech101'))
data = []
labels = []
size_ts_in = 224

for img_path in tqdm(image_paths):
    label = img_path.split(os.path.sep)[-2]
    if label == "BACKGROUND_Google":
        continue
    img = cv2.imread(img_path)
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Color to gray
    # hist = cv2.calcHist([img], [0], None, [256], [0, 256])
    # size = img.shape[0]*img.shape[1]
    # hist = hist/size
    # hist = hist.flatten()
    img = cv2.resize(img, dsize=(size_ts_in, size_ts_in))
    data.append(img)
    labels.append(label)
data = np.array(data)
labels = np.array(labels).reshape(-1, 1)
data.shape, labels.shape

100%|██████████| 9144/9144 [00:12<00:00, 759.01it/s]


((8677, 224, 224, 3), (8677, 1))

In [6]:
classes = sorted(np.unique(labels))
label_mapping = {label: idx for idx, label in enumerate(classes)}
encoded = np.array([label_mapping[label[0]] for label in labels]).reshape(-1, 1)
encoded, len(classes)

(array([[26],
        [26],
        [26],
        ...,
        [ 1],
        [ 1],
        [ 1]]),
 101)

In [7]:
from sklearn.preprocessing import OneHotEncoder

# Create a OneHotEncoder instance
encoder = OneHotEncoder(sparse_output=False, categories='auto')  # sparse=False for a dense output

# Fit the encoder to your encoded labels and transform them
encoded_onehot = encoder.fit_transform(encoded)

# Print the shape of the one-hot encoded labels
print("One-hot encoded shape:", encoded_onehot.shape)

One-hot encoded shape: (8677, 101)


In [8]:
from sklearn.model_selection import train_test_split

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(data, encoded_onehot, test_size=0.2, random_state=42)

# Further split train into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# Print the shapes of the resulting sets
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (5205, 224, 224, 3)
y_train shape: (5205, 101)
X_val shape: (1736, 224, 224, 3)
y_val shape: (1736, 101)
X_test shape: (1736, 224, 224, 3)
y_test shape: (1736, 101)


## Task1

In [9]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten, Input
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, DepthwiseConv2D, AvgPool2D
from tensorflow.keras.models import Sequential, Model

In [10]:
from tensorflow.keras.layers import Input, Conv2D, DepthwiseConv2D, BatchNormalization, ReLU, Dropout, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model

def mobilnet_v1_block(inputs, filters, strides):
    """
    Defines a single MobileNetV1 block.

    Args:
        inputs: Input tensor.
        filters: Number of filters in the pointwise convolution.
        strides: Stride for the depthwise convolution.

    Returns:
        Output tensor of the block.
    """
    # [CPN1] Conv reduce wide
    x = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = ReLU(max_value=6)(x) # ReLU6 for better quantization

    # [CPN2] Conv change deep
    x = Conv2D(filters, kernel_size=1, strides=1, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU(max_value=6)(x) # ReLU6
    return x

def get_network(wide_ts_in, num_classes):
    """
    Creates a MobileNetV1 model.

    Args:
        input_shape: Shape of the input image.
        num_classes: Number of classes for classification.

    Returns:
        A Keras Model instance.
    """
    inputs = Input(shape=(wide_ts_in, wide_ts_in, 3))

    x = Conv2D(32, kernel_size=3, strides=2, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = ReLU(max_value=6)(x)

    x = mobilnet_v1_block(x, 64, strides=1)
    x = mobilnet_v1_block(x, 128, strides=2)
    x = mobilnet_v1_block(x, 128, strides=1)
    x = mobilnet_v1_block(x, 256, strides=2)
    x = mobilnet_v1_block(x, 256, strides=1)
    x = mobilnet_v1_block(x, 512, strides=2)

    for _ in range(5):  # 5 blocks with 512 filters
        x = mobilnet_v1_block(x, 512, strides=1)

    x = mobilnet_v1_block(x, 1024, strides=2)
    x = mobilnet_v1_block(x, 1024, strides=1)

    x = AvgPool2D(pool_size=(7, 7), strides=1, padding="same")(x) # Using AvgPool2D
    x = Flatten()(x)  # Flatten the output of AvgPool2D
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model


In [11]:
def compile_network(network, algo_type, loss_type):
    adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
    sgd = tf.keras.optimizers.SGD(learning_rate=1e-4)
    ce = "categorical_crossentropy"
    fce = "categorical_focal_crossentropy"
    if algo_type == "adam":
        algo_type = adam
    elif algo_type == "sgd":
        algo_type = sgd
    if loss_type == "ce":
        loss_type = ce
    elif loss_type == "fce":
        loss_type = fce
    network.compile(optimizer=algo_type, loss=loss_type, metrics=["accuracy"])
    return network

### Gen loader

## Adam + CE

In [12]:
model = get_network(224, 101)
model = compile_network(model, "adam", "ce")
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 208ms/step - accuracy: 0.1191 - loss: 5.1663 - val_accuracy: 0.0472 - val_loss: 4.8654
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 90ms/step - accuracy: 0.2171 - loss: 4.0410 - val_accuracy: 0.0472 - val_loss: 5.5322
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 91ms/step - accuracy: 0.2900 - loss: 3.4622 - val_accuracy: 0.0449 - val_loss: 6.4993
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 90ms/step - accuracy: 0.3451 - loss: 3.1187 - val_accuracy: 0.0063 - val_loss: 7.2188
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 91ms/step - accuracy: 0.4034 - loss: 2.7240 - val_accuracy: 0.2984 - val_loss: 3.3603
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 91ms/step - accuracy: 0.4366 - loss: 2.4751 - val_accuracy: 0.3370 - val_loss: 3.1206
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x7c18c996a4a0>

In [13]:
from sklearn.metrics import accuracy_score
def eval_score(model):
    # Predict on the training set
    y_train_pred = model.predict(X_train)
    y_train_pred_classes = np.argmax(y_train_pred, axis=1)  # Convert probabilities to class labels
    y_train_true_classes = np.argmax(y_train, axis=1)

    # Predict on the validation set
    y_val_pred = model.predict(X_val)
    y_val_pred_classes = np.argmax(y_val_pred, axis=1)
    y_val_true_classes = np.argmax(y_val, axis=1)

    # Predict on the test set
    y_test_pred = model.predict(X_test)
    y_test_pred_classes = np.argmax(y_test_pred, axis=1)
    y_test_true_classes = np.argmax(y_test, axis=1)

    # Calculate accuracy for each set
    train_accuracy = accuracy_score(y_train_true_classes, y_train_pred_classes)
    val_accuracy = accuracy_score(y_val_true_classes, y_val_pred_classes)
    test_accuracy = accuracy_score(y_test_true_classes, y_test_pred_classes)

    # Print the results
    print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

eval_score(model)

[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 33ms/step
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Training Accuracy: 0.5556
Validation Accuracy: 0.4038
Test Accuracy: 0.4090


## Adam + FCE

In [14]:
model = get_network(224, 101)
model = compile_network(model, "adam", "fce")
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))
eval_score(model)

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 174ms/step - accuracy: 0.1175 - loss: 1.2919 - val_accuracy: 0.0472 - val_loss: 1.2060
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 91ms/step - accuracy: 0.2112 - loss: 0.9897 - val_accuracy: 0.0472 - val_loss: 1.5116
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 90ms/step - accuracy: 0.2908 - loss: 0.8202 - val_accuracy: 0.0472 - val_loss: 1.7189
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 91ms/step - accuracy: 0.3437 - loss: 0.7113 - val_accuracy: 0.0086 - val_loss: 1.7300
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 91ms/step - accuracy: 0.3892 - loss: 0.6276 - val_accuracy: 0.2546 - val_loss: 0.8511
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 91ms/step - accuracy: 0.4276 - loss: 0.5681 - val_accuracy: 0.3468 - val_loss: 0.6840
Epoch 7/10
[1m

## SGD + CE

In [15]:
model = get_network(224, 101)
model = compile_network(model, "sgd", "ce")
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))
eval_score(model)

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 160ms/step - accuracy: 0.0677 - loss: 4.5311 - val_accuracy: 0.0916 - val_loss: 4.4252
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 89ms/step - accuracy: 0.1397 - loss: 4.2081 - val_accuracy: 0.0916 - val_loss: 4.3244
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 90ms/step - accuracy: 0.1565 - loss: 4.1390 - val_accuracy: 0.0916 - val_loss: 4.3192
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 89ms/step - accuracy: 0.1434 - loss: 4.1280 - val_accuracy: 0.0916 - val_loss: 4.3504
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 89ms/step - accuracy: 0.1503 - loss: 4.0938 - val_accuracy: 0.0916 - val_loss: 4.2653
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 89ms/step - accuracy: 0.1644 - loss: 4.0631 - val_accuracy: 0.1532 - val_loss: 4.0916
Epoch 7/10
[1m

## SGD + FCE

In [16]:
model = get_network(224, 101)
model = compile_network(model, "sgd", "fce")
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))
eval_score(model)

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 160ms/step - accuracy: 0.0439 - loss: 1.1460 - val_accuracy: 0.0046 - val_loss: 1.1096
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 89ms/step - accuracy: 0.1167 - loss: 1.0490 - val_accuracy: 0.0916 - val_loss: 1.0802
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 90ms/step - accuracy: 0.1257 - loss: 1.0288 - val_accuracy: 0.0916 - val_loss: 1.0637
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 89ms/step - accuracy: 0.1415 - loss: 1.0133 - val_accuracy: 0.0916 - val_loss: 1.0616
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 89ms/step - accuracy: 0.1438 - loss: 1.0041 - val_accuracy: 0.0916 - val_loss: 1.0260
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 89ms/step - accuracy: 0.1555 - loss: 0.9890 - val_accuracy: 0.1382 - val_loss: 0.9995
Epoch 7/10
[1m

## Task2

In [19]:
def get_edit_network(wide_ts_in, num_classes):
    inputs = Input(shape=(wide_ts_in, wide_ts_in, 3))

    x = Conv2D(32, kernel_size=7, strides=2, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = ReLU(max_value=6)(x)

    # x = mobilnet_v1_block(x, 64, strides=1)
    x = mobilnet_v1_block(x, 128, strides=2)
    x = mobilnet_v1_block(x, 128, strides=1)
    x = mobilnet_v1_block(x, 256, strides=2)
    x = mobilnet_v1_block(x, 256, strides=1)
    x = mobilnet_v1_block(x, 512, strides=2)

    for _ in range(5):  # 5 blocks with 512 filters
        x = mobilnet_v1_block(x, 512, strides=1)

    x = mobilnet_v1_block(x, 1024, strides=2)
    x = mobilnet_v1_block(x, 1024, strides=1)

    x = AvgPool2D(pool_size=(7, 7), strides=1, padding="same")(x) # Using AvgPool2D
    x = Flatten()(x)  # Flatten the output of AvgPool2D
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

In [20]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Calculate class weights
y_train_true_classes = np.argmax(y_train, axis=1)  # Get true class labels
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(y_train_true_classes),
                                     y=y_train_true_classes)
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}

# Define and compile the model with weighted-class cross-entropy loss
model = get_edit_network(224, 101)
model = compile_network(model, "adam", "ce")  # Using 'ce' for cross-entropy, weights will be applied in fit

# Train the model with class weights
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), class_weight=class_weights_dict)

# Evaluate the model
eval_score(model)

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 166ms/step - accuracy: 0.0137 - loss: 6.0096 - val_accuracy: 0.0916 - val_loss: 5.1345
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 77ms/step - accuracy: 0.0568 - loss: 4.9324 - val_accuracy: 0.0472 - val_loss: 5.8584
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 78ms/step - accuracy: 0.1193 - loss: 4.5080 - val_accuracy: 0.0472 - val_loss: 5.7407
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 79ms/step - accuracy: 0.1816 - loss: 4.1742 - val_accuracy: 0.0058 - val_loss: 8.3105
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 79ms/step - accuracy: 0.2362 - loss: 3.8768 - val_accuracy: 0.1158 - val_loss: 4.5394
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 79ms/step - accuracy: 0.3111 - loss: 3.5583 - val_accuracy: 0.2569 - val_loss: 3.4658
Epoch 7/10
[1m