# SummerCourse Week2 HW
## [GitHub](https://github.com/FCWTW/SummerCourse/tree/main/Week%202)
## Import library

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import pandas as pd
from PIL import Image
import numpy as np
import optuna

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


## Define a custom dataset class

In [2]:
class MyDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        image = Image.fromarray(self.data_frame.iloc[idx, 1:].values.reshape(28, 28).astype(np.uint8))
        label = int(self.data_frame.iloc[idx, 0])
        if self.transform:
            image = self.transform(image)
        return image, label

## Data preprocessing

In [3]:
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=45),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

## Define dataset and dataloader

In [4]:
train_path = '/kaggle/input/fashionmnist/fashion-mnist_train.csv'
test_path = '/kaggle/input/fashionmnist/fashion-mnist_test.csv'

train_data = MyDataset(csv_file=train_path, transform=train_transform)
test_data = MyDataset(csv_file=test_path, transform=test_transform)
testLen = int(len(test_data) * 0.5)
valLen = len(test_data) - testLen
test_data, val_data = random_split(test_data, [testLen, valLen])

## Define CNN model

In [5]:
class Net(nn.Module):
    def __init__(self, dropout_rate):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

## Train function

In [6]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, trial):
    best_accuracy = 0.0
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = correct_train / total_train
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")

        # Evaluate on validation set every epoch
        model.eval()
        running_val_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

            val_loss = running_val_loss / len(val_loader.dataset)
            val_accuracy = correct_val / total_val

            # Save model weights if validation accuracy is improved
            if val_accuracy > best_accuracy:
                torch.save(model.state_dict(), f'best_model_{trial.number}.pth')
                best_accuracy = val_accuracy

            print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

    # Return the best accuracy for Optuna to maximize
    return best_accuracy

## Test function

In [7]:
def test_model(model, test_loader, criterion):
    model.eval()
    running_test_loss = 0.0
    correct_test = 0
    total_test = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_test += labels.size(0)
            correct_test += (predicted == labels).sum().item()

    test_loss = running_test_loss / len(test_loader.dataset)
    test_accuracy = correct_test / total_test

    print(f'Testing Loss: {test_loss:.4f}, Testing Accuracy: {test_accuracy:.4f}')

## Search better hyperparameters with Optuna

In [8]:
def objective(trial):
    # Define hyperparameters
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256])
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)

    # Initialize model, loss and optimizer
    model = Net(dropout_rate=dropout_rate).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

    # Create DataLoader with current batch size
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
    
    # Train the model
    best_accuracy = train_model(model, train_loader, val_loader, criterion, optimizer, 20, trial)

    # Return the best accuracy as the objective to maximize
    return best_accuracy

## train model

In [9]:
# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=5)

[I 2024-08-09 11:24:37,123] A new study created in memory with name: no-name-b6711a2b-db2c-4964-ad2d-a391f8cd27e7
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)


Epoch 1/20, Training Loss: 0.7954, Training Accuracy: 0.7050
Validation Loss: 0.5935, Validation Accuracy: 0.7846
Epoch 2/20, Training Loss: 0.5776, Training Accuracy: 0.7863
Validation Loss: 0.5013, Validation Accuracy: 0.8158
Epoch 3/20, Training Loss: 0.5153, Training Accuracy: 0.8119
Validation Loss: 0.4492, Validation Accuracy: 0.8384
Epoch 4/20, Training Loss: 0.4867, Training Accuracy: 0.8232
Validation Loss: 0.4373, Validation Accuracy: 0.8400
Epoch 5/20, Training Loss: 0.4680, Training Accuracy: 0.8296
Validation Loss: 0.4089, Validation Accuracy: 0.8488
Epoch 6/20, Training Loss: 0.4449, Training Accuracy: 0.8384
Validation Loss: 0.4447, Validation Accuracy: 0.8422
Epoch 7/20, Training Loss: 0.4376, Training Accuracy: 0.8399
Validation Loss: 0.3726, Validation Accuracy: 0.8652
Epoch 8/20, Training Loss: 0.4284, Training Accuracy: 0.8449
Validation Loss: 0.3637, Validation Accuracy: 0.8724
Epoch 9/20, Training Loss: 0.4210, Training Accuracy: 0.8482
Validation Loss: 0.4030, Va

[I 2024-08-09 11:37:10,156] Trial 0 finished with value: 0.888 and parameters: {'learning_rate': 0.00328964155258187, 'batch_size': 128, 'dropout_rate': 0.1381197686826482}. Best is trial 0 with value: 0.888.


Validation Loss: 0.3230, Validation Accuracy: 0.8880
Epoch 1/20, Training Loss: 1.5725, Training Accuracy: 0.4430
Validation Loss: 1.0107, Validation Accuracy: 0.6622
Epoch 2/20, Training Loss: 1.0785, Training Accuracy: 0.6103
Validation Loss: 0.8214, Validation Accuracy: 0.7032
Epoch 3/20, Training Loss: 0.9493, Training Accuracy: 0.6547
Validation Loss: 0.7630, Validation Accuracy: 0.7224
Epoch 4/20, Training Loss: 0.8809, Training Accuracy: 0.6774
Validation Loss: 0.7185, Validation Accuracy: 0.7284
Epoch 5/20, Training Loss: 0.8339, Training Accuracy: 0.6931
Validation Loss: 0.6912, Validation Accuracy: 0.7414
Epoch 6/20, Training Loss: 0.8038, Training Accuracy: 0.7054
Validation Loss: 0.6651, Validation Accuracy: 0.7468
Epoch 7/20, Training Loss: 0.7834, Training Accuracy: 0.7121
Validation Loss: 0.6413, Validation Accuracy: 0.7564
Epoch 8/20, Training Loss: 0.7576, Training Accuracy: 0.7206
Validation Loss: 0.6386, Validation Accuracy: 0.7564
Epoch 9/20, Training Loss: 0.7451, 

[I 2024-08-09 11:49:26,584] Trial 1 finished with value: 0.804 and parameters: {'learning_rate': 0.00011051591577190123, 'batch_size': 256, 'dropout_rate': 0.4280118236657635}. Best is trial 0 with value: 0.888.


Validation Loss: 0.5344, Validation Accuracy: 0.8040
Epoch 1/20, Training Loss: 1.2331, Training Accuracy: 0.5697
Validation Loss: 0.8156, Validation Accuracy: 0.7022
Epoch 2/20, Training Loss: 0.8481, Training Accuracy: 0.6923
Validation Loss: 0.7058, Validation Accuracy: 0.7338
Epoch 3/20, Training Loss: 0.7621, Training Accuracy: 0.7181
Validation Loss: 0.6674, Validation Accuracy: 0.7526
Epoch 4/20, Training Loss: 0.7168, Training Accuracy: 0.7326
Validation Loss: 0.6431, Validation Accuracy: 0.7580
Epoch 5/20, Training Loss: 0.6796, Training Accuracy: 0.7473
Validation Loss: 0.6137, Validation Accuracy: 0.7698
Epoch 6/20, Training Loss: 0.6534, Training Accuracy: 0.7590
Validation Loss: 0.5862, Validation Accuracy: 0.7808
Epoch 7/20, Training Loss: 0.6341, Training Accuracy: 0.7647
Validation Loss: 0.5719, Validation Accuracy: 0.7906
Epoch 8/20, Training Loss: 0.6102, Training Accuracy: 0.7731
Validation Loss: 0.5598, Validation Accuracy: 0.7952
Epoch 9/20, Training Loss: 0.5944, 

[I 2024-08-09 12:01:59,332] Trial 2 finished with value: 0.8532 and parameters: {'learning_rate': 0.0001278986203853944, 'batch_size': 128, 'dropout_rate': 0.1316251467927937}. Best is trial 0 with value: 0.888.


Validation Loss: 0.4247, Validation Accuracy: 0.8510
Epoch 1/20, Training Loss: 1.0687, Training Accuracy: 0.6140
Validation Loss: 0.7118, Validation Accuracy: 0.7302
Epoch 2/20, Training Loss: 0.7544, Training Accuracy: 0.7201
Validation Loss: 0.6213, Validation Accuracy: 0.7622
Epoch 3/20, Training Loss: 0.6838, Training Accuracy: 0.7447
Validation Loss: 0.5691, Validation Accuracy: 0.7816
Epoch 4/20, Training Loss: 0.6406, Training Accuracy: 0.7600
Validation Loss: 0.5452, Validation Accuracy: 0.7992
Epoch 5/20, Training Loss: 0.6041, Training Accuracy: 0.7752
Validation Loss: 0.5248, Validation Accuracy: 0.8004
Epoch 6/20, Training Loss: 0.5814, Training Accuracy: 0.7862
Validation Loss: 0.4882, Validation Accuracy: 0.8180
Epoch 7/20, Training Loss: 0.5547, Training Accuracy: 0.7951
Validation Loss: 0.4710, Validation Accuracy: 0.8256
Epoch 8/20, Training Loss: 0.5372, Training Accuracy: 0.8030
Validation Loss: 0.4549, Validation Accuracy: 0.8318
Epoch 9/20, Training Loss: 0.5202, 

[I 2024-08-09 12:14:43,278] Trial 3 finished with value: 0.8706 and parameters: {'learning_rate': 0.0004118906802455594, 'batch_size': 128, 'dropout_rate': 0.24657626650224068}. Best is trial 0 with value: 0.888.


Validation Loss: 0.3613, Validation Accuracy: 0.8696
Epoch 1/20, Training Loss: 0.9874, Training Accuracy: 0.6299
Validation Loss: 0.7084, Validation Accuracy: 0.7296
Epoch 2/20, Training Loss: 0.8226, Training Accuracy: 0.6932
Validation Loss: 0.6659, Validation Accuracy: 0.7442
Epoch 3/20, Training Loss: 0.7868, Training Accuracy: 0.7039
Validation Loss: 0.6943, Validation Accuracy: 0.7028
Epoch 4/20, Training Loss: 0.7583, Training Accuracy: 0.7146
Validation Loss: 0.5972, Validation Accuracy: 0.7758
Epoch 5/20, Training Loss: 0.7536, Training Accuracy: 0.7167
Validation Loss: 0.6142, Validation Accuracy: 0.7572
Epoch 6/20, Training Loss: 0.7396, Training Accuracy: 0.7231
Validation Loss: 0.6004, Validation Accuracy: 0.7804
Epoch 7/20, Training Loss: 0.7378, Training Accuracy: 0.7259
Validation Loss: 0.5969, Validation Accuracy: 0.7706
Epoch 8/20, Training Loss: 0.7326, Training Accuracy: 0.7288
Validation Loss: 0.6211, Validation Accuracy: 0.7700
Epoch 9/20, Training Loss: 0.7279, 

[I 2024-08-09 12:27:40,879] Trial 4 finished with value: 0.8052 and parameters: {'learning_rate': 0.006551084078849642, 'batch_size': 64, 'dropout_rate': 0.48073747976393355}. Best is trial 0 with value: 0.888.


Validation Loss: 0.5627, Validation Accuracy: 0.7944


## Test model

In [17]:
# Initialize best model
best_trial = study.best_trial
best_model_weights_path = f'best_model_{best_trial.number}.pth'
print("Parameters: ", best_trial.params)
print("Validation Accuracy: ", best_trial.value)

best_model = Net(dropout_rate=best_trial.params['dropout_rate']).to(device)
best_model.load_state_dict(torch.load(best_model_weights_path))

# Evaluate best model on the test set
test_loader = DataLoader(test_data, batch_size=best_trial.params['batch_size'], shuffle=False)
test_model(best_model, test_loader, nn.CrossEntropyLoss())

Parameters:  {'learning_rate': 0.00328964155258187, 'batch_size': 128, 'dropout_rate': 0.1381197686826482}
Validation Accuracy:  0.888
Testing Loss: 0.3164, Testing Accuracy: 0.8858
