# Experiment SetUp - 50 Trials

Parameters:
- l1
- l1 vs. batch_size
- epochs vs. batch_size
- epochs vs. lr
- lr vs. batch_size

### Imports

In [11]:
from functools import partial
import os
import random 

import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import random_split

from ray import tune
from ray.air import Checkpoint, session
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.basic_variant import BasicVariantGenerator
from ray.tune import Callback

In [12]:
torch.manual_seed(40)
random.seed(40)
np.random.seed(40)

#### Class to print chosen l1 param

In [13]:
class PrintChosenL1Callback(Callback):
    def on_trial_complete(self, iteration, trials, trial, **info):
        chosen_l1 = trial.config["l1"]
        print(f"Trial {trial.trial_id}: Chosen l1 = {chosen_l1}")

In [14]:
print_l1_callback = PrintChosenL1Callback()

### CNN Model SetUp

In [15]:
class FashionCNN(nn.Module):
    def __init__(self, l1=64):
        super(FashionCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, l1)
        self.fc2 = nn.Linear(l1, 10)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

### Training

#### Training function

In [16]:
def train_fashion_mnist(config):
    net = FashionCNN(config["l1"]) 

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    # loading data
    data_dir = "./data"
    transform = transforms.Compose(
        [transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))])

    trainset = torchvision.datasets.FashionMNIST(data_dir,
        download=True,
        train=True,
        transform=transform)
    testset = torchvision.datasets.FashionMNIST(data_dir,
        download=True,
        train=False,
        transform=transform)
    
    # Split the training set into subsets
    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(trainset, [test_abs, len(trainset) - test_abs])

    # Create data loaders for subsets
    trainloader = torch.utils.data.DataLoader(
        train_subset, batch_size=config["batch_size"], shuffle=True, num_workers=2
    )
    valloader = torch.utils.data.DataLoader(
        val_subset, batch_size=config["batch_size"], shuffle=True, num_workers=2
    )

    # defining the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["learning_rate"])

    for epoch in range(config["epochs"]):
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            epoch_steps += 1

            if i % 2000 == 1999:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / epoch_steps))
                running_loss = 0.0

        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        with torch.no_grad():
            for data in valloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_steps += 1

        tune.report(
            mean_accuracy=correct / total,
            mean_val_loss=val_loss / val_steps
        )

#### Experiment Parameters

In [17]:
max_num_epochs = 15
num_samples = 50

#### AHSA scheduler

In [18]:
scheduler = ASHAScheduler(
    metric="mean_val_loss",
    mode="min",
    max_t=max_num_epochs,
    grace_period=1,
    reduction_factor=2,
)

### 1st Experiment - l1

In [19]:
config = {
    "epochs": tune.choice([10]),
    "learning_rate": 1e-3, # 0.001
    "batch_size": tune.choice([64]),
    "l1": tune.sample_from(lambda _: 2**np.random.randint(5, 7)) # 32, 64, 128
}

In [20]:
result_l1 = tune.run(
    partial(train_fashion_mnist),
    resources_per_trial={"cpu": 8, "gpu": 0},
    config=config,
    num_samples=num_samples,
    storage_path='./tune_runs/',
    search_alg=BasicVariantGenerator(random_state=40),
    scheduler=scheduler,
    callbacks=[print_l1_callback]
)

best_trial = result_l1.get_best_trial("mean_val_loss", mode="min")
best_config = best_trial.config
chosen_l1 = best_config["l1"]

print("Chosen l1:", chosen_l1)

best_metrics = best_trial.metric_analysis

print("Best trial config:", best_config)
print("Best trial metrics:", best_metrics)

2023-08-21 16:46:56,520	INFO tune.py:657 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-08-21 16:58:15
Running for:,00:11:18.86
Memory:,13.2/15.7 GiB

Trial name,status,loc,batch_size,epochs,acc,iter,total time (s),mean_val_loss
train_fashion_mnist_93602_00001,RUNNING,127.0.0.1:42544,64,10,,,,
train_fashion_mnist_93602_00002,PENDING,,64,10,,,,
train_fashion_mnist_93602_00003,PENDING,,64,10,,,,
train_fashion_mnist_93602_00004,PENDING,,64,10,,,,
train_fashion_mnist_93602_00005,PENDING,,64,10,,,,
train_fashion_mnist_93602_00006,PENDING,,64,10,,,,
train_fashion_mnist_93602_00007,PENDING,,64,10,,,,
train_fashion_mnist_93602_00008,PENDING,,64,10,,,,
train_fashion_mnist_93602_00009,PENDING,,64,10,,,,
train_fashion_mnist_93602_00010,PENDING,,64,10,,,,


[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


0.5%[36m(func pid=42544)[0m 
1.2%[36m(func pid=42544)[0m 
1.4%[36m(func pid=42544)[0m 
2.4%[36m(func pid=42544)[0m 
2.5%[36m(func pid=42544)[0m 
3.3%[36m(func pid=42544)[0m 
4.3%[36m(func pid=42544)[0m 
5.5%[36m(func pid=42544)[0m 
6.4%[36m(func pid=42544)[0m 
6.6%[36m(func pid=42544)[0m 
7.6%[36m(func pid=42544)[0m 
7.7%[36m(func pid=42544)[0m 
8.6%[36m(func pid=42544)[0m 
8.7%[36m(func pid=42544)[0m 
8.8%[36m(func pid=42544)[0m 
9.8%[36m(func pid=42544)[0m 
10.7%[36m(func pid=42544)[0m 
11.7%[36m(func pid=42544)[0m 
12.5%[36m(func pid=42544)[0m 
13.3%[36m(func pid=42544)[0m 
13.4%[36m(func pid=42544)[0m 
14.3%[36m(func pid=42544)[0m 
14.4%[36m(func pid=42544)[0m 
15.1%[36m(func pid=42544)[0m 
15.9%[36m(func pid=42544)[0m 
16.9%[36m(func pid=42544)[0m 
17.6%[36m(func pid=42544)[0m 
18.6%[36m(func pid=42544)[0m 
19.5%[36m(func pid=42544)[0m 
20.2%[36m(func pid=42544)[0m 
20.3%[36m(func pid=42544)[0m 
21.0%[36m(func pid=42544)[0m 
21.8%[36

[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz
[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz


100.0%36m(func pid=42544)[0m 


[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


1.5%[36m(func pid=42544)[0m 
6.7%[36m(func pid=42544)[0m 
12.6%[36m(func pid=42544)[0m 
17.8%[36m(func pid=42544)[0m 
22.2%[36m(func pid=42544)[0m 
23.0%[36m(func pid=42544)[0m 
27.4%[36m(func pid=42544)[0m 
30.4%[36m(func pid=42544)[0m 
35.6%[36m(func pid=42544)[0m 
40.8%[36m(func pid=42544)[0m 
41.5%[36m(func pid=42544)[0m 
47.4%[36m(func pid=42544)[0m 
52.6%[36m(func pid=42544)[0m 
57.8%[36m(func pid=42544)[0m 
58.5%[36m(func pid=42544)[0m 
63.0%[36m(func pid=42544)[0m 
67.4%[36m(func pid=42544)[0m 
72.6%[36m(func pid=42544)[0m 
73.4%[36m(func pid=42544)[0m 
78.5%[36m(func pid=42544)[0m 
82.3%[36m(func pid=42544)[0m 
84.5%[36m(func pid=42544)[0m 
91.1%[36m(func pid=42544)[0m 
95.6%[36m(func pid=42544)[0m 
100.0%36m(func pid=42544)[0m 


[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%36m(func pid=42544)[0m 


[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 


Trial name,mean_accuracy,mean_val_loss
train_fashion_mnist_93602_00000,0.7855,0.567177


Trial 93602_00000: Chosen l1 = 32
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


0.7%[36m(func pid=42544)[0m 
1.5%[36m(func pid=42544)[0m 
1.6%[36m(func pid=42544)[0m 
1.7%[36m(func pid=42544)[0m 
1.9%[36m(func pid=42544)[0m 
2.1%[36m(func pid=42544)[0m 
3.1%[36m(func pid=42544)[0m 
3.2%[36m(func pid=42544)[0m 
3.3%[36m(func pid=42544)[0m 
3.5%[36m(func pid=42544)[0m 
3.7%[36m(func pid=42544)[0m 
3.8%[36m(func pid=42544)[0m 
4.8%[36m(func pid=42544)[0m 
5.0%[36m(func pid=42544)[0m 
5.1%[36m(func pid=42544)[0m 
5.2%[36m(func pid=42544)[0m 
6.0%[36m(func pid=42544)[0m 
6.9%[36m(func pid=42544)[0m 
7.2%[36m(func pid=42544)[0m 
8.1%[36m(func pid=42544)[0m 
8.2%[36m(func pid=42544)[0m 
8.3%[36m(func pid=42544)[0m 
8.9%[36m(func pid=42544)[0m 
9.1%[36m(func pid=42544)[0m 
9.2%[36m(func pid=42544)[0m 
10.0%[36m(func pid=42544)[0m 
10.3%[36m(func pid=42544)[0m 
11.2%[36m(func pid=42544)[0m 
11.3%[36m(func pid=42544)[0m 
11.4%[36m(func pid=42544)[0m 
11.5%[36m(func pid=42544)[0m 
11.7%[36m(func pid=42544)[0m 
11.8%[36

[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz
[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz


100.0%36m(func pid=42544)[0m 


[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


3.0%[36m(func pid=42544)[0m 
8.9%[36m(func pid=42544)[0m 
13.3%[36m(func pid=42544)[0m 
14.8%[36m(func pid=42544)[0m 
20.7%[36m(func pid=42544)[0m 
23.7%[36m(func pid=42544)[0m 
24.5%[36m(func pid=42544)[0m 
28.9%[36m(func pid=42544)[0m 
29.6%[36m(func pid=42544)[0m 
34.1%[36m(func pid=42544)[0m 
40.0%[36m(func pid=42544)[0m 
44.5%[36m(func pid=42544)[0m 
49.6%[36m(func pid=42544)[0m 
54.1%[36m(func pid=42544)[0m 
59.3%[36m(func pid=42544)[0m 
63.7%[36m(func pid=42544)[0m 
68.2%[36m(func pid=42544)[0m 
68.9%[36m(func pid=42544)[0m 
73.4%[36m(func pid=42544)[0m 
74.1%[36m(func pid=42544)[0m 
74.8%[36m(func pid=42544)[0m 
80.0%[36m(func pid=42544)[0m 
83.0%[36m(func pid=42544)[0m 
83.7%[36m(func pid=42544)[0m 
88.2%[36m(func pid=42544)[0m 
93.4%[36m(func pid=42544)[0m 
94.1%[36m(func pid=42544)[0m 
98.6%[36m(func pid=42544)[0m 
100.0%36m(func pid=42544)[0m 


[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
[2m[36m(func pid=42544)[0m Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%36m(func pid=42544)[0m 


[2m[36m(func pid=42544)[0m Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw
[2m[36m(func pid=42544)[0m 


2023-08-21 16:58:25,564	INFO tune.py:1148 -- Total run time: 689.04 seconds (678.70 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- C:\Users\ibele\ray_results\train_fashion_mnist_2023-08-21_16-46-56\train_fashion_mnist_93602_00001_1_batch_size=64,epochs=10_2023-08-21_16-46-56
- C:\Users\ibele\ray_results\train_fashion_mnist_2023-08-21_16-46-56\train_fashion_mnist_93602_00002_2_batch_size=64,epochs=10_2023-08-21_16-46-56
- C:\Users\ibele\ray_results\train_fashion_mnist_2023-08-21_16-46-56\train_fashion_mnist_93602_00003_3_batch_size=64,epochs=10_2023-08-21_16-46-56
- C:\Users\ibele\ray_results\train_fashion_mnist_2023-08-21_16-46-56\train_fashion_mnist_93602_00004_4_batch_size=64,epochs=10_2023-08-21_16-46-56
- C:\Users\ibele\ray_results\train_fashion_mnist_2023-08-21_16-46-56\train_fashion_mnist_93602_00005_5_batch_size=64,epochs=10_2023-08-21_16-46-56
- C:\Users\ibele\ray_results\train_fashion_mnist_2023-08-21_16-46-56\train_fashion_mnist_93602_00006

Chosen l1: 32
Best trial config: {'epochs': 10, 'learning_rate': 0.001, 'batch_size': 64, 'l1': 32}
Best trial metrics: {'mean_accuracy': {'max': 0.7890833333333334, 'min': 0.41508333333333336, 'avg': 0.7185416666666669, 'last': 0.7855, 'last-5-avg': 0.7798499999999999, 'last-10-avg': 0.7185416666666666}, 'mean_val_loss': {'max': 2.1220153164356312, 'min': 0.567176804897633, 'avg': 0.8612084210077496, 'last': 0.567176804897633, 'last-5-avg': 0.5957349429422236, 'last-10-avg': 0.8612084210077497}, 'time_this_iter_s': {'max': 71.91051888465881, 'min': 53.20462203025818, 'avg': 62.499757170677185, 'last': 67.07591915130615, 'last-5-avg': 67.05286979675293, 'last-10-avg': 62.499757170677185}, 'done': {'max': False, 'min': False, 'avg': 0.0, 'last': False, 'last-5-avg': 0.0, 'last-10-avg': 0.0}, 'training_iteration': {'max': 10, 'min': 1, 'avg': 5.5, 'last': 10, 'last-5-avg': 8.0, 'last-10-avg': 5.5}, 'time_total_s': {'max': 624.9975717067719, 'min': 70.44486260414124, 'avg': 335.6692216873

### 2nd Experiment - l1 vs. batch_size

In [None]:
config = {
    "epochs": tune.choice([10]),
    "learning_rate": tune.loguniform(1e-3), 
    "batch_size": tune.choice([16, 32, 64, 128]),
    "l1": tune.sample_from(lambda _: 2**np.random.randint(5, 7))
}

In [None]:
result_l1_bs = tune.run(
    partial(train_fashion_mnist),
    resources_per_trial={"cpu": 8, "gpu": 0},
    config=config,
    num_samples=num_samples,
    storage_path='./tune_runs/',
    search_alg=BasicVariantGenerator(random_state=40),
    scheduler=scheduler,
    callbacks=[print_l1_callback]
)

best_trial = result_l1_bs.get_best_trial("mean_val_loss", mode="min")
best_config = best_trial.config
chosen_l1 = best_config["l1"]

print("Chosen l1:", chosen_l1)

best_metrics = best_trial.metric_analysis

print("Best trial config:", best_config)
print("Best trial metrics:", best_metrics)

### 3rd Experiment - epochs vs. batch_size

In [None]:
config = {
    "epochs": tune.choice([5, 10, 15]),
    "learning_rate": tune.loguniform(1e-3), 
    "batch_size": tune.choice([16, 32, 64, 128])
}

In [None]:
result_ep_bs = tune.run(
    partial(train_fashion_mnist),
    resources_per_trial={"cpu": 8, "gpu": 0},
    config=config,
    num_samples=num_samples,
    storage_path='./tune_runs/',
    search_alg=BasicVariantGenerator(random_state=40),
    scheduler=scheduler
)

best_trial = result_ep_bs.get_best_trial("mean_val_loss", mode="min")
best_config = best_trial.config
chosen_l1 = best_config["l1"]

print("Chosen l1:", chosen_l1)

best_metrics = best_trial.metric_analysis

print("Best trial config:", best_config)
print("Best trial metrics:", best_metrics)

### 4th Experiment - epochs vs. lr

In [None]:
config = {
    "epochs": tune.choice([5, 10, 15]),
    "learning_rate": tune.loguniform(1e-3, 1e-1), 
    "batch_size": tune.choice([64])
}

In [None]:
result_ep_lr = tune.run(
    partial(train_fashion_mnist),
    resources_per_trial={"cpu": 8, "gpu": 0},
    config=config,
    num_samples=num_samples,
    storage_path='./tune_runs/',
    search_alg=BasicVariantGenerator(random_state=40),
    scheduler=scheduler
)

best_trial = result_ep_lr.get_best_trial("mean_val_loss", mode="min")
best_config = best_trial.config
chosen_l1 = best_config["l1"]

print("Chosen l1:", chosen_l1)

best_metrics = best_trial.metric_analysis

print("Best trial config:", best_config)
print("Best trial metrics:", best_metrics)

### 5th Experiment - batch_size vs. lr

In [None]:
config = {
    "epochs": tune.choice([10]),
    "learning_rate": tune.loguniform(1e-3, 1e-1), 
    "batch_size": tune.choice([16, 32, 64, 128])
}

In [None]:
result_bs_lr = tune.run(
    partial(train_fashion_mnist),
    resources_per_trial={"cpu": 8, "gpu": 0},
    config=config,
    num_samples=num_samples,
    storage_path='./tune_runs/',
    search_alg=BasicVariantGenerator(random_state=40),
    scheduler=scheduler
)

best_trial = result_bs_lr.get_best_trial("mean_val_loss", mode="min")
best_config = best_trial.config
chosen_l1 = best_config["l1"]

print("Chosen l1:", chosen_l1)

best_metrics = best_trial.metric_analysis

print("Best trial config:", best_config)
print("Best trial metrics:", best_metrics)