<a href="https://colab.research.google.com/github/baranceanuvlad/Advanced-Topics-in-Neural-Networks-Template-2023/blob/main/Lab05/Solution/Homework5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from multiprocessing import freeze_support

import torch
from torchvision.datasets import CIFAR10
from torchvision.transforms import v2
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import random
import torchvision.transforms as transformss

In [None]:
!pip install sam-pytorch




In [None]:
!pip install wandb



In [None]:
from sam import SAM

In [None]:
import wandb

In [None]:
def get_default_device():
    """Pick the preferred torch device: CUDA first, then Apple MPS, then CPU.

    Returns:
        torch.device: ``cuda`` if a CUDA device is available, ``mps`` if the
        MPS backend is available, otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        # For multi-gpu workstations, PyTorch will use the first available GPU (cuda:0), unless specified otherwise
        # (cuda:1).
        return torch.device('cuda')
    # Torch builds that predate the MPS backend have no ``torch.backends.mps``;
    # treat a missing module the same as "not available".
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device('mps')
    return torch.device('cpu')


In [None]:
class CachedDataset(Dataset):
    """Dataset wrapper that can hold every sample of another dataset in memory.

    With ``cache`` enabled, the wrapped dataset is iterated once at
    construction time and the resulting samples are kept in a tuple, so later
    lookups are plain tuple indexing. With ``cache`` disabled, the wrapped
    dataset is stored untouched and every lookup is forwarded to it.
    """

    def __init__(self, dataset, cache=True):
        # tuple() walks the dataset exactly once, front to back.
        self.dataset = tuple(dataset) if cache else dataset

    def __len__(self):
        """Number of samples in the (possibly cached) dataset."""
        return len(self.dataset)

    def __getitem__(self, i):
        """Return the sample stored at index ``i``."""
        return self.dataset[i]

In [None]:
class MLP(torch.nn.Module):
    """Fully connected network: four hidden linear layers with ReLU, then a linear output layer.

    The layers are exposed as ``fc1`` .. ``fc5`` and share a single in-place
    ``ReLU`` module (``relu``).
    """

    def __init__(self, input_size, hidden_size_1, hidden_size_2, hidden_size_3, hidden_size_4, output_size):
        super().__init__()
        linear = torch.nn.Linear
        # Layers are created in order fc1 -> fc5.
        self.fc1 = linear(input_size, hidden_size_1)
        self.fc2 = linear(hidden_size_1, hidden_size_2)
        self.fc3 = linear(hidden_size_2, hidden_size_3)
        self.fc4 = linear(hidden_size_3, hidden_size_4)
        self.fc5 = linear(hidden_size_4, output_size)
        self.relu = torch.nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply fc1..fc4, each followed by ReLU, and return the raw fc5 output."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = self.relu(layer(hidden))
        # No activation after the last layer: the output is unnormalised scores.
        return self.fc5(hidden)

In [None]:
def accuracy(output, labels):
    """Return the fraction of entries where ``output`` matches ``labels``.

    Computed as ``(len(output) - mismatches) / len(output)``, where
    ``mismatches`` counts the positions at which the two tensors differ.
    """
    mismatches = (output != labels).sum().item()
    total = len(output)
    return (total - mismatches) / total

In [None]:
def train(model, train_loader, criterion, optimizer, device, writer):
    """Run one training epoch and return its accuracy and mean batch loss.

    Every batch is optimised through ``optimizer.step(closure)``; the closure
    zeroes the gradients, runs a forward pass, logs the loss and
    back-propagates. How many times the closure runs per batch is up to the
    optimizer.

    Args:
        model: Module to train; it is put into training mode.
        train_loader: Iterable of ``(data, labels)`` batches that supports ``len()``.
        criterion: Loss callable invoked as ``criterion(output, labels)``.
        optimizer: Optimizer whose ``step`` accepts a closure.
        device: Device every batch is moved to.
        writer: Object exposing ``add_scalar(tag, value, step)``.

    Returns:
        tuple: ``(accuracy rounded to 4 decimals, mean per-batch loss)``. The
        accuracy is measured with a forward pass made after each update; the
        loss of a batch is the first value its closure computed.
    """
    model.train()

    all_predictions = []
    all_labels = []
    total_loss = 0.0

    for batch_number, (data, labels) in enumerate(train_loader, start=1):
        data = data.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # Losses seen by the closure for this batch (it may run more than once).
        batch_losses = []

        def closure():
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, labels)

            loss_value = loss.item()
            batch_losses.append(loss_value)
            # add_scalar takes (tag, value, step): the loss is the value and the
            # batch index is the step.
            # NOTE: batch_number restarts at 1 every epoch, so points from
            # different epochs share the same step values.
            writer.add_scalar("Batch Training/Loss", loss_value, batch_number)
            loss.backward()
            return loss

        optimizer.step(closure)

        if batch_losses:
            total_loss += batch_losses[0]

        # Predictions are only needed for the accuracy metric, so skip autograd.
        # argmax over the raw scores selects the same class softmax would.
        with torch.no_grad():
            predictions = model(data).argmax(dim=1)
        # No squeeze(): it would collapse a batch of size one and break torch.cat.
        all_predictions.append(predictions.cpu())
        all_labels.append(labels.cpu())

    all_predictions = torch.cat(all_predictions)
    all_labels = torch.cat(all_labels)

    return round(accuracy(all_predictions, all_labels), 4), total_loss / len(train_loader)

In [None]:
def val(model, val_loader, criterion, device):
    """Evaluate the model on ``val_loader`` and return accuracy and mean batch loss.

    Args:
        model: Module to evaluate; it is put into evaluation mode.
        val_loader: Iterable of ``(data, labels)`` batches that supports ``len()``.
        criterion: Loss callable invoked as ``criterion(output, labels)``.
        device: Device every batch is moved to.

    Returns:
        tuple: ``(accuracy rounded to 4 decimals, mean per-batch loss)``.
    """
    model.eval()

    all_predictions = []
    all_labels = []
    total_loss = 0.0

    # Nothing here needs gradients, so the whole loop runs without autograd.
    with torch.no_grad():
        for data, labels in val_loader:
            data = data.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            output = model(data)
            total_loss += criterion(output, labels).item()

            # argmax over the raw scores selects the same class softmax would.
            # No squeeze(): it would collapse a final batch of size one and
            # break torch.cat below.
            all_predictions.append(output.argmax(dim=1).cpu())
            all_labels.append(labels.cpu())

    all_predictions = torch.cat(all_predictions)
    all_labels = torch.cat(all_labels)

    return round(accuracy(all_predictions, all_labels), 4), total_loss / len(val_loader)

In [None]:
def do_epoch(model, train_loader, val_loader, criterion, optimizer, device, writer):
    """Run one training pass followed by one validation pass.

    Returns:
        tuple: ``(train accuracy, validation accuracy, train loss, validation loss)``.
    """
    train_acc, train_loss = train(model, train_loader, criterion, optimizer, device, writer)
    # Validation runs after training, so it sees this epoch's updated weights.
    val_acc, val_loss = val(model, val_loader, criterion, device)
    return train_acc, val_acc, train_loss, val_loss

In [None]:
def get_model_norm(model):
    """Return the sum of ``torch.norm`` taken over each parameter of ``model``.

    This adds up one norm per parameter tensor; it is not the norm of all
    parameters concatenated. A model without parameters yields the float ``0.0``.
    """
    per_parameter_norms = (torch.norm(weights) for weights in model.parameters())
    return sum(per_parameter_norms, 0.0)


In [None]:
def main_train():
    """Train and validate the MLP on CIFAR-10 for a single W&B run.

    The learning rate, batch size and number of epochs are read from
    ``wandb.config``. Images are resized to 28x28, converted to grayscale,
    normalised and flattened, which gives the 784 inputs the MLP expects.
    Validation accuracy and loss are logged to W&B after every epoch;
    train/validation metrics and the model norm are also written to
    TensorBoard under ``logs``.
    """
    wandb.init(
        project="image-classification",
        notes="My first experiment",
    )
    device = get_default_device()
    learning_rate = wandb.config.learning_rate
    batch_size = wandb.config.batch_size
    num_epochs = wandb.config.num_epochs

    mean = [0.5]  # For grayscale, use a single value for mean
    std = [0.5]
    transforms = [
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.Resize((28, 28), antialias=True),
        v2.Grayscale(),
        transformss.Normalize(mean, std),
        torch.flatten,
    ]

    data_path = '../data'
    train_dataset = CIFAR10(root=data_path, train=True, transform=v2.Compose(transforms), download=True)
    val_dataset = CIFAR10(root=data_path, train=False, transform=v2.Compose(transforms), download=True)
    # Caching iterates each dataset once, so the transforms above run a single
    # time per sample instead of on every epoch.
    train_dataset = CachedDataset(train_dataset)
    val_dataset = CachedDataset(val_dataset)

    sharpness = 0.1
    weight_decay = 0.001

    val_batch_size = 500
    num_workers = 2
    persistent_workers = (num_workers != 0)
    # Pinned host memory is only requested when training on CUDA; the same
    # setting is used for both loaders.
    pin_memory = device.type == 'cuda'
    train_loader = DataLoader(train_dataset, shuffle=True, pin_memory=pin_memory, num_workers=num_workers,
                              batch_size=batch_size, drop_last=True, persistent_workers=persistent_workers)
    val_loader = DataLoader(val_dataset, shuffle=False, pin_memory=pin_memory, num_workers=0,
                            batch_size=val_batch_size, drop_last=False)

    model = MLP(784, 3136, 2000, 1000, 500, 10)
    model = model.to(device)
    base_optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    optimizer = SAM(model.parameters(), base_optimizer, rho=sharpness)
    criterion = torch.nn.CrossEntropyLoss()

    log_dir = "logs"
    writer = SummaryWriter(log_dir)
    try:
        writer.add_scalar('Batch size', batch_size)
        writer.add_text('Optimizer', optimizer.__class__.__name__)
        writer.add_scalar('Learning rate', learning_rate)
        writer.add_scalar('Weight decay', weight_decay)

        tbar = tqdm(range(num_epochs))
        for epoch in tbar:
            acc, acc_val, loss, loss_val = do_epoch(model, train_loader, val_loader, criterion, optimizer, device,
                                                    writer)
            wandb.log({"accuracy": acc_val, "loss": loss_val})
            tbar.set_postfix_str(f"Acc: {acc}, Acc_val: {acc_val}")
            writer.add_scalar("Train/Loss", loss, epoch)
            writer.add_scalar("Train/Accuracy", acc, epoch)
            writer.add_scalar("Val/Loss", loss_val, epoch)
            writer.add_scalar("Val/Accuracy", acc_val, epoch)
            writer.add_scalar("Model/Norm", get_model_norm(model), epoch)
    finally:
        # Flush and release the TensorBoard event file even if training fails.
        writer.close()


In [None]:
def main(device=None, count=None):
    """Create a W&B random-search sweep and run ``main_train`` for its trials.

    Args:
        device: Unused; kept so existing ``main(device)`` calls keep working.
            ``main_train`` selects the device itself for every run.
        count: Optional maximum number of runs for the agent to execute.
            ``None`` (the default) places no limit here.
    """
    sweep_config = {
        'method': 'random',  # You can choose other methods like 'grid', 'bayes', etc.
        'project': 'image-classification',
        # "accuracy" is the key main_train logs for validation accuracy.
        'metric': {'name': 'accuracy', 'goal': 'maximize'},
        'parameters': {
            'learning_rate': {'values': [0.001, 0.05, 0.01, 0.1]},
            'batch_size': {'values': [32, 64, 128]},
            'num_epochs': {'values': [50, 100, 150]},
        }
    }

    sweep_id = wandb.sweep(sweep_config, project='image-classification')
    wandb.agent(sweep_id, function=main_train, count=count)



In [None]:
# Entry point: start the sweep only when this code is executed directly, not
# when it is imported.
if __name__ == '__main__':
    # Only matters for frozen executables that use multiprocessing (see the
    # multiprocessing docs); calling it otherwise is harmless.
    freeze_support()
    main()

Create sweep with ID: 144fqq4m
Sweep URL: https://wandb.ai/vlapin/image-classification/sweeps/144fqq4m


[34m[1mwandb[0m: Agent Starting Run: vwy054f1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 100
[34m[1mwandb[0m: Currently logged in as: [33mvlapin[0m. Use [1m`wandb login --relogin`[0m to force relogin


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 73692692.47it/s]


Extracting ../data/cifar-10-python.tar.gz to ../data
Files already downloaded and verified


 43%|████▎     | 43/100 [3:25:15<4:29:55, 284.14s/it, Acc: 0.3026, Acc_val: 0.3087]