In [1]:
from multiprocessing import freeze_support

import os
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision.transforms import v2
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import wandb
from sam import SAM
import json

2023-11-06 13:38:56.479549: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-06 13:38:56.531806: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-06 13:38:56.531852: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-06 13:38:56.531890: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-06 13:38:56.543303: I tensorflow/core/platform/cpu_feature_g

In [2]:
# Process-wide settings, applied in one shot:
#   * enumerate GPUs in PCI bus order and expose only device index 0 to CUDA,
#   * tell wandb which notebook file this code is running from.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '0',
    'WANDB_NOTEBOOK_NAME': 'tema5.ipynb',
})

In [3]:
def get_default_device():
    """Pick the device to train on: CUDA first, then Apple MPS, then CPU.

    Returns:
        torch.device: ``cuda`` when a CUDA device is available, otherwise
        ``mps`` when the MPS backend reports itself available, otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        # For multi-gpu workstations, PyTorch will use the first available GPU
        # (cuda:0), unless specified otherwise (cuda:1).
        return torch.device('cuda')

    # Builds of PyTorch without the MPS backend have no ``torch.backends.mps``
    # attribute; treat that the same as "MPS not available".
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device('mps')

    return torch.device('cpu')

In [4]:
def get_transforms():
    """Build the preprocessing pipelines for the training and validation splits.

    Both pipelines are currently identical: convert to a float32 image scaled
    to [0, 1], normalize per RGB channel, resize to 28x28, convert to
    grayscale and flatten. Each call returns two independent lists holding
    their own transform instances.

    Returns:
        tuple[list, list]: ``(train_transforms, valid_transforms)``.
    """
    channel_mean = [0.49139968, 0.48215827, 0.44653124]
    channel_std = [0.24703233, 0.24348505, 0.26158768]

    def _base_pipeline():
        # A fresh list (and fresh transform objects) on every call so the two
        # splits never share state.
        return [
            v2.ToImage(),
            v2.ToDtype(torch.float32, scale=True),
            v2.Normalize(mean=channel_mean, std=channel_std),
            v2.Resize((28, 28), antialias=True),
            v2.Grayscale(),
            torch.flatten,
        ]

    # Training-time augmentation (random affine / horizontal / vertical flips)
    # is disabled, so the training pipeline is just the base pipeline.
    train_transforms = _base_pipeline()
    valid_transforms = _base_pipeline()

    return train_transforms, valid_transforms

In [5]:
class CachedDataset(Dataset):
    """Dataset wrapper that can materialize every sample up front.

    With ``cache=True`` the wrapped dataset is iterated once in the constructor
    and the resulting samples are stored in a tuple, so later lookups are plain
    tuple indexing. With ``cache=False`` the wrapped dataset is kept as-is and
    every lookup is delegated to it.
    """

    def __init__(self, dataset, cache=True):
        # Iterating the dataset here runs whatever work it does per sample
        # exactly once, at construction time.
        self.dataset = tuple(sample for sample in dataset) if cache else dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, i):
        return self.dataset[i]

In [6]:
def get_dataset(train_transforms, valid_transforms):
    """Load the CIFAR-10 train and test splits from ``./data`` and cache them.

    The data is not downloaded (``download=False``), so it must already be
    present under ``./data``. Each split is wrapped in ``CachedDataset`` with
    caching enabled, so the transforms are applied once, up front.

    Args:
        train_transforms: list of transforms composed for the training split.
        valid_transforms: list of transforms composed for the test split.

    Returns:
        tuple: ``(train_dataset, val_dataset)``.
    """
    data_path = './data'

    def _load_split(is_train, transforms):
        split = CIFAR10(root=data_path, train=is_train, transform=v2.Compose(transforms), download=False)
        return CachedDataset(split, cache=True)

    return _load_split(True, train_transforms), _load_split(False, valid_transforms)

In [7]:
class MLP(torch.nn.Module):
    """Six-layer fully connected classifier.

    Five hidden blocks of Linear -> BatchNorm1d -> ReLU -> Dropout(0.2) with
    widths ``hidden_size * (1, 2, 4, 2, 1)``, followed by a final Linear layer
    that maps to ``output_size`` (no activation on the output).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.dropout_prob = 0.2

        # Layer widths from the input through the five hidden blocks.
        widths = [
            input_size,
            hidden_size,
            hidden_size * 2,
            hidden_size * 4,
            hidden_size * 2,
            hidden_size,
        ]
        # Register fc1/bn1 ... fc5/bn5 in that order so attribute names and
        # state_dict keys stay fc<i> / bn<i>.
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fc{idx}", nn.Linear(n_in, n_out))
            setattr(self, f"bn{idx}", nn.BatchNorm1d(n_out))

        self.fc6 = nn.Linear(hidden_size, output_size)

        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(self.dropout_prob)

    def forward(self, x):
        hidden_blocks = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
            (self.fc4, self.bn4),
            (self.fc5, self.bn5),
        )
        for linear, batch_norm in hidden_blocks:
            x = self.dropout(self.relu(batch_norm(linear(x))))

        return self.fc6(x)

In [8]:
def accuracy(labels, output):
    """Return the fraction of rows in ``output`` whose arg-max equals the label.

    Args:
        labels: tensor of target class indices, one per row of ``output``.
        output: tensor of per-class scores; the arg-max is taken along dim 1.

    Returns:
        float: number of correct predictions divided by ``len(output)``.
    """
    predicted = torch.argmax(output, dim=1)
    n_correct = (predicted == labels).sum().item()
    return n_correct / len(output)

In [9]:
def train(model, train_loader, criterion, optimizer, device, writer, optimizator):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch the loss is computed, back-propagated and the optimizer is
    stepped. The per-batch loss is logged to ``writer`` under
    ``"Loss/Training/Batch"`` with the 1-based batch index as the step (the
    index restarts at 1 on every call, i.e. every epoch).

    Args:
        model: the network to optimize; switched to training mode here.
        train_loader: iterable yielding ``(data, labels)`` batches.
        criterion: callable ``criterion(output, labels)`` returning the loss.
        optimizer: optimizer to step. If it exposes a ``first_step`` attribute
            it is treated as a two-step (SAM-style) optimizer and is stepped
            with a closure that re-runs the forward/backward pass.
        device: device the batches are moved to.
        writer: object with ``add_scalar(tag, scalar_value, global_step)``.
        optimizator: integer optimizer code from the sweep config; accepted
            for call-site compatibility, not used inside this function.

    Returns:
        tuple[float, float]: mean batch loss and mean batch accuracy (rounded
        to 4 decimals) over the epoch.
    """
    model.train()

    # NOTE(review): assumes the SAM class from the local `sam` module exposes
    # `first_step` and expects gradients from a prior backward pass when
    # `step(closure)` is called - confirm against that module.
    needs_closure = hasattr(optimizer, "first_step")

    acc_train = 0.0
    loss_train = 0.0

    batch = 0
    for data, labels in train_loader:
        batch += 1
        data = data.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        def closure():
            # Full forward/backward re-evaluation at the current weights.
            optimizer.zero_grad()
            closure_loss = criterion(model(data), labels)
            closure_loss.backward()
            return closure_loss

        # First (and, for ordinary optimizers, only) forward/backward pass.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, labels)
        loss.backward()

        if needs_closure:
            optimizer.step(closure)
        else:
            # Ordinary optimizers already have their gradients; passing the
            # closure would only repeat the forward pass.
            optimizer.step()

        batch_loss = loss.item()
        loss_train += batch_loss
        # add_scalar(tag, scalar_value, global_step): value first, step second.
        writer.add_scalar("Loss/Training/Batch", batch_loss, batch)

        acc_train += accuracy(labels, output)

    return loss_train / batch, round(acc_train / batch, 4)

In [10]:
def val(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        model: the network to evaluate; switched to eval mode here.
        val_loader: sized iterable yielding ``(data, labels)`` batches.
        criterion: callable ``criterion(output, labels)`` returning the loss.
        device: device the batches are moved to.

    Returns:
        tuple[float, float]: mean batch loss and mean batch accuracy (rounded
        to 4 decimals), both averaged over ``len(val_loader)``.
    """
    model.eval()

    total_loss = 0.0
    total_acc = 0.0

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            logits = model(inputs)
            total_loss += criterion(logits, targets).item()
            total_acc += accuracy(targets, logits)

    n_batches = len(val_loader)
    return total_loss / n_batches, round(total_acc / n_batches, 4)

In [11]:
def do_epoch(model, train_loader, val_loader, criterion, optimizer, device, writer, optimizator):
    """Run one training pass followed by one validation pass.

    Returns:
        tuple: ``(train_loss, train_accuracy, val_loss, val_accuracy)``.
    """
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device, writer, optimizator)
    val_loss, val_acc = val(model, val_loader, criterion, device)

    return train_loss, train_acc, val_loss, val_acc

In [12]:
def get_model_norm(model):
    """Return the sum of ``torch.norm`` over every parameter tensor of ``model``.

    This adds up the individual per-parameter norms; it is not the norm of all
    parameters concatenated. For a model without parameters the float ``0.0``
    is returned.
    """
    return sum((torch.norm(weights) for weights in model.parameters()), 0.0)

In [13]:
def poly_lr(ep, max_ep, initial_lr, exponent=0.9):
    """Polynomial learning-rate decay.

    Args:
        ep: current epoch index.
        max_ep: total number of epochs.
        initial_lr: learning rate at epoch 0.
        exponent: power applied to the remaining-training fraction.

    Returns:
        ``initial_lr * (1 - ep / max_ep) ** exponent``.
    """
    remaining_fraction = 1 - ep / max_ep
    decay = remaining_fraction ** exponent
    return initial_lr * decay

In [14]:
# Module-level placeholders for the best score / configuration.
# They are initialized here and not referenced by the other visible cells.
best_global, best_config = 0.0, None

In [15]:
def _build_optimizer(model, optimizator, learning_rate):
    """Create the optimizer selected by the sweep's integer ``optimizer`` code.

    Codes: 0 = Adam, 1 = SGD (Nesterov momentum), 2 = RMSprop, 3 = Adagrad,
    anything else = SAM wrapped around SGD.
    """
    if optimizator == 0:
        return torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)
    if optimizator == 1:
        return torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.99, weight_decay=3e-05, nesterov=True)
    if optimizator == 2:
        return torch.optim.RMSprop(model.parameters(), lr=learning_rate, alpha=0.9)
    if optimizator == 3:
        return torch.optim.Adagrad(model.parameters(), lr=learning_rate)

    # NOTE(review): an already-built optimizer *instance* is handed to SAM here.
    # Some SAM implementations expect the optimizer class plus its keyword
    # arguments instead - confirm against the local `sam` module.
    base_optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.99, weight_decay=3e-05, nesterov=True)
    return SAM(model.parameters(), base_optimizer, rho=0.05)


def _append_sweep_result(json_file_path, best_acc_val, config):
    """Append one ``{"Acc Val", "config"}`` entry to the JSON results file.

    The file is created when it does not exist yet; entries are keyed
    ``entry_<n>`` where ``n`` is the number of entries already stored.
    """
    try:
        with open(json_file_path, "r") as json_file:
            existing_data = json.load(json_file)
    except FileNotFoundError:
        existing_data = {}

    new_key = f"entry_{len(existing_data)}"
    existing_data[new_key] = {"Acc Val": best_acc_val, "config": config}

    with open(json_file_path, "w") as json_file:
        json.dump(existing_data, json_file, indent=4)


def main_train():
    """Run one training job configured by the active wandb sweep.

    Reads ``batch_size``, ``epochs``, ``optimizer``, ``learning_rate`` and
    ``poly_lr`` from ``wandb.config``, trains an ``MLP(784, 1000, 10)`` on the
    cached CIFAR-10 datasets, logs metrics to TensorBoard and wandb, and
    appends the best validation accuracy together with the run configuration
    to ``sweep.json``.
    """
    wandb.init(
        project="cifar-10",

        config={
            "dataset": "CIFAR-10",
        }
    )

    batch_size = wandb.config.batch_size
    epochs = wandb.config.epochs
    optimizator = wandb.config.optimizer
    learning_rate = wandb.config.learning_rate
    make_poly_lr = wandb.config.poly_lr

    writer = SummaryWriter()
    best_acc_val = 0
    try:
        writer.add_scalar("Batch_size", batch_size)
        writer.add_scalar("Learning_rate", learning_rate)
        writer.add_scalar("Optimizer", optimizator)

        device = get_default_device()

        train_transforms, valid_transforms = get_transforms()
        train_dataset, val_dataset = get_dataset(train_transforms, valid_transforms)

        # Pinned host memory is only requested when training on CUDA; both
        # loaders use the same setting.
        pin_memory = device.type == 'cuda'
        num_workers = 2
        persistent_workers = (num_workers != 0)
        val_batch_size = 500
        train_loader = DataLoader(train_dataset, shuffle=True, pin_memory=pin_memory, num_workers=num_workers,
                                  batch_size=batch_size, drop_last=True, persistent_workers=persistent_workers)
        val_loader = DataLoader(val_dataset, shuffle=False, pin_memory=pin_memory, num_workers=0,
                                batch_size=val_batch_size, drop_last=False)

        # 784 inputs = the flattened 28x28 grayscale image built by get_transforms().
        model = MLP(784, 1000, 10)
        model = model.to(device)

        optimizer = _build_optimizer(model, optimizator, learning_rate)
        criterion = torch.nn.CrossEntropyLoss()

        tbar = tqdm(range(epochs))
        for epoch in tbar:
            if make_poly_lr == 1:
                # Polynomial decay: same learning rate for every parameter group.
                lr = poly_lr(epoch, epochs, learning_rate, 0.9)
                for group in optimizer.param_groups:
                    group['lr'] = lr

            loss_train, acc, loss_valid, acc_val = do_epoch(model, train_loader, val_loader, criterion, optimizer,
                                                            device, writer, optimizator)
            tbar.set_postfix_str(f"Acc: {acc}, Acc_val: {acc_val}")

            writer.add_scalar("Train/Accuracy", acc, epoch)
            writer.add_scalar("Train/Loss", loss_train, epoch)
            writer.add_scalar("Val/Accuracy", acc_val, epoch)
            writer.add_scalar("Val/Loss", loss_valid, epoch)

            writer.add_scalar("Model/Norm", get_model_norm(model), epoch)
            if make_poly_lr == 1:
                writer.add_scalar("Learning_rate/Epoch", lr, epoch)

            wandb.log({"acc": acc, "acc_val": acc_val})

            if best_acc_val < acc_val:
                best_acc_val = acc_val
    finally:
        # Flush and release the TensorBoard event file even when training fails.
        writer.close()

    _append_sweep_result("sweep.json", best_acc_val, wandb.config.as_dict())

    wandb.finish()

In [16]:
# Random-search sweep definition handed to wandb.
#   * batch_size / epochs: integers drawn uniformly from [min, max]
#   * optimizer: integer code selecting the optimizer inside main_train
#     (0 Adam, 1 SGD, 2 RMSprop, 3 Adagrad, 4 SAM)
#   * poly_lr: 1 enables the polynomial learning-rate decay, 0 disables it
sweep_config = dict(
    method='random',
    project='cifar-10',
    parameters=dict(
        batch_size=dict(distribution="int_uniform", min=256, max=512),
        epochs=dict(distribution="int_uniform", min=75, max=125),
        optimizer=dict(values=[0, 1, 2, 3, 4]),
        learning_rate=dict(values=[5e-2, 1e-2, 5e-3, 1e-3, 5e-4, 1e-4]),
        poly_lr=dict(values=[0, 1]),
    ),
)

In [None]:
# Register the sweep described by `sweep_config` with wandb under the
# 'cifar-10' project and keep the returned sweep id.
sweep_id = wandb.sweep(sweep_config, project='cifar-10')
# Start an agent for that sweep in this process; it calls `main_train` once per
# trial, with `count=100` as the limit on the number of trials.
wandb.agent(sweep_id, function=main_train, count=100)

Create sweep with ID: ob6iump8
Sweep URL: https://wandb.ai/razvanpanaite/cifar-10/sweeps/ob6iump8


[34m[1mwandb[0m: Agent Starting Run: 8g6snkq1 with config:
[34m[1mwandb[0m: 	batch_size: 480
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	optimizer: 1
[34m[1mwandb[0m: 	poly_lr: 0
[34m[1mwandb[0m: Currently logged in as: [33mrazvanpanaite[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|███████████████████████████████████████████████████████████████████████| 100/100 [02:01<00:00,  1.21s/it, Acc: 0.9309, Acc_val: 0.5159]


{'entry_0': {'Acc Val': 0.5194, 'config': {'batch_size': 480, 'epochs': 100, 'learning_rate': 0.0005, 'optimizer': 1, 'poly_lr': 0, 'dataset': 'CIFAR-10'}}}


VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁▂▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████████
acc_val,▁▄▅▆▆▇▇▇▇▇▇▇▇██▇████████████████████████

0,1
acc,0.9309
acc_val,0.5159


[34m[1mwandb[0m: Agent Starting Run: 7fi27hx5 with config:
[34m[1mwandb[0m: 	batch_size: 316
[34m[1mwandb[0m: 	epochs: 101
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	optimizer: 1
[34m[1mwandb[0m: 	poly_lr: 1


 59%|██████████████████████████████████████████▊                             | 60/101 [01:38<01:07,  1.64s/it, Acc: 0.9091, Acc_val: 0.5072]

In [1]:
import json

# Results file to read; main_train writes its per-run results to a file with
# this same name.
json_file_path = "sweep.json"

# Load every recorded entry into memory. This raises FileNotFoundError when no
# results file exists yet.
with open(json_file_path, "r") as json_file:
    existing_data = json.load(json_file)

In [2]:
# Best validation accuracy over all recorded sweep entries (0 when there are none).
maxim = max([0] + [entry['Acc Val'] for entry in existing_data.values()])
print(maxim)

0.5365
