[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DalasNoin/arena/blob/main/w2/wandbex.ipynb)

In [4]:
# !pip install fancy_einsum
# !pip install einops pandas plotly
#  ! wget https://raw.githubusercontent.com/callummcdougall/arena-v1/main/w2d1/utils.py
import torch as t
from torch import nn
from torchvision import datasets, transforms
import torchvision
from torchvision.models import resnet34
from torch.utils.data import DataLoader
from fancy_einsum import einsum
from typing import Union, Optional, Callable
import numpy as np
from einops import rearrange
from tqdm.notebook import tqdm_notebook
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import time
import wandb
import utils

keyfile = "keystore.yaml"
if not wandb_key and os.path.exists(keyfile):
    import yaml
    keys = yaml.safe_load(open(keyfile,"r"))
    wandb_key = keys["wandb"]
os.environ["WANDB_API_KEY"] = wandb_key

device = "cuda" if t.cuda.is_available() else "cpu"

In [2]:
cifar_mean = [0.485, 0.456, 0.406]
cifar_std = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=cifar_mean, std=cifar_std)
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

utils.show_cifar_images(trainset, rows=3, cols=5)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
def train(trainset, testset, epochs: int, loss_fn: Callable, batch_size: int, lr: float) -> tuple[list, list]:

    model = resnet34().to(device).train()
    optimizer = t.optim.Adam(model.parameters(), lr=lr)

    loss_list = []
    accuracy_list = []

    trainloader = DataLoader(trainset, shuffle=True, batch_size=batch_size)
    testloader = DataLoader(testset, shuffle=True, batch_size=batch_size)

    for epoch in range(epochs):

        progress_bar = tqdm_notebook(trainloader)

        for (x, y) in progress_bar:

            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            loss.backward()
            optimizer.step()

            loss_list.append(loss.item())

            progress_bar.set_description(f"Epoch = {epoch}, Loss = {loss.item():.4f}")

        with t.inference_mode():

            accuracy = 0
            total = 0

            for (x, y) in testloader:

                x = x.to(device)
                y = y.to(device)

                y_hat = model(x)
                y_predictions = y_hat.argmax(1)
                accuracy += (y_predictions == y).sum().item()
                total += y.size(0)

            accuracy_list.append(accuracy/total)

        print(f"Epoch {epoch+1}/{epochs}, train loss is {loss:.6f}, accuracy is {accuracy}/{total}")

    filename = "./w0d3_resnet.pt"
    print(f"Saving model to: {filename}")
    t.save(model.state_dict(), filename)

    utils.plot_results(loss_list, accuracy_list)
    return loss_list, accuracy_list

epochs = 1
loss_fn = nn.CrossEntropyLoss()
batch_size = 128
lr = 0.001

loss_list, accuracy_list = train(trainset, testset, epochs, loss_fn, batch_size, lr)

  0%|          | 0/391 [00:00<?, ?it/s]

Epoch 1/1, train loss is 1.184148, accuracy is 5839/10000
Saving model to: ./w0d3_resnet.pt


In [8]:
def train(trainset, testset, epochs: int, loss_fn: Callable, batch_size: int, lr: float) -> None:

    config_dict = {
        "batch_size": batch_size,
        "epochs": epochs,
        "lr": lr,
    }
    wandb.init(project="w2d1_resnet", config=config_dict)

    model = resnet34().to(device).train()
    optimizer = t.optim.Adam(model.parameters(), lr=lr)

    examples_seen = 0
    start_time = time.time()

    trainloader = DataLoader(trainset, shuffle=True, batch_size=batch_size)
    testloader = DataLoader(testset, shuffle=True, batch_size=batch_size)

    wandb.watch(model, criterion=loss_fn, log="all", log_freq=10, log_graph=True)

    for epoch in range(epochs):

        progress_bar = tqdm_notebook(trainloader)

        for (x, y) in progress_bar:

            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            loss.backward()
            optimizer.step()

            progress_bar.set_description(f"Epoch = {epoch}, Loss = {loss.item():.4f}")

            examples_seen += len(x)
            wandb.log({"train_loss": loss, "elapsed": time.time() - start_time}, step=examples_seen)

        with t.inference_mode():

            accuracy = 0
            total = 0

            for (x, y) in testloader:

                x = x.to(device)
                y = y.to(device)

                y_hat = model(x)
                y_predictions = y_hat.argmax(1)
                accuracy += (y_predictions == y).sum().item()
                total += y.size(0)

            wandb.log({"test_accuracy": accuracy/total}, step=examples_seen)

    filename = f"{wandb.run.dir}/model_state_dict.pt"
    print(f"Saving model to: {filename}")
    t.save(model.state_dict(), filename)
    wandb.save(filename)

train(trainset, testset, epochs, loss_fn, batch_size, lr)

VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.107794…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01670413681667924, max=1.0)…

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


  0%|          | 0/391 [00:00<?, ?it/s]

Saving model to: /Users/YaoLu/src/github.com/dalasnoin/arena/w2/wandb/run-20221101_170951-3oarz9rg/files/model_state_dict.pt




In [10]:
def train() -> None:

    wandb.init()

    epochs = wandb.config.epochs
    batch_size = wandb.config.batch_size
    lr = wandb.config.lr

    model = resnet34().to(device).train()
    optimizer = t.optim.Adam(model.parameters(), lr=lr)

    examples_seen = 0
    start_time = time.time()

    trainloader = DataLoader(trainset, shuffle=True, batch_size=batch_size)
    testloader = DataLoader(testset, shuffle=True, batch_size=batch_size)

    wandb.watch(model, criterion=loss_fn, log="all", log_freq=10, log_graph=True)

    for epoch in range(epochs):

        progress_bar = tqdm_notebook(trainloader)

        for (x, y) in progress_bar:

            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            loss.backward()
            optimizer.step()

            progress_bar.set_description(f"Epoch = {epoch}, Loss = {loss.item():.4f}")

            examples_seen += len(x)
            wandb.log({"train_loss": loss, "elapsed": time.time() - start_time}, step=examples_seen)


        with t.inference_mode():

            accuracy = 0
            total = 0

            for (x, y) in testloader:

                x = x.to(device)
                y = y.to(device)

                y_hat = model(x)
                y_predictions = y_hat.argmax(1)
                accuracy += (y_predictions == y).sum().item()
                total += y.size(0)

            wandb.log({"test_accuracy": accuracy/total}, step=examples_seen)

        print(f"Epoch {epoch+1}/{epochs}, train loss is {loss:.6f}, accuracy is {accuracy}/{total}")

    filename = f"{wandb.run.dir}/model_state_dict.pt"
    print(f"Saving model to: {filename}")
    t.save(model.state_dict(), filename)
    wandb.save(filename)

sweep_config = {
    'method': 'random',
    'name': 'w2d1_resnet_sweep_2',
    'metric': {'name': 'test_accuracy', 'goal': 'maximize'},
    'parameters': 
    {
        'batch_size': {'values': [64, 128, 256]},
        'epochs': {'min': 1, 'max': 3},
        'lr': {'max': 0.1, 'min': 0.0001, 'distribution': 'log_uniform_values'}
     }
}

sweep_id = wandb.sweep(sweep=sweep_config, project='w2d1_resnet')

wandb.agent(sweep_id=sweep_id, function=train, count=2)

Create sweep with ID: 3z1pj8az
Sweep URL: https://wandb.ai/dalasnoin/w2d1_resnet/sweeps/3z1pj8az


[34m[1mwandb[0m: Agent Starting Run: aq4u6apq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	lr: 0.0008642602367494391


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016687805550009215, max=1.0…

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


  0%|          | 0/391 [00:00<?, ?it/s]

Epoch 1/2, train loss is 1.222985, accuracy is 5972/10000


  0%|          | 0/391 [00:00<?, ?it/s]

Epoch 2/2, train loss is 0.934505, accuracy is 6663/10000
Saving model to: /Users/YaoLu/src/github.com/dalasnoin/arena/w2/wandb/run-20221101_205610-aq4u6apq/files/model_state_dict.pt




0,1
elapsed,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
test_accuracy,▁█
train_loss,█▆▅▅▄▄▄▄▃▃▄▃▃▃▃▂▃▂▃▂▂▂▂▂▁▂▂▂▁▂▂▂▂▂▂▁▁▁▁▂

0,1
elapsed,1076.82234
test_accuracy,0.6663
train_loss,0.9345


[34m[1mwandb[0m: Agent Starting Run: na8ipr4b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	lr: 0.01118885674533271


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


  0%|          | 0/391 [00:00<?, ?it/s]

Epoch 1/2, train loss is 1.647678, accuracy is 3595/10000


  0%|          | 0/391 [00:00<?, ?it/s]

Epoch 2/2, train loss is 1.263677, accuracy is 5064/10000
Saving model to: /Users/YaoLu/src/github.com/dalasnoin/arena/w2/wandb/run-20221101_211503-na8ipr4b/files/model_state_dict.pt




VBox(children=(Label(value='6.045 MB of 83.298 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.07257…

0,1
elapsed,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_accuracy,▁█
train_loss,█▄▅▄▃▃▃▃▃▂▃▃▄▃▃▃▂▃▂▃▂▂▂▂▂▁▁▂▁▁▁▂▁▁▁▁▁▂▂▂

0,1
elapsed,1068.21848
test_accuracy,0.5064
train_loss,1.26368
