In [None]:
!pip install wandb



In [None]:
import wandb
import numpy as np

loss = np.random.rand()

#1. Organize your hyperparameters
config = {'learning_rate': 0.001}

#2. To start wandb run
wandb.init(project='dlai_intro' , config=config)

#3. log metrics over time to visualize
wandb.log ({"loss": loss})

wandb.finish()

VBox(children=(Label(value='0.003 MB of 0.013 MB uploaded\r'), FloatProgress(value=0.2573228107451814, max=1.0…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113094111108997, max=1.0…

VBox(children=(Label(value='0.010 MB of 0.010 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
loss,▁

0,1
loss,0.34994


**1. Importing Libraries**

In [None]:
import math
from pathlib import Path
from types import SimpleNamespace
from tqdm.auto import tqdm
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
import wandb
import random

**2. Constants and Configuration**

In [None]:
INPUT_SIZE = 28 * 28
OUTPUT_SIZE = 10
HIDDEN_SIZE = 256
DATA_DIR = Path('./data/')
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_model(dropout):
    "Simple MLP with Dropout"
    return nn.Sequential(
        nn.Flatten(),
        nn.Linear(INPUT_SIZE, HIDDEN_SIZE),
        nn.BatchNorm1d(HIDDEN_SIZE),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE)
    ).to(DEVICE)

In [None]:
# Let's define a config object to store our hyperparameters
config = SimpleNamespace(
    epochs=2,
    batch_size=128,
    lr=1e-5,
    dropout=0.5,
    slice_size=10000,
    valid_pct=0.2
)

**3. Utility Functions**

In [None]:
def get_dataloaders(data_dir, batch_size, slice_size, valid_pct):
    "Get data loaders for training and validation"
    dataset = MNIST(root=data_dir, train=True, transform=ToTensor(), download=True)

    # Create a subset for training and validation
    train_size = int(len(dataset) * (1 - valid_pct))
    train_dataset, valid_dataset = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size * 2)

    return train_loader, valid_loader


def train_model(config):
    "Train a model with a given config"
    wandb.init(project="dlai_intro", config=config)

    # Create the data directory if it doesn't exist
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Get the data loaders
    train_loader, valid_loader = get_dataloaders(DATA_DIR,
                                                 config.batch_size,
                                                 config.slice_size,
                                                 config.valid_pct)
    n_steps_per_epoch = math.ceil(len(train_loader.dataset) / config.batch_size)

    # Initialize model, loss function, and optimizer
    model = get_model(config.dropout)
    loss_func = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=config.lr)

    example_ct = 0

    for epoch in tqdm(range(config.epochs), total=config.epochs):
        model.train()

        for step, (images, labels) in enumerate(train_loader):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            outputs = model(images)
            train_loss = loss_func(outputs, labels)
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            example_ct += len(images)
            metrics = {
                "train/train_loss": train_loss,
                "train/epoch": epoch + 1,
                "train/example_ct": example_ct
            }
            wandb.log(metrics)

        # Compute validation metrics
        val_loss, accuracy = validate_model(model, valid_loader, loss_func)
        val_metrics = {
            "val/val_loss": val_loss,
            "val/val_accuracy": accuracy
        }
        wandb.log(val_metrics)

    wandb.finish()

def validate_model(model, valid_loader, loss_func):
    "Compute the performance of the model on the validation dataset"
    model.eval()
    val_loss = 0.0
    correct = 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            val_loss += loss_func(outputs, labels) * labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()

    return val_loss / len(valid_loader.dataset), correct / len(valid_loader.dataset)

**4. Training**

In [None]:
wandb.login(anonymous="allow")
train_model(config)

  0%|          | 0/2 [00:00<?, ?it/s]

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁████████████████████
train/example_ct,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss,█▇▇▇▆▇▆▆▅▅▅▄▄▄▅▄▄▃▃▃▃▃▂▃▃▂▃▃▂▂▂▁▁▁▁▁▁▁▁▁
val/val_accuracy,▁█
val/val_loss,█▁

0,1
train/epoch,2.0
train/example_ct,96000.0
train/train_loss,1.04001
val/val_accuracy,0.82267
val/val_loss,0.99674


In [None]:
config.lr = 1e-4
train_model(config)

  0%|          | 0/2 [00:00<?, ?it/s]

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁████████████████████
train/example_ct,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss,█▆▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▂▂▁▁▂▂▂▁▁▁▂▁▁▂▁▁▁▁▁▁
val/val_accuracy,▁█
val/val_loss,█▁

0,1
train/epoch,2.0
train/example_ct,96000.0
train/train_loss,0.36855
val/val_accuracy,0.92292
val/val_loss,0.30708


In [None]:
config.dropout = 0.1
config.epochs = 1
train_model(config)

  0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/example_ct,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss,█▇▆▅▅▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁▂▁▁▁▁
val/val_accuracy,▁
val/val_loss,▁

0,1
train/epoch,1.0
train/example_ct,48000.0
train/train_loss,0.43649
val/val_accuracy,0.90633
val/val_loss,0.39779


In [None]:
def get_dataloaders(data_dir, batch_size, slice_size, valid_pct):
    "Get data loaders for training and validation"
    dataset = MNIST(root=data_dir, train=True, transform=ToTensor(), download=True)

    # Create a subset for training and validation
    train_size = int(len(dataset) * (1 - valid_pct))
    train_dataset, valid_dataset = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size * 2)

    return train_loader, valid_loader


def train_model(config):
    "Train a model with a given config"
    wandb.init(project="dlai_intro", config=config)

    # Create the data directory if it doesn't exist
    DATA_DIR.mkdir(parents=True, exist_ok = True)

    # Get the data loaders
    train_loader, valid_loader = get_dataloaders(DATA_DIR,
                                                 config.batch_size,
                                                 config.slice_size,
                                                 config.valid_pct)
    n_steps_per_epoch = math.ceil(len(train_loader.dataset) / config.batch_size)

    # Initialize model, loss function, and optimizer
    model = get_model(config.dropout)
    loss_func = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=config.lr)

    example_ct = 0

    for epoch in tqdm(range(config.epochs), total=config.epochs):
        model.train()

        for step, (images, labels) in enumerate(train_loader):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            outputs = model(images)
            train_loss = loss_func(outputs, labels)
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            example_ct += len(images)
            metrics = {
                "train/train_loss": train_loss,
                "train/epoch": epoch + 1,
                "train/example_ct": example_ct
            }
            wandb.log(metrics)

        # Compute validation metrics
        val_loss, accuracy = validate_model(model, valid_loader, loss_func)
        val_metrics = {
            "val/val_loss": val_loss,
            "val/val_accuracy": accuracy
        }
        wandb.log(val_metrics)

    wandb.finish()

def validate_model(model, valid_loader, loss_func):
    "Compute the performance of the model on the validation dataset"
    model.eval()
    val_loss = 0.0
    correct = 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            val_loss += loss_func(outputs, labels) * labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()

    return val_loss / len(valid_loader.dataset), correct / len(valid_loader.dataset)