# Install & Set Up Wandb
Weights & Biases (Wandb) is used here to manage AI model development. Its features include tracking training and fine-tuning runs, reporting, automating hyperparameter sweeps, and a model registry for versioning and reproducibility.

In [None]:
!pip install --upgrade wandb

Collecting wandb
  Downloading wandb-0.24.2-py3-none-manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading wandb-0.24.2-py3-none-manylinux_2_28_x86_64.whl (23.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.0/23.0 MB[0m [31m50.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.24.1
    Uninstalling wandb-0.24.1:
      Successfully uninstalled wandb-0.24.1
Successfully installed wandb-0.24.2


 Log in to Wandb

Authenticate with your Wandb account:

In [None]:
import wandb
# Authenticate this session with W&B before any run is created.
# NOTE(review): judging by the recorded cell output, this prompts interactively
# for an API key when none is stored yet — confirm before running unattended.
wandb.login()

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 2


[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Create a new API key at: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Store your API key securely and do not share it.
[34m[1mwandb[0m: Paste your API key and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhardik-jain[0m ([33mhardik-jain-iit-jodhpur[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Initialize a Wandb Run
Create a Wandb project to track experiments.

In [None]:
# Start a tracked run and record its hyperparameters alongside it.
# Change the project name as needed.
wandb.init(
    config=dict(lr=0.001, epochs=5, batch_size=64, model_type="CNN"),
    project="MLOps-L06-image-classification",
)

# Handle to the experiment config; later cells read values such as
# config.batch_size from it.
config = wandb.config

 Load CIFAR-10 Dataset

*   We load CIFAR-10 and normalize it for training

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline: image -> tensor, then normalize with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# CIFAR-10 training split followed by the test split, both stored under ./data
# (downloaded on first use) and sharing the same preprocessing.
train_set, test_set = (
    torchvision.datasets.CIFAR10(
        root="./data", train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Batch size comes from the experiment config; only the training data is shuffled.
train_loader = DataLoader(train_set, shuffle=True, batch_size=config.batch_size)
test_loader = DataLoader(test_set, shuffle=False, batch_size=config.batch_size)


100%|██████████| 170M/170M [00:02<00:00, 80.3MB/s]


Define a CNN Model

We create a simple Convolutional Neural Network (CNN) for image classification.

In [None]:
import torch.nn as nn
import torch.optim as optim

class SimpleCNN(nn.Module):
    """Small two-convolution CNN for 3-channel 32x32 images.

    Layout: conv(3->16) -> ReLU -> 2x2 max-pool -> conv(16->32) -> ReLU ->
    2x2 max-pool -> flatten -> fc(32*8*8 -> 128) -> ReLU -> fc(128 -> num_classes).

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # 3x3 convolutions with padding=1 keep the spatial size unchanged.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        # Two 2x2 poolings reduce 32x32 inputs to 8x8 feature maps.
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for x of shape (batch, 3, 32, 32).

        Raises:
            RuntimeError: If the flattened feature size does not match fc1,
                e.g. when the input is not 32x32.
        """
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2, 2)
        # Flatten everything except the batch dimension. Unlike view(-1, 32*8*8),
        # this never folds extra spatial elements into the batch dimension, so a
        # wrongly sized input fails loudly at fc1 instead of changing the batch size.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

Train & Log Metrics in WandB



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import wandb

def train_model(config_dict, max_logged_predictions=32):
    """Train SimpleCNN and track the run in Weights & Biases.

    Uses the notebook-level ``SimpleCNN``, ``train_loader`` and ``test_loader``.
    Logs the averaged training loss every 100 batches, the test accuracy and a
    "Live Predictions" table after every epoch, then saves the weights to
    ``model.pth`` and logs them as the ``simple_cnn_model`` artifact.

    Args:
        config_dict: Hyperparameters for the run; must provide "lr" and "epochs".
        max_logged_predictions: Maximum number of test images (with true and
            predicted labels) written to the predictions table per epoch.
    """
    # The context manager finishes the run on exit, including when training raises.
    with wandb.init(project="MLOps-L06-image-classification", config=config_dict) as run:
        config = dict(run.config)  # Plain dict copy of the run's config values

        model = SimpleCNN()
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config["lr"])

        for epoch in range(config["epochs"]):
            model.train()
            running_loss = 0.0

            for step, (inputs, labels) in enumerate(train_loader):
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # Report the mean loss of each window of 100 batches.
                if step % 100 == 99:
                    run.log({"training_loss": running_loss / 100, "epoch": epoch + 1})
                    running_loss = 0.0

            # Evaluation
            model.eval()
            correct, total = 0, 0
            # Build a fresh table every epoch: a single table that keeps growing
            # would accumulate every test image for every epoch, and re-logging
            # an already-logged, mutated table may be ignored by wandb.
            table = wandb.Table(columns=["Image", "True Label", "Predicted Label"])
            rows_logged = 0
            with torch.no_grad():
                for images, labels in test_loader:
                    outputs = model(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    # Only the first max_logged_predictions samples go into the table.
                    n_new = max(0, min(max_logged_predictions - rows_logged, images.size(0)))
                    for idx in range(n_new):
                        table.add_data(
                            wandb.Image(images[idx]),
                            labels[idx].item(),
                            predicted[idx].item(),
                        )
                    rows_logged += n_new

            accuracy = 100 * correct / total
            run.log({"test_accuracy": accuracy, "epoch": epoch + 1, "Live Predictions": table})

        # Save and log model
        torch.save(model.state_dict(), "model.pth")
        artifact = wandb.Artifact("simple_cnn_model", type="model")
        artifact.add_file("model.pth")
        run.log_artifact(artifact)

# Example configuration dictionary.
# Kept under its own name so the notebook-level `config` (the wandb config the
# data-loading cell reads batch_size from) is not overwritten by a plain dict,
# which would break that cell when it is re-run.
train_config = {
    "lr": 0.001,
    "epochs": 5
}

train_model(train_config)



0,1
epoch,▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆████████
test_accuracy,▁▅▇▇█
training_loss,█▆▅▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,5.0
test_accuracy,68.88
training_loss,0.77126


# Hyperparameter Optimization with Sweeps

Sweeps automate the hyperparameter search and provide rich, interactive visualizations for experiment tracking.


In [None]:
# Sweep definition: search strategy, the metric to optimise, and the search space.
sweep_config = dict(
    method="grid",  # Options: "random", "grid", "bayes"
    metric=dict(name="test_accuracy", goal="maximize"),
    parameters=dict(
        lr=dict(values=[0.001, 0.0005, 0.0001]),
        epochs=dict(values=[5, 10]),
    ),
)

# Register the sweep under the project; the returned id is what the agent is
# started with later.
sweep_id = wandb.sweep(sweep_config, project="MLOps-L06-image-classification")


Create sweep with ID: nmj0tkdd
Sweep URL: https://wandb.ai/hardik-jain-iit-jodhpur/MLOps-L06-image-classification/sweeps/nmj0tkdd


Define Sweep Function

In [None]:
def sweep_train():
    """Train SimpleCNN for one sweep run and log its best checkpoint.

    Reads ``lr`` and ``epochs`` from the run's config, trains on the
    notebook-level ``train_loader``, evaluates on ``test_loader`` after every
    epoch, and keeps the weights of the epoch with the highest test accuracy.
    Those weights are logged as the ``best_model`` artifact.
    """
    # The context manager finishes the run on exit (also on error), so no
    # explicit wandb.finish() is needed inside the block.
    with wandb.init() as run:
        config = run.config  # Load sweep-config parameters

        model = SimpleCNN()
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.lr)

        best_accuracy = 0.0  # Track the highest accuracy
        best_model_path = "best_model.pth"  # File to store the best model
        # The path is shared by every sweep run, so remember whether THIS run
        # actually wrote it; otherwise a file left by an earlier run could be
        # uploaded as this run's model.
        saved_checkpoint = False

        for epoch in range(config.epochs):
            model.train(True)
            running_loss = 0.0

            for step, (inputs, labels) in enumerate(train_loader):
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # Report the mean loss of each window of 100 batches.
                if step % 100 == 99:
                    run.log({"training_loss": running_loss / 100, "epoch": epoch + 1})
                    running_loss = 0.0

            # Validation phase
            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for images, labels in test_loader:
                    outputs = model(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            accuracy = 100 * correct / total
            run.log({"test_accuracy": accuracy, "epoch": epoch + 1})

            # Save model if it's the best so far
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                torch.save(model.state_dict(), best_model_path)  # Save locally
                saved_checkpoint = True

        # Record the best accuracy next to the per-epoch values.
        run.summary["best_test_accuracy"] = best_accuracy

        # Log the best model as a Wandb artifact, but only if this run produced one.
        if saved_checkpoint:
            artifact = wandb.Artifact("best_model", type="model")
            artifact.add_file(best_model_path)
            run.log_artifact(artifact)


Run the Sweep

In [None]:
# Start an agent for the sweep created above; it calls sweep_train for each run.
# count=3 caps this agent at 3 runs, while the grid in sweep_config spans
# 3 lr values x 2 epoch values = 6 combinations, so only part of the grid is covered.
wandb.agent(sweep_id, function=sweep_train, count=3)  # Runs 3 experiments