<a href="https://colab.research.google.com/github/MsPery/colabwork_1/blob/main/Copy_of_Peris_Odhiambo_W%26B_and_African_Leadership_University.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="https://wandb.me/logo-im-png" width="400" alt="Weights & Biases" />
<!--- @wandbcode{fmnist-alu} -->

Use Weights & Biases for machine learning experiment tracking, dataset versioning, and project collaboration.


<img src="https://wandb.me/mini-diagram" width="650" alt="Weights & Biases" />


## What this notebook covers with Weights and Biases:
* Using W&B to save and download your data
* Exploratory Data Analysis (EDA)
* Metrics logging 

# ✅ Sign Up

Sign up for a free [Weights & Biases account here](https://wandb.ai/signup)

[Weights and Biases docs](https://docs.wandb.ai/quickstart)

# Kaggle Competition Page

[Submit to the Competition here](https://www.kaggle.com/competitions/fashion-mnist-african-leadership-university)

# 🚀 Installing and importing

In [None]:
# Install/upgrade wandb and install timm (used below to create the model);
# the -q / -qq flags keep pip's output quiet.
!pip install -q --upgrade wandb
!pip install -qq timm

In [None]:
import pandas as pd
import numpy as np

import math
import timm
import wandb
import random
from pathlib import Path
import torch, torchvision
import torch.nn as nn
import torchvision as tv
import torchvision.transforms as T
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader

Set some constants 

In [None]:
# W&B project name passed to every `wandb.init` call in this notebook.
PROJECT = 'fashion-mnist-alu'
# Local directory the dataset artifact is downloaded into and loaded from.
DATA_DIR = Path('data/')
# Artifact reference handed to `use_artifact` when downloading the dataset.
ARTIFACT_PATH = 'wandb/fashion_mnist/FashionMnist:latest'
# Training batch size; also recorded in the run config as "batch_size".
BS = 128

# 💾 Data
#### Download the data from W&B Artifacts
Train, validation and test images will be downloaded, as well as train and validation labels


In [None]:
# Open a short-lived run whose only job is to fetch the dataset artifact.
# Using the run as a context manager guarantees the run is finished even if
# the lookup or download raises, so a failed cell never leaves a run active.
with wandb.init(project=PROJECT, job_type='download_dataset') as run:
    # Declare the dataset artifact as an input of this run.
    artifact = run.use_artifact(ARTIFACT_PATH, type='dataset')

    # Download the artifact's files into DATA_DIR and keep the returned path.
    artifact_dir = artifact.download(DATA_DIR)

## Prepare the Datasets and Dataloaders

In [None]:
class TensorDataset:
    """Dataset over an image tensor with optional labels and a transform.

    Every item is passed through ``tfms`` before being returned. When
    ``labels`` is ``None`` an item is just the transformed image; otherwise
    it is an ``(image, label)`` pair with the label cast via ``.long()``.
    """

    def __init__(self, images, labels=None, tfms=T.ConvertImageDtype(torch.float)):
        self.images, self.labels, self.tfms = images, labels, tfms

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Indexing with a one-element list keeps a leading dimension of
        # size 1 instead of dropping it as plain ``images[idx]`` would.
        sample = self.tfms(self.images[[idx]])
        if self.labels is None:
            return sample
        return sample, self.labels[idx].long()

In [None]:
class FashionMNIST:
    """Fashion-MNIST splits loaded from a single ``fashion_mnist.pt`` file.

    The file is read with ``torch.load`` and indexed as
    ``ds[split]["data"]`` / ``ds[split]["labels"]`` for the ``"train"`` and
    ``"valid"`` splits, and ``ds["test"]["data"]`` for the unlabeled test
    split.

    Args:
        data_dir: Directory containing ``fashion_mnist.pt``. Either a ``str``
            or a ``pathlib.Path`` is accepted.
    """

    # Per-item transforms: T.Resize(32) followed by conversion to float.
    # Train and valid currently share the same pipeline.
    tfms = {"train": T.Compose([T.Resize(32), T.ConvertImageDtype(torch.float)]),
            "valid": T.Compose([T.Resize(32), T.ConvertImageDtype(torch.float)])}

    def __init__(self, data_dir=DATA_DIR):
        # Coerce to Path so a plain string directory also supports the `/` join.
        self.ds = torch.load(Path(data_dir) / "fashion_mnist.pt")
        self.train_ds = TensorDataset(self.ds["train"]["data"], self.ds["train"]["labels"], self.tfms["train"])
        self.valid_ds = TensorDataset(self.ds["valid"]["data"], self.ds["valid"]["labels"], self.tfms["valid"])
        # The test split is built without labels, so its items are images only.
        self.test_ds = TensorDataset(self.ds["test"]["data"], tfms=self.tfms["valid"])

    def dataloaders(self, bs=128, num_workers=2):
        """Return ``(train, valid, test)`` DataLoaders for the three splits.

        Args:
            bs: Batch size of the training loader; the validation and test
                loaders use ``bs * 2``.
            num_workers: Worker count forwarded to every ``DataLoader``.

        Returns:
            Tuple of the train, validation and test loaders. Only the
            training loader shuffles.
        """
        train_dataloader = DataLoader(self.train_ds, batch_size=bs, shuffle=True, num_workers=num_workers)
        valid_dataloader = DataLoader(self.valid_ds, batch_size=bs*2, shuffle=False,
                                      num_workers=num_workers)
        test_dataloader = DataLoader(self.test_ds, batch_size=bs*2, shuffle=False,
                                     num_workers=num_workers)
        return train_dataloader, valid_dataloader, test_dataloader

In [None]:
# Load the three splits from DATA_DIR, then wrap each one in a DataLoader
# using the notebook-wide batch size.
datasets = FashionMNIST(data_dir=DATA_DIR)
train_dl, valid_dl, test_dl = datasets.dataloaders(bs=BS)

Grab one batch of data.

In [None]:
# Pull a single (images, labels) batch from the training loader.
x, y = next(iter(train_dl))
# Last expression of the cell: displays the shapes of both tensors.
x.shape, y.shape

In [None]:
def validate_model(model, valid_dl, loss_func, log_images=False, num_classes=10):
    """Evaluate ``model`` on ``valid_dl`` and optionally log a wandb.Table.

    The model is switched to eval mode (and left in it) and batches are moved
    to the module-level ``device`` before the forward pass.

    Args:
        model: Network to evaluate.
        valid_dl: DataLoader yielding ``(images, labels)`` batches; its
            ``.dataset`` length is used as the normalisation count.
        loss_func: Callable ``loss_func(outputs, labels)``. Its result is
            scaled by the batch size, so it is presumably a per-batch mean.
        log_images: When true, every validation image is added to a
            ``wandb.Table`` together with its label, prediction and class
            probabilities, and the table is logged with ``commit=False``.
        num_classes: Number of ``score_<k>`` probability columns in the table.

    Returns:
        Tuple ``(val_loss, accuracy)``, both divided by
        ``len(valid_dl.dataset)``.
    """
    model.eval()
    val_loss = 0.
    correct = 0

    # Create the table up front so it is always bound when logging is
    # requested, even if the loader yields no batches.
    table = None
    if log_images:
        # 🐝 Create a wandb Table to log images, labels and predictions to
        score_columns = [f"score_{cls}" for cls in range(num_classes)]
        table = wandb.Table(columns=["image", "label", "pred"] + score_columns)

    with torch.inference_mode():
        for images, labels in valid_dl:
            images, labels = images.to(device), labels.to(device)

            # Forward pass ➡
            outputs = model(images)
            # Weight the batch loss by batch size so the final division by the
            # dataset length is correct even when the last batch is smaller.
            val_loss += loss_func(outputs, labels) * labels.size(0)

            # Compute accuracy and accumulate
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()

            # Log validation predictions and images to the dashboard
            if table is not None:
                probs = outputs.softmax(dim=1)
                rows = zip(images.to("cpu"), labels.to("cpu"),
                           predicted.to("cpu"), probs.to("cpu"))
                for img, label, pred, prob in rows:
                    # img[0] selects the first channel of the image tensor.
                    table.add_data(wandb.Image(img[0].numpy()), label, pred, *prob.numpy())

        if table is not None:
            # commit=False: the table is attached to the caller's next
            # wandb.log call instead of creating a step of its own.
            wandb.log({"val_table/predictions_table": table}, commit=False)

    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)

Select the device and check the number of batches in each dataloader

In [None]:
# Prefer the first CUDA GPU when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
# Last expression of the cell: displays the selected device.
device

In [None]:
# Number of batches in the train, validation and test loaders, in that order.
tuple(len(loader) for loader in (train_dl, valid_dl, test_dl))

# 👟 Train

In [None]:
# Learning rate; recorded in the run config and read back for the optimizer.
LR = 1e-3
# Number of passes over the training set.
EPOCHS = 10

# When true, validation images and predictions are logged to a W&B Table,
# on the final epoch only.
LOG_IMAGES=True

Get Model

In [None]:
# timm architecture identifier; also reused to name the saved model file.
MODEL_NAME = 'resnet10t'

# Build the pretrained network for 1-channel inputs and 10 output classes,
# and place it on the selected device in the same expression.
model = timm.create_model(
    MODEL_NAME,
    pretrained=True,
    num_classes=10,
    in_chans=1,
).to(device)

🐝 initialise a wandb run

In [None]:
# Start the training run and record the hyperparameters in its config.
wandb.init(project=PROJECT, config={"epochs": EPOCHS, "batch_size": BS, "lr": LR})

# Add additional configs to wandb if needed
# Here: the sizes of the training and validation datasets.
wandb.config['len_train'] = len(datasets.train_ds)
wandb.config['len_val'] = len(datasets.valid_ds)

Start Training

In [None]:
# Read the hyperparameters back from the run config so the values that are
# logged are exactly the values that are used.
config = wandb.config

# Number of optimizer steps per epoch (the last batch may be partial).
n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)

# Make the loss and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

# Training
# Counters start at zero so that after k batches they equal the number of
# examples / steps actually processed (starting at 1 over-counted by one).
example_ct = 0
step_ct = 0
for epoch in tqdm(range(config.epochs)):
    # validate_model leaves the model in eval mode, so re-enable train mode
    # at the start of every epoch.
    model.train()
    for step, (images, labels) in enumerate(tqdm(train_dl, leave=False)):
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        train_loss = loss_func(outputs, labels)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        example_ct += len(images)
        metrics = {"train/train_loss": train_loss,
                   # Fractional epoch: completed steps over steps per epoch.
                   "train/epoch": (step + 1 + (n_steps_per_epoch * epoch)) / n_steps_per_epoch,
                   "train/example_ct": example_ct}

        # The last step of the epoch is not logged here; its metrics are
        # merged with the validation metrics below into a single log call.
        if step + 1 < n_steps_per_epoch:
            # 🐝 Log train metrics to wandb
            wandb.log(metrics)

        step_ct += 1

    # Log validation images and predictions on the last epoch only.
    log_images = bool(LOG_IMAGES) and epoch == (config.epochs - 1)

    # Do validation and maybe log images to Tables
    val_loss, accuracy = validate_model(model, valid_dl, loss_func, log_images=log_images)

    # 🐝 Log train and validation metrics to wandb
    val_metrics = {"val/val_loss": val_loss,
                   "val/val_accuracy": accuracy}
    wandb.log({**metrics, **val_metrics})

    print(f"Train Loss: {train_loss:.3f}, Valid Loss: {val_loss:.3f}, Valid Accuracy: {accuracy:.2f}")

# Save trained model to disk and to W&B Artifacts
# NOTE(review): torch.save(model, ...) pickles the whole module rather than
# just its state_dict; kept as-is so existing artifact consumers still work.
model_fn = f'{MODEL_NAME}_model.pt'
torch.save(model, model_fn)
wandb.log_artifact(model_fn, f'{MODEL_NAME}_model', type='model')

# Generate Test Submission
Generate Test Predictions and Log Submission File

In [None]:
# Put the model in eval mode explicitly and disable autograd: inference must
# not depend on the mode a previous cell happened to leave the model in, and
# building a graph for every test batch only wastes memory.
model.eval()
preds = []
with torch.inference_mode():
    # The test loader yields image batches only (the test split has no labels).
    for images in tqdm(test_dl, leave=False):
        outputs = model(images.to(device))
        # Flatten as we go: one predicted class index per test image.
        preds.extend(outputs.argmax(1).cpu().tolist())

# Last expression of the cell: displays the number of predictions.
len(preds)

In [None]:
# One row per test prediction: 'Id' is the row position, 'Category' the
# predicted class. Written without the DataFrame index column.
submission_df = pd.DataFrame(
    {
        'Id': [*range(len(preds))],
        'Category': preds,
    }
)
submission_df.to_csv('submission.csv', index=False)

Log your submission file to your wandb run

In [None]:
# Attach the CSV written above to the current run as an artifact named
# 'submission_file' of type 'submission'.
wandb.log_artifact('submission.csv', 'submission_file', type='submission')

🐝 Close your wandb run

In [None]:
# Finish the active training run now that all metrics and artifacts are logged.
wandb.finish()

# 🪄 More from W&B

#### 📏 Best Practices

1. **Projects**: Log multiple runs to a project to compare them. `wandb.init(project="project-name")`
2. **Groups**: For multiple processes or cross validation folds, log each process as a run and group them together. `wandb.init(group='experiment-1')`
3. **Tags**: Add tags to track your current baseline or production model.
4. **Notes**: Type notes in the table to track the changes between runs.
5. **Reports**: Take quick notes on progress to share with colleagues and make dashboards and snapshots of your ML projects.

# What's next 🚀 ?
In the next tutorial, you will learn how to do hyperparameter optimization using W&B Sweeps:
## 👉 [Hyperparameter sweeps using PyTorch](https://colab.research.google.com/github/wandb/examples/blob/master/colabs/pytorch/Organizing_Hyperparameter_Sweeps_in_PyTorch_with_W%26B.ipynb)