In [1]:
%pip install torch torchvision datasets huggingface-hub matplotlib Pillow scikit-learn wandb -q

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from datasets import load_dataset
from huggingface_hub import HfApi, hf_hub_download
from PIL import Image
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
import copy
import wandb
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
BATCH_SIZE = 32
NUM_CLASSES = 10
EPOCHS = 10
LR = 1e-3
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BEST_MODEL_PATH = "best_model.pth"

HF_REPO_ID = "CV016/MDLOpsExam"
HF_TOKEN = "your_hf_token_here"

CLASS_NAMES = ["airplane", "bird", "car", "cat", "deer", "dog", "horse", "monkey", "ship", "truck"]

print(f"Device: {DEVICE}")

Device: cuda


In [4]:
ds = load_dataset("Chiranjeev007/STL-10_Subset")

train_data = ds["train"]
val_data = ds["validation"]
test_data = ds["test"]

print(f"Train: {len(train_data)}, Validation: {len(val_data)}, Test: {len(test_data)}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/723 [00:00<?, ?B/s]



data/train-00000-of-00001.parquet:   0%|          | 0.00/88.9M [00:00<?, ?B/s]

data/validation-00000-of-00001.parquet:   0%|          | 0.00/8.82M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Train: 5000, Validation: 500, Test: 1000


In [5]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
class STL10HFDataset(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample = self.dataset[idx]
        image = sample["image"].convert("RGB")
        label = sample["label"]
        if self.transform:
            image = self.transform(image)
        return image, label

In [7]:
train_dataset = STL10HFDataset(train_data, transform=train_transform)
val_dataset = STL10HFDataset(val_data, transform=eval_transform)
test_dataset = STL10HFDataset(test_data, transform=eval_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 51.5MB/s]


In [10]:
wandb.init(project="STL10-ResNet18", name="training")

train_losses = []
val_losses = []
train_accs = []
val_accs = []
best_val_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100.0 * correct / total
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    model.eval()
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (preds == labels).sum().item()

    val_loss = val_running_loss / len(val_loader)
    val_acc = 100.0 * val_correct / val_total
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "val_loss": val_loss,
        "train_accuracy": train_acc,
        "val_accuracy": val_acc,
    })

    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} Train Acc: {train_acc:.2f}% "
          f"Val Loss: {val_loss:.4f} Val Acc: {val_acc:.2f}%")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_wts = copy.deepcopy(model.state_dict())

print(f"\nBest Validation Accuracy: {best_val_acc:.2f}%")
torch.save(best_model_wts, BEST_MODEL_PATH)
print(f"Best model saved to {BEST_MODEL_PATH}")

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 2


[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Create a new API key at: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Store your API key securely and do not share it.
[34m[1mwandb[0m: Paste your API key and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mb22bb016[0m ([33mb22bb016-prom-iit-rajasthan[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch [1/10] Train Loss: 0.2239 Train Acc: 92.78% Val Loss: 0.5765 Val Acc: 82.80%
Epoch [2/10] Train Loss: 0.2007 Train Acc: 93.06% Val Loss: 0.5884 Val Acc: 83.80%
Epoch [3/10] Train Loss: 0.1984 Train Acc: 93.84% Val Loss: 0.7268 Val Acc: 79.80%
Epoch [4/10] Train Loss: 0.1902 Train Acc: 94.30% Val Loss: 0.7195 Val Acc: 81.80%
Epoch [5/10] Train Loss: 0.2122 Train Acc: 92.56% Val Loss: 0.5446 Val Acc: 83.00%
Epoch [6/10] Train Loss: 0.1423 Train Acc: 94.84% Val Loss: 0.7164 Val Acc: 81.40%
Epoch [7/10] Train Loss: 0.1297 Train Acc: 95.64% Val Loss: 0.6924 Val Acc: 84.20%
Epoch [8/10] Train Loss: 0.1362 Train Acc: 95.54% Val Loss: 0.7124 Val Acc: 82.60%
Epoch [9/10] Train Loss: 0.1642 Train Acc: 94.38% Val Loss: 0.6988 Val Acc: 80.60%
Epoch [10/10] Train Loss: 0.1705 Train Acc: 94.12% Val Loss: 0.6060 Val Acc: 83.20%

Best Validation Accuracy: 84.20%
Best model saved to best_model.pth


In [11]:
epochs_range = range(1, EPOCHS + 1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

ax1.plot(epochs_range, train_losses, label="Train Loss")
ax1.plot(epochs_range, val_losses, label="Val Loss")
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Loss")
ax1.set_title("Training / Validation Loss")
ax1.legend()

ax2.plot(epochs_range, train_accs, label="Train Acc")
ax2.plot(epochs_range, val_accs, label="Val Acc")
ax2.set_xlabel("Epoch")
ax2.set_ylabel("Accuracy (%)")
ax2.set_title("Training / Validation Accuracy")
ax2.legend()

plt.tight_layout()
plt.savefig("training_plots.png", dpi=150)
wandb.log({"training_plots": wandb.Image(fig)})
plt.show()
print("Plots saved and logged to WandB")

wandb.finish()

Plots saved and logged to WandB


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▂▂▄▅▁▆██▅▅
train_loss,█▆▆▅▇▂▁▁▄▄
val_accuracy,▆▇▁▄▆▄█▅▂▆
val_loss,▂▃██▁█▇▇▇▃

0,1
epoch,10.0
train_accuracy,94.12
train_loss,0.17054
val_accuracy,83.2
val_loss,0.60599


In [12]:
api = HfApi()

api.upload_file(
    path_or_fileobj=BEST_MODEL_PATH,
    path_in_repo="best_model.pth",
    repo_id=HF_REPO_ID,
    repo_type="model",
    token=HF_TOKEN,
)

api.upload_file(
    path_or_fileobj="training_plots.png",
    path_in_repo="training_plots.png",
    repo_id=HF_REPO_ID,
    repo_type="model",
    token=HF_TOKEN,
)

print(f"Model and plots pushed to https://huggingface.co/{HF_REPO_ID}")

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  best_model.pth              :   1%|1         |  567kB / 44.8MB            

Model and plots pushed to https://huggingface.co/CV016/MDLOpsExam


In [13]:
downloaded_path = hf_hub_download(
    repo_id=HF_REPO_ID,
    filename="best_model.pth",
    repo_type="model",
    token=HF_TOKEN,
)

eval_model = models.resnet18(weights=None)
eval_model.fc = nn.Linear(eval_model.fc.in_features, NUM_CLASSES)
eval_model.load_state_dict(torch.load(downloaded_path, map_location=DEVICE))
eval_model = eval_model.to(DEVICE)
eval_model.eval()

print(f"Model loaded from HuggingFace repo: {HF_REPO_ID}")

best_model.pth:   0%|          | 0.00/44.8M [00:00<?, ?B/s]

Model loaded from HuggingFace repo: CV016/MDLOpsExam


In [14]:
all_preds = []
all_labels = []
all_images = []
all_probs = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = eval_model(images)
        probs = torch.softmax(outputs, dim=1)
        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())
        all_images.extend(images.cpu())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

overall_acc = 100.0 * np.sum(all_preds == all_labels) / len(all_labels)
print(f"Test Accuracy: {overall_acc:.2f}%")

Test Accuracy: 86.70%


In [15]:
wandb.init(project="STL10-ResNet18", name="evaluation", reinit=True)

# --- Confusion Matrix on WandB ---
cm = confusion_matrix(all_labels, all_preds)
fig_cm, ax_cm = plt.subplots(figsize=(10, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=CLASS_NAMES)
disp.plot(ax=ax_cm, cmap="Blues", xticks_rotation=45)
ax_cm.set_title("Confusion Matrix - Test Set")
plt.tight_layout()
wandb.log({"confusion_matrix": wandb.Image(fig_cm)})
plt.show()
print("Confusion matrix logged to WandB")



Confusion matrix logged to WandB


In [18]:
# --- Class-wise Accuracy Bar Plot on WandB ---
class_correct = np.zeros(NUM_CLASSES)
class_total = np.zeros(NUM_CLASSES)

for label, pred in zip(all_labels, all_preds):
    class_total[label] += 1
    if label == pred:
        class_correct[label] += 1

class_acc = 100.0 * class_correct / np.maximum(class_total, 1)

print("Class-wise Accuracy:")
for name, acc in zip(CLASS_NAMES, class_acc):
    print(f"  {name}: {acc:.2f}%")

class_acc_dict = {f"class_acc/{name}": acc for name, acc in zip(CLASS_NAMES, class_acc)}
wandb.log(class_acc_dict)

fig_bar, ax_bar = plt.subplots(figsize=(12, 6))
bars = ax_bar.bar(CLASS_NAMES, class_acc, color="steelblue")
ax_bar.set_xlabel("Class")
ax_bar.set_ylabel("Accuracy (%)")
ax_bar.set_title("Class-wise Accuracy")
ax_bar.set_ylim(0, 105)
for bar, acc in zip(bars, class_acc):
    ax_bar.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 1,
                f"{acc:.1f}%", ha="center", va="bottom", fontsize=9)
plt.xticks(rotation=45)
plt.tight_layout()
wandb.log({"class_wise_accuracy": wandb.Image(fig_bar)})
plt.show()
print("Class-wise accuracy bar plot logged to WandB")

Class-wise Accuracy:
  airplane: 93.00%
  bird: 87.00%
  car: 96.00%
  cat: 85.00%
  deer: 88.00%
  dog: 61.00%
  horse: 85.00%
  monkey: 94.00%
  ship: 90.00%
  truck: 88.00%
Class-wise accuracy bar plot logged to WandB


In [17]:
# --- 20 Samples (10 Correct, 10 Incorrect) on WandB ---
correct_indices = np.where(all_preds == all_labels)[0]
incorrect_indices = np.where(all_preds != all_labels)[0]

rng = np.random.default_rng(42)
selected_correct = rng.choice(correct_indices, size=min(10, len(correct_indices)), replace=False)
selected_incorrect = rng.choice(incorrect_indices, size=min(10, len(incorrect_indices)), replace=False)

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

def tensor_to_pil(img_tensor):
    img = img_tensor.numpy().transpose(1, 2, 0)
    img = img * std + mean
    img = np.clip(img, 0, 1)
    img = (img * 255).astype(np.uint8)
    return Image.fromarray(img)

wandb_images = []

for idx in selected_correct:
    pil_img = tensor_to_pil(all_images[idx])
    caption = f"Correct | Actual: {CLASS_NAMES[all_labels[idx]]} | Pred: {CLASS_NAMES[all_preds[idx]]}"
    wandb_images.append(wandb.Image(pil_img, caption=caption))

for idx in selected_incorrect:
    pil_img = tensor_to_pil(all_images[idx])
    caption = f"Incorrect | Actual: {CLASS_NAMES[all_labels[idx]]} | Pred: {CLASS_NAMES[all_preds[idx]]}"
    wandb_images.append(wandb.Image(pil_img, caption=caption))

wandb.log({"sample_predictions": wandb_images})
print("20 sample predictions logged to WandB")

20 sample predictions logged to WandB


In [None]:
wandb.finish()
print("WandB run finished")