<a href="https://colab.research.google.com/github/SauravMaheshkar/Hot-Dog-Not-Hot-Dog/blob/main/notebooks/Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This Colab notebook accompanies the Activeloop blog post titled "<>".

# Packages 📦 and Basic Setup

In [None]:
%%capture
!pip install hub
!pip install --upgrade scikit-learn

import os
import hub
import glob
import torch
import PIL.Image
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt
from skimage import img_as_float
from skimage.transform import resize
from hub.schema import ClassLabel, Image
from torchvision import models, transforms

# Number of output classes; used as the output width of the replacement
# fully-connected head when the model is built.
N_CLASSES = 2

To use your own datasets, you'll need to register on the Activeloop platform. After registering, uncomment the line below and log in with your username and password.

In [None]:
# !hub login

# Advanced 🤓 Transformations

In [None]:
%%time
# Source dataset that the transform below reads its images and labels from
train_dataset = hub.Dataset("sauravmaheshkar/hot-dog-not-hot-dog-train")

# Target schema: a fixed-size 224x224x3 uint8 image plus a two-class label
new_schema = dict(
    resized_image=Image(shape=(224, 224, 3), dtype="uint8"),
    label=ClassLabel(num_classes=2),
)

# Tensor conversion followed by per-channel standardization
# (the mean/std values are the widely used ImageNet channel statistics)
tfms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Hub transform that resizes and normalizes one sample of the training set
@hub.transform(schema=new_schema, scheduler="threaded", workers=8)
def resize_transform(index):
    """Build one resized, normalized sample from ``train_dataset``.

    Args:
        index: Position of the sample to read from ``train_dataset``.

    Returns:
        dict with the keys of ``new_schema``: ``"resized_image"`` holds the
        224x224x3 channels-last image array and ``"label"`` holds the
        sample's label converted to ``int``.
    """
    # Rescale the stored image to the fixed (224, 224, 3) shape
    image = resize(train_dataset['image', index].compute(), (224, 224, 3), anti_aliasing=True)
    # ToTensor turns the H x W x C array into a C x H x W tensor; Normalize
    # then standardizes each channel with the mean/std configured in `tfms`
    image = tfms(image)
    # Restore channels-last order with permute. The former `.view(224, 224, 3)`
    # only re-labelled the channels-first buffer with a new shape, which mixes
    # the colour planes together instead of transposing them, while the
    # training loop later assumes genuine H x W x C data.
    image = img_as_float(image.permute(1, 2, 0))
    # NOTE(review): new_schema declares "resized_image" as uint8, but the values
    # here are normalized floats — confirm how Hub casts them when storing.
    label = int(train_dataset['label', index].compute())
    return {
        "resized_image": image,
        "label": label
    }

# Destination path on Hub for the resized dataset
url = "sauravmaheshkar/resized-hot-dog-not-hot-dog"

# Apply the transform to sample indices 0..497
# (498 is presumably the size of the training split — TODO confirm)
resized_ds = resize_transform(range(498))

# Store the transformed samples under `url`, then flush the resulting dataset
pytorch_dataset = resized_ds.store(url)
pytorch_dataset.flush()

# The Model 👷‍♀️

In [None]:
%%capture

# Start from torchvision's ResNet-18 with pretrained weights
model = models.resnet18(pretrained=True)

# Replace the classification head with a new linear layer that keeps the
# original input width and emits N_CLASSES outputs
model.fc = torch.nn.Linear(
    in_features=model.fc.in_features,
    out_features=N_CLASSES,
)

# Replace the pooling layer with adaptive average pooling to a 1x1 output
model.avgpool = torch.nn.AdaptiveAvgPool2d(output_size=1)

# Training 💪🏻

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the resized dataset from Hub and expose it in PyTorch-compatible form,
# with each sample returned as a list
pytorch_dataset = hub.Dataset(
    "sauravmaheshkar/resized-hot-dog-not-hot-dog"
).to_pytorch(output_type=list)

# Batch the samples for training
# NOTE(review): no shuffling is requested here — confirm the stored order is
# acceptable for training.
train_loader = torch.utils.data.DataLoader(
    pytorch_dataset,
    batch_size=32,
    num_workers=2,
)

# Training configuration: epoch count, loss function and optimizer
n_epochs = 20
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

# Move the model's parameters onto the selected device
model.to(device)

# Training: run `n_epochs` passes over `train_loader`, updating `model` with
# `optimizer` against `criterion`, and report the mean batch loss per epoch.
for epoch in range(n_epochs):
    print(f"Epoch {epoch}")
    # Per-epoch accumulators for the reported loss
    running_loss = 0.0
    n_batches = 0
    for X, y in train_loader:
        # Labels must be int64 for CrossEntropyLoss; cast and move in one step
        y = y.to(device, dtype=torch.long)
        # Assumes batches arrive channels-last (N, H, W, C); the model wants
        # channels-first float input, hence the permute before the cast
        X = X.permute(0, 3, 1, 2).float().to(device)
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass and loss
        outputs = model(X)
        loss = criterion(outputs, y)
        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss for the epoch summary
        running_loss += loss.item()
        n_batches += 1
    # Report the mean loss over the epoch. Previously only the last batch's
    # loss was printed and `running_loss` was never used. The max() guard
    # avoids a division by zero if the loader yields no batches.
    print(f"Loss {running_loss / max(n_batches, 1)}")
print("Finished Training")