In [None]:
# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
try:
    import torch
    import torchvision
    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")
except:
    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
    import torch
    import torchvision
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")

[INFO] torch/torchvision versions not as required, installing nightly versions.
Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113
Collecting torch
  Downloading torch-2.7.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting torchvision
  Downloading torchvision-0.22.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading torchaudio-2.7.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.6 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.6.77 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.6.77 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.6.80 (from torch)
  Downloading nvidia_cuda_cupt

In [None]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular.going_modular import data_setup, engine
    from helper_functions import download_data, set_seeds, plot_loss_curves
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular or helper_functions scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !mv pytorch-deep-learning/helper_functions.py . # get the helper_functions.py script
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine
    from helper_functions import download_data, set_seeds, plot_loss_curves

[INFO] Couldn't find torchinfo... installing it.
[INFO] Couldn't find going_modular or helper_functions scripts... downloading them from GitHub.
Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 4393, done.[K
remote: Counting objects: 100% (1534/1534), done.[K
remote: Compressing objects: 100% (133/133), done.[K
remote: Total 4393 (delta 1457), reused 1401 (delta 1401), pack-reused 2859 (from 2)[K
Receiving objects: 100% (4393/4393), 650.71 MiB | 17.14 MiB/s, done.
Resolving deltas: 100% (2659/2659), done.
Updating files: 100% (248/248), done.


In [None]:
# Prefer the GPU when torch can see one; otherwise run on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# Factory for the frozen ResNet50 backbone plus a fresh classification head
def create_resnet_model(num_classes: int = 3,
                        seed: int = 42,
                        device = 'cuda' if torch.cuda.is_available() else 'cpu'):
    """Builds a ResNet50 with frozen pretrained layers and a new linear head.

    Args:
        num_classes: Number of outputs of the replacement `fc` layer. Defaults to 3.
        seed: Seed applied before the new head is created. Defaults to 42.
        device: Target device (string or torch.device). The default is chosen once,
            when this function is defined.

    Returns:
        A `(model, transform)` tuple: the model on `device`, and the preprocessing
        transform returned by the pretrained weights' `transforms()`.
    """
    weights = torchvision.models.ResNet50_Weights.DEFAULT
    transform = weights.transforms()
    model = torchvision.models.resnet50(weights=weights).to(device)

    # Freeze base layers
    for param in model.parameters():
        param.requires_grad = False

    # Set random seeds (before the head is created, so its initialisation is repeatable)
    torch.manual_seed(seed)
    # str() makes the check work for torch.device objects and indexed names such as "cuda:0"
    if str(device).startswith('cuda'):
        torch.cuda.manual_seed(seed)

    # Replace the classifier (fully connected layer). The input width is read from the
    # layer being replaced instead of being hard-coded; the Sequential wrapper is kept so
    # parameter names ("fc.0.*") stay compatible with previously saved state dicts.
    model.fc = nn.Sequential(
        nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
    ).to(device)

    return model, transform


In [None]:
# Build the frozen ResNet50 with a 29-output head and keep the transform returned with it
resnet_model, resnet_transforms = create_resnet_model(num_classes=29)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 134MB/s]


In [None]:
# Print a per-layer summary (shapes, parameter counts, trainable flags) of the model.
# The summary is computed for a 1x3x256x256 input.
from torchinfo import summary
summary(resnet_model,
        input_size=(1, 3, 256, 256),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])


Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
ResNet (ResNet)                          [1, 3, 256, 256]     [1, 29]              --                   Partial
├─Conv2d (conv1)                         [1, 3, 256, 256]     [1, 64, 128, 128]    (9,408)              False
├─BatchNorm2d (bn1)                      [1, 64, 128, 128]    [1, 64, 128, 128]    (128)                False
├─ReLU (relu)                            [1, 64, 128, 128]    [1, 64, 128, 128]    --                   --
├─MaxPool2d (maxpool)                    [1, 64, 128, 128]    [1, 64, 64, 64]      --                   --
├─Sequential (layer1)                    [1, 64, 64, 64]      [1, 256, 64, 64]     --                   False
│    └─Bottleneck (0)                    [1, 64, 64, 64]      [1, 256, 64, 64]     --                   False
│    │    └─Conv2d (conv1)               [1, 64, 64, 64]      [1, 64, 64, 64]      (4,096)              False
│    │    

In [None]:
# Display the preprocessing transform that came with the pretrained weights
resnet_transforms

ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [None]:
import torchvision

# Predefined normalization values for ResNet50
mean = [0.485, 0.456, 0.406]  # ImageNet mean
std = [0.229, 0.224, 0.225]   # ImageNet std

# Recommended training transforms
# NOTE: `asl_train_transform` is defined again, with the same steps, in the dataset-loading cell.
asl_train_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),   # Resize so the shortest side is 256 px, leaving room for the 224 crop
    torchvision.transforms.RandomCrop(224),  # Randomly crop the image to 224x224
    torchvision.transforms.RandomHorizontalFlip(),  # Flip the image horizontally (hand sign can be mirrored)
    torchvision.transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Color augmentation
    torchvision.transforms.ToTensor(),   # Convert to Tensor
    torchvision.transforms.Normalize(mean=mean, std=std)  # Normalize based on ImageNet mean/std
])


In [None]:
# Install the Kaggle command-line client used below to download the dataset
!pip install kaggle



In [None]:
# Colab-only: opens a file picker so the Kaggle API token can be uploaded
from google.colab import files
files.upload()  # Upload your kaggle.json file here

In [None]:
# Move the uploaded kaggle.json from the working directory into /root/.kaggle
import os
os.makedirs('/root/.kaggle', exist_ok=True)
!mv kaggle.json /root/.kaggle/


In [None]:
# Restrict the token file to owner read/write only
!chmod 600 /root/.kaggle/kaggle.json


In [None]:
# Download the grassknoted/asl-alphabet dataset; the next cell unzips asl-alphabet.zip
!kaggle datasets download -d grassknoted/asl-alphabet


In [None]:
!unzip asl-alphabet.zip -d /content/asl_alphabet/


In [None]:
import os
import shutil
from pathlib import Path

# Folder holding the flat test images (named like "A_test.jpg")
test_dir = Path("/content/asl_alphabet/asl_alphabet_test/asl_alphabet_test")

# Sort each "<class>_test.jpg" into its own "<class>/" subfolder
for file_path in test_dir.glob("*_test.jpg"):
    filename = file_path.name  # e.g., A_test.jpg
    # Everything before the first underscore is the class label ('A' from 'A_test.jpg')
    class_name = filename.partition("_")[0]

    # Make sure the per-class subfolder is there
    class_folder = test_dir.joinpath(class_name)
    class_folder.mkdir(exist_ok=True)

    # Relocate the image under its class folder, keeping the file name
    shutil.move(str(file_path), str(class_folder.joinpath(filename)))

print("✅ Test folder restructured successfully!")


In [None]:
from torchvision import datasets, transforms
from pathlib import Path

# Set the path to the dataset
data_dir = Path("/content/asl_alphabet")

# Training pipeline: resize, random crop/flip/colour jitter for augmentation, then normalize
asl_train_transform = transforms.Compose([
    transforms.Resize(256),  # Resize the shortest side to 256
    transforms.RandomCrop(224),  # Random crop to 224x224
    transforms.RandomHorizontalFlip(),  # Random horizontal flip
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Color jitter
    transforms.ToTensor(),  # Convert the image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Evaluation pipeline: deterministic (no augmentation)
asl_test_transform = transforms.Compose([
    transforms.Resize(224),  # Resize the shortest side to 224 (aspect ratio is kept)
    transforms.CenterCrop(224),  # Center crop to 224x224
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Load the training and testing datasets with the correct paths
train_data = datasets.ImageFolder(root=data_dir / 'asl_alphabet_train' / 'asl_alphabet_train', transform=asl_train_transform)
test_data = datasets.ImageFolder(root=data_dir / 'asl_alphabet_test' / 'asl_alphabet_test', transform=asl_test_transform)

# ImageFolder numbers classes from the sorted folder names found under its *own* root,
# so a test root that lacks a class folder would silently shift every later label.
# Re-express test labels in the training index space; this is the identity mapping when
# both roots contain the same class folders, and raises KeyError for an unknown class.
test_to_train_index = [train_data.class_to_idx[name] for name in test_data.classes]
test_data.target_transform = test_to_train_index.__getitem__

# Get the class names from the dataset
class_names_asl = train_data.classes
print(f"Number of classes: {len(class_names_asl)}")
print(f"First 5 classes: {class_names_asl[:5]}")


In [None]:
import os
import torch

BATCH_SIZE = 32
# os.cpu_count() returns None when the count cannot be determined; fall back to loading
# in the main process (0 workers) instead of handing None to DataLoader.
NUM_WORKERS = os.cpu_count() or 0

# Create DataLoader for training and testing datasets
# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_dataloader = torch.utils.data.DataLoader(dataset=train_data,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=NUM_WORKERS)

test_dataloader = torch.utils.data.DataLoader(dataset=test_data,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 num_workers=NUM_WORKERS)

# Print the length of train and test dataloaders
len(train_dataloader), len(test_dataloader)


In [None]:
from going_modular.going_modular import engine
import torch
from torch import nn


# Adam over the model's parameters. The backbone was frozen in create_resnet_model,
# so only the replacement `fc` head has requires_grad=True.
optimizer = torch.optim.Adam(params=resnet_model.parameters(), lr=1e-3)


# Cross-entropy loss with label smoothing of 0.1
loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)


# Seed with the helper imported from helper_functions before training starts
set_seeds()

# Train for 5 epochs; the returned `asl_results` is passed to plot_loss_curves afterwards
asl_results = engine.train(
    model=resnet_model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=5,
    device=device
)


In [None]:
# Plot the loss curves from the training results
plot_loss_curves(asl_results)

In [None]:
from going_modular.going_modular import utils

# Save the trained ResNet model
# `resnet_model_path` holds only the file name; the directory is given by target_dir.
resnet_model_path = "asl_resnet_model.pth"
utils.save_model(model=resnet_model,  # use your trained model
                 target_dir="models/",
                 model_name=resnet_model_path)


In [None]:
# Create a fresh instance of ResNet model (as you did for training)
# Note: this also rebinds `resnet_transforms` to the transform returned by the factory.
loaded_resnet_model, resnet_transforms = create_resnet_model(num_classes=29)  # Use the correct num_classes for ASL

# Load the trained state dict
# The path is the "models/" target_dir plus the model_name used when saving.
loaded_resnet_model.load_state_dict(torch.load("models/asl_resnet_model.pth"))


In [None]:
from pathlib import Path

# Size of the saved checkpoint on disk, floored to whole megabytes (1 MB = 1024 * 1024 bytes)
pretrained_resnet_model_size = (Path("models") / resnet_model_path).stat().st_size // 1024**2  # in MB
print(f"Pretrained ResNet model size: {pretrained_resnet_model_size} MB")


In [None]:
# Import/install Gradio
try:
    import gradio as gr
except:
    !pip -q install gradio
    import gradio as gr

print(f"Gradio version: {gr.__version__}")

In [None]:
import shutil
from pathlib import Path

# Create ASL demo path (relative to the current working directory)
asl_demo_path = Path("demos/asl_demo/")
asl_demo_path.mkdir(parents=True, exist_ok=True)

# Make ASL demo examples directory
(asl_demo_path / "examples").mkdir(parents=True, exist_ok=True)

# One example image per class, <label>/<label>1000.jpg from the training folder,
# ordered A-Z followed by del, nothing, space
asl_examples = [
    Path('/content/asl_alphabet/asl_alphabet_train/asl_alphabet_train', label, f"{label}1000.jpg")
    for label in [*"ABCDEFGHIJKLMNOPQRSTUVWXYZ", "del", "nothing", "space"]
]

def copy_images_to_examples(src_paths, dest_dir):
    """Copy each existing source image into `dest_dir`, keeping its file name.

    Args:
        src_paths: Iterable of path objects (each must provide `.exists()` and `.name`).
        dest_dir: Destination directory; created, with parents, if absent.

    A line is printed per image: either the copy that was made or a
    "not found" notice for a source that does not exist.
    """
    examples_dir = Path(dest_dir)
    examples_dir.mkdir(parents=True, exist_ok=True)  # Ensure destination directory exists

    for image in src_paths:
        # Guard clause: report missing sources and move on
        if not image.exists():
            print(f"Source image not found: {image}")
            continue

        target = examples_dir / image.name
        shutil.copy(image, target)
        print(f"Copied {image} to {target}")

# Run the function
# NOTE: this absolute destination matches the relative `demos/asl_demo/examples` directory
# created above only when the working directory is /content.
copy_images_to_examples(asl_examples, "/content/demos/asl_demo/examples")


In [None]:
# Move trained model to ASL demo folder (will error if model is already moved)
# After this runs, the checkpoint no longer exists under models/, so earlier cells that
# read "models/asl_resnet_model.pth" cannot be re-run without saving the model again.
!mv /content/models/asl_resnet_model.pth /content/demos/asl_demo


In [None]:
%%writefile /content/demos/asl_demo/app.py
import gradio as gr
import os
import torch
from model import create_resnet_model
from timeit import default_timer as timer
from typing import Tuple, Dict

# Load class names: one label per line. Blank lines are skipped so that a trailing
# empty line in the file cannot add an empty class and change the size of the model head.
with open("class_names.txt", "r") as f:
    class_names = [asl_name.strip() for asl_name in f if asl_name.strip()]

# Message shown next to the prediction, keyed by class name.
# predict() looks up the top predicted class here and falls back to a default when absent.
fun_messages = {
    "A": "✊ A for Awesome!",
    "B": "🖐 B for Brilliant!",
    "C": "👌 C for Cool!",
    "D": "👉 D for Determined!",
    "E": "✋ E for Excellent!",
    "F": "🤞 F for Fantastic!",
    "G": "👈 G for Great!",
    "H": "🤚 H for Happy!",
    "I": "☝️ I for Incredible!",
    "J": "👋 J for Joyful!",
    "K": "🤟 K for Kind!",
    "L": "🦾 L for Lucky!",
    "M": "✌️ M for Magic!",
    "N": "🤙 N for Nice!",
    "O": "⭕ O for Outstanding!",
    "P": "🅿️ P for Powerful!",
    "Q": "🔍 Q for Quick!",
    "R": "®️ R for Rocking!",
    "S": "💪 S for Strong!",
    "T": "✝️ T for Talented!",
    "U": "⛎ U for Unique!",
    "V": "✌️ V for Victorious!",
    "W": "🤘 W for Wonderful!",
    "X": "❌ X for Xtraordinary!",
    "Y": "💥 Y for Youthful!",
    "Z": "⚡ Z for Zesty!",
    "del": "🚫 Delete!",
    "nothing": "😶 Nothing detected!",
    "space": "⬜ Space!"
}

# Create model
# Built explicitly on the CPU: the weights below are mapped to the CPU and predict() feeds
# CPU tensors, so letting the factory pick CUDA when available would cause a device mismatch.
resnet_model, resnet_transforms = create_resnet_model(
    num_classes=len(class_names),
    device="cpu",
)

# Load weights
resnet_model.load_state_dict(
    torch.load(
        f="asl_resnet_model.pth",
        map_location=torch.device("cpu"),
    )
)

def predict(img) -> Tuple[Dict, float, str]:
    """Classify one image and time the prediction.

    Args:
        img: Image accepted by `resnet_transforms`.

    Returns:
        A tuple of (class name -> probability for every class, elapsed seconds
        rounded to 5 decimals, message for the top class).
    """
    start_time = timer()

    # Preprocess and add a leading batch dimension
    batch = resnet_transforms(img).unsqueeze(0)

    resnet_model.eval()
    with torch.inference_mode():
        logits = resnet_model(batch)
        pred_probs = torch.softmax(logits, dim=1)

    # Map every class name to its probability for the single image in the batch
    pred_labels_and_probs = {}
    for idx, name in enumerate(class_names):
        pred_labels_and_probs[name] = float(pred_probs[0][idx])

    pred_time = round(timer() - start_time, 5)

    # Get top prediction and message
    top_class = class_names[torch.argmax(pred_probs)]
    message = fun_messages.get(top_class, "Great job!")
    return pred_labels_and_probs, pred_time, message

# One single-item row per file in examples/, in sorted file-name order
example_list = [[f"examples/{example}"] for example in sorted(os.listdir("examples"))]

# Text shown at the top of the app
title = "ASL Alphabet Vision ✋🤟"
description = """
Welcome to **ASL Alphabet Vision**! 🚀
Upload or click an example image to see the predicted ASL letter.
The app predicts **A-Z, del, nothing, space** — and gives you a fun message! 🎉
"""

# Gradio interface
# The three outputs line up with predict()'s return tuple: probabilities, time, message.
# NOTE(review): `allow_flagging` may be renamed in newer Gradio releases — confirm against
# the version that the unpinned requirements.txt installs.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Top Predictions"),
        gr.Number(label="Prediction Time (s)"),
        gr.Textbox(label="Fun Message")
    ],
    examples=example_list,
    title=title,
    description=description,
    allow_flagging="never"
)

# Start the app when this script is run
demo.launch()


In [None]:
%%writefile demos/asl_demo/requirements.txt
# Runtime dependencies of the demo app (app.py and model.py).
# NOTE(review): versions are unpinned; consider pinning to the versions the model was
# trained with (the notebook's install log shows torch 2.7.0 / torchvision 0.22.0).
torch
torchvision
gradio


In [None]:
%%writefile /content/demos/asl_demo/class_names.txt
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
del
nothing
space


In [None]:
%%writefile demos/asl_demo/model.py

import torch
import torchvision

from torch import nn

# Default device for create_resnet_model: CUDA when available, otherwise CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

def set_seeds(seed: int=42):
    """Seeds torch's general and CUDA random number generation.

    Args:
        seed (int, optional): Value handed to both seeding calls. Defaults to 42.
    """
    # General torch operations first, then CUDA operations (ones that happen on the GPU)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

def create_resnet_model(num_classes: int = 29,
                        seed: int = 42,
                        device = device):
    """Builds a ResNet50 with frozen pretrained layers and a new linear head.

    Args:
        num_classes: Number of outputs of the replacement `fc` layer. Defaults to 29.
        seed: Seed applied before the new head is created. Defaults to 42.
        device: Target device for the model. Defaults to the module-level `device`.

    Returns:
        A `(model, transform)` tuple: the model on `device`, and the preprocessing
        transform returned by the pretrained weights' `transforms()`.
    """
    weights = torchvision.models.ResNet50_Weights.DEFAULT
    transform = weights.transforms()
    model = torchvision.models.resnet50(weights=weights).to(device)

    # Freeze base layers
    for param in model.parameters():
        param.requires_grad = False

    # Honour the caller's seed (previously the default was always used, ignoring `seed`)
    set_seeds(seed)

    # The input width is read from the layer being replaced instead of being hard-coded;
    # the Sequential wrapper is kept so parameter names ("fc.0.*") match saved state dicts.
    model.fc = nn.Sequential(
        nn.Linear(in_features=model.fc.in_features,
                  out_features=num_classes)
    ).to(device)

    return model, transform


In [None]:
# Zip the 'asl_demo' folder excluding certain files
!cd demos/asl_demo && zip -r ../asl_demo.zip * -x "*.pyc" "*.ipynb" "*__pycache__*" "*ipynb_checkpoints*"
# Download the zipped ASL demo app (if running in Google Colab)
try:
    from google.colab import files
    files.download("demos/asl_demo.zip")
except:
    print("Not running in Google Colab, can't use google.colab.files.download()")


In [None]:
# Number of samples in the training dataset
len(train_data)