<a href="https://colab.research.google.com/github/RealBJr/sign-language-classifier/blob/DataPreprocessing2/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# COMP 472 Project


---

# 1) Environment Setup

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import cv2
import os
import kagglehub
from google.colab import userdata

# Copy the Kaggle credentials stored as Colab secrets into the environment
# variables that are set before kagglehub is asked to download anything.
# NOTE(review): the token is read from the 'KAGGLE_KEY' secret but exported as
# KAGGLE_API_TOKEN — confirm this is the variable the installed kagglehub reads.
for env_var, secret_name in (
    ('KAGGLE_USERNAME', 'KAGGLE_USERNAME'),
    ('KAGGLE_API_TOKEN', 'KAGGLE_KEY'),
):
    os.environ[env_var] = userdata.get(secret_name)

# Dataset 3: fetched from Kaggle through the kagglehub API
path3 = kagglehub.dataset_download("vignonantoine/combinedasldatasets")
print(f"Path to Dataset 3: {path3}")

# Dataset 2: fetched from Kaggle through the kagglehub API
path2 = kagglehub.dataset_download("risangbaskoro/wlasl-processed")
print(f"Path to Dataset 2: {path2}")

## 1.1) Saving Preprocessed Data to Google Drive (Persistent Storage)


In [None]:
from google.colab import drive
import shutil
import os

drive.mount('/content/drive')

# Hand the pre-processed image folders over from the Colab VM's temporary local
# storage to Google Drive, so later sessions can skip the slow preprocessing step.
drive_project_path = '/content/drive/MyDrive/SignLanguageProject/'
os.makedirs(drive_project_path, exist_ok=True)

# (label used in the progress messages, folder on the Colab VM)
folders_to_move = [
    ('Dataset 2 images', '/content/dataset2_images'),
    ('Dataset 3 subsampled images', '/content/dataset3_subsampled'),
]

not_in_drive = []
for label, local_dir in folders_to_move:
    drive_copy = os.path.join(drive_project_path, os.path.basename(local_dir))
    if os.path.exists(drive_copy):
        # shutil.move raises when the destination directory already contains a
        # folder with this name, so a repeated run must not attempt the move again.
        print(f"{label} already present in Drive at {drive_copy}; skipping.")
    elif os.path.isdir(local_dir):
        print(f"Moving {label} to Drive...")
        shutil.move(local_dir, drive_project_path)
    else:
        # On a fresh runtime the folder only exists after the preprocessing
        # cells further down in the notebook have been run.
        print(f"{local_dir} not found; run the preprocessing cells first.")
        not_in_drive.append(local_dir)

if not_in_drive:
    print(f"Hand-off incomplete: {len(not_in_drive)} folder(s) still missing from Drive.")
else:
    print("Hand-off complete! Your data is now permanently stored in Google Drive.")

## Zipping the preprocessed images in Google Drive



In [None]:
# Creates two zip archives, one per pre-processed image folder. The archive names
# are relative, so they are written to the current working directory
# (presumably /content/ on Colab — confirm).
# NOTE(review): the Drive hand-off cell earlier in this notebook calls shutil.move on
# both of these folders, so they may no longer exist under /content/ when this cell
# runs in notebook order — confirm the intended source paths.
!zip -r dataset2_processed.zip /content/dataset2_images
!zip -r dataset3_subsampled.zip /content/dataset3_subsampled


## Importing the preprocessed images from Datasets 2 and 3


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read the image folders
# from /content/drive/MyDrive/SignLanguageProject/.
drive.mount('/content/drive')

# 2) Data

## 2.0 Checking if Dataset-2 contains videos

In [None]:
import os

# Gather every file name found under Dataset 2, lower-cased so the
# extension comparison below is case-insensitive.
files = []
for root, dirs, filenames in os.walk(path2):
    files.extend(name.lower() for name in filenames)

video_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
# str.endswith accepts a tuple of suffixes, which replaces the per-extension any().
found_videos = [name for name in files if name.endswith(tuple(video_extensions))]

if not found_videos:
    print("Dataset 2 contains images. No OpenCV extraction needed.")
else:
    print(f"Dataset 2 contains {len(found_videos)} videos. OpenCV extraction is required.")

## 2.1) Create Datasets

### 2.1.1) Subsampling Dataset-3 to 20k images


In [None]:
import os
import random
import shutil


# Sampling source: the Dataset 3 download location returned by kagglehub (path3).
source_dir = path3
# Folder on the Colab VM that receives the subsampled copy of Dataset 3.
target_dir = '/content/dataset3_subsampled'

def subsample_data(src, dest, total_goal=20000, seed=None):
    """Copy a random subset of the images found under ``src`` into ``dest``.

    Every ``.png``/``.jpg``/``.jpeg`` file below ``src`` (matched
    case-insensitively) is a candidate. The chosen files are copied, not moved,
    and keep their path relative to ``src`` so the sub-folder layout is preserved.

    Args:
        src: Root directory that is searched recursively for images.
        dest: Directory that receives the copies; created if missing.
        total_goal: Maximum number of images to copy. When fewer candidates
            exist, all of them are copied.
        seed: Optional seed for the sampling. With a seed, the same ``src``
            content always yields the same selection. With ``None`` the
            module-level ``random`` generator is used, as before.

    Returns:
        None. A one-line summary is printed.
    """
    os.makedirs(dest, exist_ok=True)

    image_suffixes = ('.png', '.jpg', '.jpeg')
    # Sorted so the candidate order (and therefore a seeded sample) does not
    # depend on the order in which the file system lists directory entries.
    all_images = sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(src)
        for name in names
        if name.lower().endswith(image_suffixes)
    )

    rng = random if seed is None else random.Random(seed)
    selected = rng.sample(all_images, min(total_goal, len(all_images)))

    for img_path in selected:
        rel_path = os.path.relpath(img_path, src)
        new_path = os.path.join(dest, rel_path)
        os.makedirs(os.path.dirname(new_path), exist_ok=True)
        shutil.copy(img_path, new_path)
    # The source files stay in place, so the summary says "copied".
    print(f"Subsampling complete: {len(selected)} images copied to {dest}")

# Run the subsampling with the function's default total_goal (20000 images).
subsample_data(source_dir, target_dir)

Creation of a dummy dataset to ensure that the pipeline works:

In [None]:
class DummyImageDataset(Dataset):
    """Synthetic dataset of random tensors and random labels for pipeline smoke tests.

    Nothing is stored: each access draws a fresh ``torch.randn`` image and a
    fresh random label, so the same index returns different data on every call.

    Args:
        num_samples: Number of samples the dataset reports via ``len()``.
        num_classes: Labels are drawn uniformly from ``0 .. num_classes - 1``.
        image_size: Shape of each generated image tensor.
    """

    def __init__(self, num_samples=1000, num_classes=10, image_size=(3, 224, 224)):
        self.num_samples = num_samples
        self.num_classes = num_classes
        self.image_size = image_size

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        """Return a ``(image, label)`` pair for ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        # Without this bound check, plain iteration (`for x in dataset`,
        # `list(dataset)`) never stops: Python's sequence-iteration protocol
        # relies on IndexError to detect the end.
        if idx < 0:
            idx += self.num_samples
        if not 0 <= idx < self.num_samples:
            raise IndexError(
                f"index out of range for DummyImageDataset of size {self.num_samples}"
            )
        image = torch.randn(self.image_size)          # fake image
        label = torch.randint(0, self.num_classes, (1,)).item()  # fake class
        return image, label

## Extracting the preprocessed images from the google drive folders

In [None]:
from torchvision import datasets, transforms

# Root folder in Google Drive that holds the pre-processed image folders.
drive_project_path = '/content/drive/MyDrive/SignLanguageProject/'

# The preprocessing pipeline is defined here because this cell runs before
# section 2.4 in notebook order; relying on the later definition raises a
# NameError on a fresh "Restart & Run All". The values are the same as in
# section 2.4: resize to 224x224, convert to tensor, normalize per channel.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Dataset 2: frames extracted from the videos
train_dataset2 = datasets.ImageFolder(root=drive_project_path + 'dataset2_images', transform=data_transforms)

# Dataset 3: the SUBSAMPLED folder, not the full download
train_dataset3 = datasets.ImageFolder(root=drive_project_path + 'dataset3_subsampled', transform=data_transforms)

print(f"Dataset 2 (WLASL) Ready from Drive: {len(train_dataset2)} images")
print(f"Dataset 3 (Combined) Ready from Drive: {len(train_dataset3)} images")

## 2.2) Data Splitting

Split each dataset into training (80%) and validation (20%) subsets:

In [None]:
from torch.utils.data import random_split

# Fixed seed so the train/validation membership is identical on every run;
# an unseeded random_split reshuffles the samples each time the cell executes.
SPLIT_SEED = 42


def split_train_val(dataset, train_fraction=0.8, seed=SPLIT_SEED):
    """Split ``dataset`` into train and validation subsets with a seeded shuffle.

    Args:
        dataset: Any sized dataset accepted by ``random_split``.
        train_fraction: Share of samples assigned to the training subset; the
            size is truncated with ``int()`` and the remainder goes to validation.
        seed: Seed for the ``torch.Generator`` that drives the shuffle.

    Returns:
        ``(train_subset, val_subset, train_size, val_size)``.
    """
    train_size = int(train_fraction * len(dataset))
    val_size = len(dataset) - train_size
    generator = torch.Generator().manual_seed(seed)
    train_subset, val_subset = random_split(
        dataset, [train_size, val_size], generator=generator
    )
    return train_subset, val_subset, train_size, val_size


# Split Dataset 2 (WLASL)
ds2_train, ds2_val, train_size2, val_size2 = split_train_val(train_dataset2)

# Split Dataset 3 (Combined ASL)
ds3_train, ds3_val, train_size3, val_size3 = split_train_val(train_dataset3)

print(f"Dataset 2 Split: {train_size2} Train, {val_size2} Val")
print(f"Dataset 3 Split: {train_size3} Train, {val_size3} Val")

## 2.3) Data Loaders

In [None]:
from torch.utils.data import DataLoader

# Final output of the preprocessing stage: one (train, validation) loader pair
# per dataset. Only the training loaders shuffle; all use batches of 32.

# Dataset 2 (WLASL)
train_loader2, val_loader2 = (
    DataLoader(ds2_train, batch_size=32, shuffle=True),
    DataLoader(ds2_val, batch_size=32),
)

# Dataset 3 (Combined ASL)
train_loader3, val_loader3 = (
    DataLoader(ds3_train, batch_size=32, shuffle=True),
    DataLoader(ds3_val, batch_size=32),
)

print("DataLoaders ready for training")

Dummy data loaders used to test that the pipeline works:

In [None]:
# The dummy datasets are created here because no other cell in the notebook
# defines them; without these two lines the loaders below raise a NameError.
# 800/200 samples mirrors the 80/20 train/validation split used for the real data.
dummy_train_dataset = DummyImageDataset(num_samples=800)
dummy_val_dataset = DummyImageDataset(num_samples=200)

dummy_train_loader = DataLoader(dummy_train_dataset, batch_size=32, shuffle=True)
dummy_val_loader   = DataLoader(dummy_val_dataset, batch_size=32)

## 2.4) Data Preprocessing

### 2.4.1) extracting middle frame from videos in dataset 2

In [None]:
import cv2
import os

# Folder on the Colab VM that receives the extracted frames; created on first run.
# The Drive hand-off and zip cells refer to this same path.
output_dir2 = '/content/dataset2_images'
if not os.path.exists(output_dir2):
    os.makedirs(output_dir2)

def extract_frames(video_root, output_root):
    """Save the middle frame of every video under ``video_root`` as a JPEG.

    The directory tree below ``video_root`` is mirrored under ``output_root``;
    each image is named after its video with the extension replaced by ``.jpg``.
    Videos whose frame cannot be read or written are skipped and listed in the
    printed summary instead of being dropped silently.

    NOTE(review): the mirrored folders are later read with ImageFolder, which
    treats each sub-folder as a class — confirm the videos are actually
    organised in one folder per label.

    Args:
        video_root: Directory searched recursively for video files.
        output_root: Directory that receives the extracted frames.

    Returns:
        None. Progress is reported via ``print``.
    """
    # Same extension set as the Dataset 2 video check in section 2.0.
    video_suffixes = ('.mp4', '.avi', '.mov', '.mkv')
    saved_count = 0
    failed_videos = []

    for root, dirs, files in os.walk(video_root):
        for file in files:
            if not file.lower().endswith(video_suffixes):
                continue
            video_path = os.path.join(root, file)

            # Maintain folder structure for classes (labels)
            rel_path = os.path.relpath(root, video_root)
            label_folder = os.path.join(output_root, rel_path)
            os.makedirs(label_folder, exist_ok=True)
            img_path = os.path.join(label_folder, os.path.splitext(file)[0] + ".jpg")

            # OpenCV extraction; try/finally guarantees the capture handle is
            # released even if reading or writing raises.
            cap = cv2.VideoCapture(video_path)
            try:
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)  # Get middle frame
                success, frame = cap.read()
                # imwrite reports failure through its return value, not an exception.
                written = bool(success) and bool(cv2.imwrite(img_path, frame))
            finally:
                cap.release()

            if written:
                saved_count += 1
            else:
                failed_videos.append(video_path)

    print("Extraction Complete!")
    print(f"Saved {saved_count} frame(s); {len(failed_videos)} video(s) could not be processed.")
    for failed_path in failed_videos:
        print(f"  failed: {failed_path}")

# Run the extraction: reads the Dataset 2 download (path2) and writes the
# frames into output_dir2.
extract_frames(path2, output_dir2)

## Resizing frames to 224 x 224

In [None]:
from torchvision import transforms

# Shared preprocessing for MobileNet/ResNet/VGG inputs:
#   1. resize every image to 224x224,
#   2. convert it to a tensor,
#   3. normalize each channel with the fixed mean/std below.
data_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# 3) Training Procedure

## 3.1) Load Models

With pretrained models, the accuracy should in theory be better because the model does not have to learn general visual features from scratch. Models available [here](https://docs.pytorch.org/vision/main/models.html).

In [None]:
# Load the three backbones with their IMAGENET1K_V1 pretrained weights,
# selected through torchvision's weight enums rather than the string alias.
model_resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model_mobilenet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
model_vgg = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

## 3.2) Classifier Head Replacement

## 3.3) Freeze Feature Extractor

## 3.4) Optimizer + Loss Setup

## 3.5) Forward Pass

## 3.6) Loss Calculation

## 3.7) Backpropagation

## 3.8) Weight Update Step

## 3.9) Validation Phase

## 3.10) Optimization

## 3.11) Save Trained Model

# 4) Model Evaluation & Analysis