In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0067.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0153.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0105.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0018.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0050.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0029.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0195.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0051.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0074.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0142.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0149.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0121.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0194.png
/kaggle/input/leapgestrecog/leapGestRecog/07/02_l/frame_07_02_0003.png
/kaggl

In [2]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import torchvision.models as models
import torch.optim as optim

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
class LeapGestureDataset(Dataset):
    def __init__(self, root_dir, transform=None):
       
        self.root_dir = root_dir
        self.transform = transform
        
        # Define class names based on the folder structure shown
        self.class_names = [
            'palm', 'l', 'fist', 'fist_moved', 'thumb', 
            'index', 'ok', 'palm_moved', 'c', 'down'
        ]
        
        # Create class to index mapping
        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(self.class_names)}
        
        # Collect all image paths and labels
        self.samples = []
        self._load_samples()
    
    def _load_samples(self):
        # Iterate through numbered folders (00, 01, 02, etc.)
        for folder_num in range(10):  # 00 to 09
            folder_name = f"{folder_num:02d}"
            folder_path = os.path.join(self.root_dir, folder_name)
            
            if not os.path.exists(folder_path):
                continue
            
            # Iterate through gesture class folders within each numbered folder
            for class_folder in os.listdir(folder_path):
                class_path = os.path.join(folder_path, class_folder)
                
                if not os.path.isdir(class_path):
                    continue
                
                # Extract class name from folder name (remove number prefix)
                # e.g., "01_palm" -> "palm"
                class_name = class_folder.split('_', 1)[-1] if '_' in class_folder else class_folder
                
                # Skip if class name not in our predefined classes
                if class_name not in self.class_to_idx:
                    continue
                
                class_idx = self.class_to_idx[class_name]
                
                # Load all images from this class folder
                for img_file in os.listdir(class_path):
                    if img_file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                        img_path = os.path.join(class_path, img_file)
                        self.samples.append((img_path, class_idx))
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        img_path, label = self.samples[idx]
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            # Return a black image as fallback
            image = Image.new('RGB', (224, 224), color='black')
        
        # Apply transforms if provided
        if self.transform:
            image = self.transform(image)
        
        return image, label

In [4]:
def create_train_val_datasets(root_dir, train_ratio=0.8, random_state=42, transform = None):
    
    # First create the full dataset to get all samples
    full_dataset = LeapGestureDataset(root_dir, transform=None)
    
    if len(full_dataset.samples) == 0:
        raise ValueError("No samples found in dataset")
    
    # Extract paths and labels
    paths = [sample[0] for sample in full_dataset.samples]
    labels = [sample[1] for sample in full_dataset.samples]
    
    # Stratified split to maintain class distribution in both sets
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        paths, labels,
        test_size=1-train_ratio,
        random_state=random_state,
        stratify=labels  # This ensures balanced class distribution
    )

    
    # Create training dataset
    train_dataset = LeapGestureDataset(root_dir, transform=transform)
    train_dataset.samples = list(zip(train_paths, train_labels))
    
    # Create validation dataset
    val_dataset = LeapGestureDataset(root_dir, transform=transform)
    val_dataset.samples = list(zip(val_paths, val_labels))
    
    return train_dataset, val_dataset

In [5]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p = 0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset, val_dataset = create_train_val_datasets(root_dir = "/kaggle/input/leapgestrecog/leapGestRecog",train_ratio=0.8, random_state=42, transform = transform)
train_dataloader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
val_dataloader = DataLoader(val_dataset, batch_size = 32, shuffle = False)

In [6]:
resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
num_classes = len(train_dataset.class_names)
resnet50.fc = nn.Linear(resnet50.fc.in_features, num_classes)
resnet50.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 215MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet50.parameters(), lr=0.01)
epochs = 50

def validate(val_dir = None, model = None, device = None):
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for images, lables in val_dir:
            images, lables = images.to(device), lables.to(device)
            output = model(images)
            loss = criterion(output, lables)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(val_dir)
    epoch_accuracy = 100 * correct_predictions / total_samples

    return epoch_loss, epoch_accuracy
    
for e in range(epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    for idx,(image, labels) in enumerate(train_dataloader):
        image, labels = image.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = resnet50(image)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    val_loss, val_accuracy = validate(val_dir = val_dataloader, model = resnet50, device = device)
    print(f"epoch: {e+1}/{epochs}, loss: {running_loss/len(train_dataloader)}, accuracy: {100 * correct_predictions / total_samples}")
    print(f"validation loss: {val_loss}, accuracy: {val_accuracy}")

epoch: 1/50, loss: 1.2042468405365945, accuracy: 56.6
validation loss: 0.47551396131515505, accuracy: 93.75
epoch: 2/50, loss: 617.8993271080255, accuracy: 10.84375
validation loss: 2.307673131942749, accuracy: 12.5
epoch: 3/50, loss: 2.2020946083068846, accuracy: 15.11875
validation loss: 1.9830916862487793, accuracy: 31.25
epoch: 4/50, loss: 1.839649343252182, accuracy: 32.4125
validation loss: 1.419802568435669, accuracy: 28.125
epoch: 5/50, loss: 0.7566554910838604, accuracy: 73.775
validation loss: 0.4600538889169693, accuracy: 84.375
epoch: 6/50, loss: 0.4404877052018419, accuracy: 88.35625
validation loss: 1.705400505065918, accuracy: 31.25
epoch: 7/50, loss: 0.560150990858674, accuracy: 80.2
validation loss: 0.13596255823224784, accuracy: 93.75
epoch: 8/50, loss: 0.15717170380568132, accuracy: 94.83125
validation loss: 0.07087150221690536, accuracy: 100.0
epoch: 9/50, loss: 0.10089043656311697, accuracy: 96.83125
validation loss: 0.06377009812183679, accuracy: 93.75
epoch: 10/5