In [4]:
"""
Hand Gesture Recognition - PyTorch Preprocessing Pipeline
"""

import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# ======================
# Configuration
# ======================
class Config:
    # Directory configuration
    base_dir = "/Users/harshithkethireddy/Documents/intelligent_data"
    train_dir = os.path.join(base_dir, "train/train")
    test_dir = os.path.join(base_dir, "test/test")
    
    # Image parameters
    img_size = (224, 224)
    num_classes = 20
    batch_size = 32
    num_workers = 4
    validation_ratio = 0.2
    random_seed = 42

# Set random seeds for reproducibility
torch.manual_seed(Config.random_seed)
np.random.seed(Config.random_seed)

# ======================
# Custom Dataset Class
# ======================
class HandGestureDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = sorted(os.listdir(root_dir))
        self.image_paths = []
        self.labels = []
        
        # Load image paths and labels
        for label, class_name in enumerate(tqdm(self.classes, desc="Loading Classes")):
            class_dir = os.path.join(root_dir, class_name)
            for img_name in os.listdir(class_dir):
                self.image_paths.append(os.path.join(class_dir, img_name))
                self.labels.append(label)
                
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        
        # Load image
        image = Image.open(img_path).convert('L')  # Convert to grayscale
        
        # Apply transformations
        if self.transform:
            image = self.transform(image)
            
        return image, label

# ======================
# Transformation Pipelines
# ======================
# Base transformations
base_transform = transforms.Compose([
    transforms.Resize(Config.img_size),
    transforms.ToTensor(),  # Converts to [0,1] and CxHxW format
])

# Augmentation transformations
train_transform = transforms.Compose([
    transforms.Resize(Config.img_size),
    transforms.RandomRotation(15),  # ±15 degrees
    transforms.RandomAffine(
        degrees=0, 
        translate=(0.1, 0.1),  # ±10% translation
        scale=(0.9, 1.1)  # ±10% zoom
    ),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize to [-1,1]
])

# Validation/test transformations
val_test_transform = transforms.Compose([
    transforms.Resize(Config.img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# ======================
# Data Loading Functions
# ======================
def create_datasets():
    # Create base datasets
    full_train_dataset = HandGestureDataset(
        Config.train_dir,
        transform=base_transform  # Apply only basic transforms initially
    )
    
    test_dataset = HandGestureDataset(
        Config.test_dir,
        transform=base_transform
    )
    
    # Split training into train/validation
    train_size = int((1 - Config.validation_ratio) * len(full_train_dataset))
    val_size = len(full_train_dataset) - train_size
    
    train_dataset, val_dataset = random_split(
        full_train_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(Config.random_seed)
    )
    
    # Apply proper transforms to each subset
    train_dataset.dataset.transform = train_transform
    val_dataset.dataset.transform = val_test_transform
    test_dataset.transform = val_test_transform
    
    return train_dataset, val_dataset, test_dataset

# ======================
# Data Visualization
# ======================
def visualize_batch(dataloader, num_images=8):
    dataiter = iter(dataloader)
    images, labels = next(dataiter)
    
    # Convert images back to 0-1 range for display
    inverse_normalize = transforms.Normalize(
        mean=[-0.5/0.5],  # Reverse normalization
        std=[1/0.5]
    )
    images = inverse_normalize(images)
    
    fig = plt.figure(figsize=(12, 6))
    for i in range(num_images):
        ax = plt.subplot(2, 4, i+1)
        img = images[i].squeeze().numpy()
        plt.imshow(img, cmap='gray')
        plt.title(f"Label: {labels[i].item()}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# ======================
# Data Loader Creation
# ======================
def create_dataloaders(train_dataset, val_dataset, test_dataset):
    train_loader = DataLoader(
        train_dataset,
        batch_size=Config.batch_size,
        shuffle=True,
        num_workers=Config.num_workers,
        pin_memory=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=Config.batch_size,
        shuffle=False,
        num_workers=Config.num_workers,
        pin_memory=True
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=Config.batch_size,
        shuffle=False,
        num_workers=Config.num_workers,
        pin_memory=True
    )
    
    return train_loader, val_loader, test_loader

# ======================
# Dataset Statistics
# ======================
def calculate_dataset_stats(dataset):
    loader = DataLoader(
        dataset,
        batch_size=Config.batch_size,
        num_workers=Config.num_workers
    )
    
    mean = 0.
    std = 0.
    nb_samples = 0.
    
    for images, _ in tqdm(loader, desc="Calculating stats"):
        batch_samples = images.size(0)
        images = images.view(batch_samples, images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        nb_samples += batch_samples
        
    mean /= nb_samples
    std /= nb_samples
    
    return mean, std

# ======================
# Main Execution
# ======================
if __name__ == "__main__":
    # Create datasets
    train_dataset, val_dataset, test_dataset = create_datasets()
    
    # Create dataloaders
    train_loader, val_loader, test_loader = create_dataloaders(
        train_dataset, val_dataset, test_dataset
    )
    
    # Print dataset statistics
    print("\nDataset Statistics:")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    print(f"Test samples: {len(test_dataset)}")
    
    # Calculate and print mean/std
    train_mean, train_std = calculate_dataset_stats(train_dataset)
    print(f"\nTraining set - Mean: {train_mean.item():.4f}, Std: {train_std.item():.4f}")
    
    # Visualize training batch
    print("\nVisualizing training batch...")
    visualize_batch(train_loader)
    
    # Example usage in training loop:
    print("\nSample training loop structure:")
    for epoch in range(2):  # Demo with 2 epochs
        print(f"\nEpoch {epoch+1}")
        progress_bar = tqdm(train_loader, desc="Training")
        for images, labels in progress_bar:
            # Your training code here
            pass

Loading Classes: 100%|██████████| 20/20 [00:00<00:00, 385.24it/s]
Loading Classes: 100%|██████████| 20/20 [00:00<00:00, 984.20it/s]



Dataset Statistics:
Training samples: 14400
Validation samples: 3600
Test samples: 6000


Calculating stats:   0%|          | 0/450 [00:00<?, ?it/s]Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/harshithkethireddy/opt/miniconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/harshithkethireddy/opt/miniconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'HandGestureDataset' on <module '__main__' (built-in)>
