In [6]:
import os
import zipfile
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# -------------------- SETTINGS --------------------
# Archive to unpack, and the directory the script extracts into and then
# scans for gesture class folders.
zip_path = r"C:\Users\jainp\OneDrive\Documents\Downloads\archive.zip"
extract_dir = r"C:\Users\jainp\OneDrive\Documents\Downloads\gesture_data\00\leapGestRecog\00"

# Gesture folders used for training. A folder's position in this list is its
# integer class label, so label_map is derived from the list itself.
gesture_classes = ["02_l", "03_fist", "01_palm"]
label_map = {name: index for index, name in enumerate(gesture_classes)}

# Training hyperparameters.
batch_size, num_epochs = 32, 10
image_size = 50  # side length, in pixels, that images are resized to

# -------------------- UNZIP --------------------
# Unpack the archive once: extraction is skipped whenever extract_dir already
# exists on disk (its contents are not inspected).
# NOTE(review): extract_dir is both the extraction target and the directory
# later scanned for class folders. If the archive keeps its images under
# nested folders, a fresh extraction will not put the class folders directly
# here -- confirm against the archive layout.
if os.path.exists(extract_dir):
    print("‚ÑπÔ∏è Folder already extracted.")
else:
    with zipfile.ZipFile(zip_path, mode="r") as zip_ref:
        zip_ref.extractall(path=extract_dir)
    print("‚úÖ Extracted zip file.")

# Show where the data lives and which entries are available there.
print("üîç Extracted to:", extract_dir)
print("üìÇ Available folders:", os.listdir(extract_dir))

# -------------------- TRANSFORMS --------------------
# Preprocessing shared by training and prediction: convert to grayscale,
# resize to an image_size x image_size square, then convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.Resize(size=(image_size, image_size)),
        transforms.ToTensor(),
    ]
)

# -------------------- CUSTOM DATASET --------------------
class GestureDataset(Dataset):
    """Image-classification dataset read from one sub-folder per gesture class.

    Each name in ``classes`` is looked up as a directory directly under
    ``root_dir``. Every ``.png``/``.jpg``/``.jpeg`` file inside it (extension
    matched case-insensitively) becomes one sample carrying that folder's
    integer label. Files are collected in sorted name order so that sample
    indices do not depend on the order the filesystem lists them in.

    Args:
        root_dir: Directory that contains the class folders.
        classes: Folder names to load. A name that is not an existing
            directory is reported on stdout and skipped.
        transform: Optional callable applied to the PIL image in
            ``__getitem__``.
        class_to_idx: Optional mapping from folder name to integer label.
            When omitted, the module-level ``label_map`` is used, which is
            what this class always did before the parameter existed.

    Raises:
        KeyError: If a folder that contains images has no entry in the
            label mapping.
    """

    _IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, classes, transform=None, class_to_idx=None):
        self.images = []  # file paths, one per sample
        self.labels = []  # integer labels, parallel to self.images
        self.transform = transform

        # Only fall back to the global when the caller supplied no mapping,
        # so the class can be used without that global being defined.
        mapping = label_map if class_to_idx is None else class_to_idx

        for folder in classes:
            folder_path = os.path.join(root_dir, folder)
            # isdir (not exists) so a stray file with a class name is
            # reported here instead of crashing in os.listdir.
            if not os.path.isdir(folder_path):
                print(f"‚ùå Folder not found: {folder_path}")
                continue

            image_names = sorted(
                name
                for name in os.listdir(folder_path)
                if name.lower().endswith(self._IMAGE_EXTENSIONS)
            )
            if not image_names:
                continue

            label = mapping[folder]
            self.images.extend(os.path.join(folder_path, name) for name in image_names)
            self.labels.extend([label] * len(image_names))

        print(f"‚úÖ Loaded {len(self.images)} images.")

    def __len__(self):
        """Return the number of collected samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        The image is the output of ``transform`` when one was given,
        otherwise an in-memory PIL image.
        """
        # Image.open is lazy and keeps the file open; copy the pixel data
        # into memory and close the handle before handing the image on.
        with Image.open(self.images[idx]) as opened:
            img = opened.copy()
        if self.transform:
            img = self.transform(img)
        return img, self.labels[idx]

# -------------------- DATA LOADER --------------------
# Collect the samples for the configured gesture folders and stop right away
# if none were found, since training on an empty dataset is pointless.
dataset = GestureDataset(root_dir=extract_dir, classes=gesture_classes, transform=transform)
if not len(dataset):
    raise ValueError("‚ùå Dataset is empty. Please check folder structure and image files.")

# Batches are reshuffled on every pass over the data.
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# -------------------- MODEL --------------------
class GestureCNN(nn.Module):
    """Small two-stage convolutional classifier for single-channel square images.

    The network applies two ``Conv2d -> ReLU -> MaxPool2d(2)`` stages
    (1 -> 16 -> 32 channels), flattens the result and classifies it with a
    128-unit hidden layer.

    Args:
        num_classes: Number of output logits. Defaults to 3.
        image_size: Side length, in pixels, of the square input images.
            Defaults to 50, which yields the original ``32 * 12 * 12``
            flattened feature size.

    Raises:
        ValueError: If ``image_size`` is too small to survive both pooling
            stages (smaller than 4).
    """

    def __init__(self, num_classes=3, image_size=50):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # The padded 3x3 convolutions keep the spatial size; each MaxPool2d(2)
        # halves it, rounding down. Two pools therefore leave
        # image_size // 2 // 2 pixels per side (50 -> 25 -> 12).
        pooled_side = image_size // 2 // 2
        if pooled_side < 1:
            raise ValueError(
                f"image_size must be at least 4 for two 2x2 poolings, got {image_size}"
            )

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * pooled_side * pooled_side, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for input ``x``.

        ``x`` is expected to be ``(batch, 1, image_size, image_size)``.
        """
        x = self.conv(x)
        x = self.fc(x)
        return x

# -------------------- TRAINING --------------------
# Use the GPU when CUDA is available, otherwise train on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# nn.Module.to returns the module itself, so this both builds the network
# and places it on the chosen device.
model = GestureCNN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch.
    mean_loss = running_loss / len(train_loader)
    print(f"üìà Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

print("‚úÖ Training complete!")

# -------------------- SAVE MODEL --------------------
# Only the learned weights (state_dict) are written, not the module object.
torch.save(model.state_dict(), "gesture_cnn.pth")
print("üíæ Model saved as gesture_cnn.pth")

# -------------------- PREDICTION FUNCTION --------------------
def predict_image(image_path, model):
    """Classify a single image file and return the predicted class index.

    The image is preprocessed with the module-level ``transform`` and passed
    through ``model`` as a batch of one.

    Args:
        image_path: Path of the image file to classify.
        model: Network to run. It is switched to evaluation mode and left
            in that mode.

    Returns:
        The index of the highest-scoring output, as a Python ``int``.
    """
    model.eval()

    # Feed the input on the device the model's weights are on, so a model
    # that lives elsewhere than the module-level `device` still works. A
    # model without parameters falls back to that module-level device.
    first_param = next(model.parameters(), None)
    target_device = device if first_param is None else first_param.device

    # Image.open is lazy and keeps the file open; transform inside the
    # `with` so the handle is closed as soon as the tensor is built.
    with Image.open(image_path) as img:
        batch = transform(img).unsqueeze(0).to(target_device)

    with torch.no_grad():
        logits = model(batch)
    return logits.argmax(dim=1).item()

# -------------------- TEST ON SAMPLE IMAGE --------------------
def get_first_image_path(folder_path):
    """Return the path of the first image file in ``folder_path``, or ``None``.

    A file counts as an image when its name ends in ``.png``, ``.jpg`` or
    ``.jpeg``, compared case-insensitively. "First" means first in sorted
    name order, so the result does not depend on the order in which the
    filesystem lists the directory.

    Args:
        folder_path: Directory to search (not searched recursively).

    Returns:
        ``folder_path`` joined with the matching file name, or ``None`` when
        the directory holds no matching file.
    """
    image_extensions = (".png", ".jpg", ".jpeg")
    first_name = next(
        (
            file_name
            for file_name in sorted(os.listdir(folder_path))
            if file_name.lower().endswith(image_extensions)
        ),
        None,
    )
    if first_name is None:
        return None
    return os.path.join(folder_path, first_name)


ℹ️ Folder already extracted.
🔍 Extracted to: C:\Users\jainp\OneDrive\Documents\Downloads\gesture_data\00\leapGestRecog\00
📂 Available folders: ['01_palm', '02_l', '03_fist', '04_fist_moved', '05_thumb', '06_index', '07_ok', '08_palm_moved', '09_c', '10_down']
✅ Loaded 600 images.
📈 Epoch [1/10], Loss: 1.0030
📈 Epoch [2/10], Loss: 0.3553
📈 Epoch [3/10], Loss: 0.0569
📈 Epoch [4/10], Loss: 0.0146
📈 Epoch [5/10], Loss: 0.0075
📈 Epoch [6/10], Loss: 0.0036
📈 Epoch [7/10], Loss: 0.0043
📈 Epoch [8/10], Loss: 0.0025
📈 Epoch [9/10], Loss: 0.0020
📈 Epoch [10/10], Loss: 0.0012
✅ Training complete!
💾 Model saved as gesture_cnn.pth
