In [17]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image

# -------------------- SETTINGS --------------------
# Folder that holds one sub-folder per gesture class. The original location stays
# the default; set the GESTURE_DATA_DIR environment variable to point the notebook
# at another copy of the data without editing this cell.
data_dir = os.environ.get(
    "GESTURE_DATA_DIR",
    r"C:\Users\sarik\Downloads\Prodology internship\task 4\archive\leapGestRecog\00",
)
# Class sub-folders to load; a folder's position in this list is its integer label.
gesture_classes = ["02_l", "03_fist", "01_palm"]
# Built from gesture_classes so the folder list and the label ids cannot drift apart.
label_map = {name: index for index, name in enumerate(gesture_classes)}
batch_size = 32
num_epochs = 10
image_size = 50  # side length, in pixels, that every image is resized to

# Seed torch's RNG up front so weight initialisation and shuffle order repeat
# across "Restart Kernel -> Run All".
seed = 42
torch.manual_seed(seed)

# -------------------- TRANSFORMS --------------------
# Preprocessing applied to each image, in order: convert to grayscale, resize to
# an image_size x image_size square, then turn the result into a tensor.
_preprocessing_steps = [
    transforms.Grayscale(),
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
]
transform = transforms.Compose(_preprocessing_steps)

# -------------------- CUSTOM DATASET --------------------
from torch.utils.data import Dataset

class GestureDataset(Dataset):
    """Dataset of image files laid out as ``root_dir/<class folder>/<image file>``.

    Only the file paths and labels are collected up front; each image is opened
    when it is indexed.

    Args:
        root_dir: Directory containing one sub-folder per class.
        classes: Names of the sub-folders to load.
        transform: Optional callable applied to each opened PIL image.
        class_to_idx: Optional mapping from folder name to integer label. When
            omitted, a folder's label is its position in ``classes``.

    Raises:
        KeyError: If a name in ``classes`` is missing from ``class_to_idx``.
        OSError: Propagated from ``os.listdir`` when a class folder cannot be read.
    """

    # Lower-case suffixes treated as images; any other entry in a class folder is skipped.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff")

    def __init__(self, root_dir, classes, transform=None, class_to_idx=None):
        classes = list(classes)  # allow any iterable; it is walked twice below
        if class_to_idx is None:
            class_to_idx = {name: index for index, name in enumerate(classes)}
        self.class_to_idx = dict(class_to_idx)
        self.images = []
        self.labels = []
        self.transform = transform
        for folder in classes:
            label = self.class_to_idx[folder]
            folder_path = os.path.join(root_dir, folder)
            # os.listdir returns entries in arbitrary order; sort so that sample
            # indices are the same on every run and every machine.
            for img_name in sorted(os.listdir(folder_path)):
                if not img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                self.images.append(os.path.join(folder_path, img_name))
                self.labels.append(label)

    def __len__(self):
        """Return the number of image files found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``image`` is the transform's output, or a PIL image when no transform
        was given.
        """
        # copy() decodes the pixels into an image that no longer needs the file,
        # so the handle is released here rather than whenever PIL gets to it.
        with Image.open(self.images[idx]) as opened:
            img = opened.copy()
        if self.transform:
            img = self.transform(img)
        return img, self.labels[idx]

# -------------------- DATA LOADER --------------------
# Index every image under the selected class folders, then serve them in
# shuffled mini-batches of batch_size.
dataset = GestureDataset(
    root_dir=data_dir,
    classes=gesture_classes,
    transform=transform,
)
train_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
)

# -------------------- MODEL --------------------
class GestureCNN(nn.Module):
    """Small convolutional classifier for square, single-channel images.

    Two conv -> ReLU -> max-pool stages feed a two-layer fully connected head.
    With the default arguments the layers, and therefore the ``state_dict``
    keys and shapes, are the same as the original fixed 50x50 / 3-class network.

    Args:
        num_classes: Number of output scores per image. Defaults to 3.
        image_size: Side length, in pixels, of the square input. Defaults to 50.

    Raises:
        ValueError: If ``num_classes`` is below 1 or ``image_size`` is too small
            to survive the two pooling stages.
    """

    def __init__(self, num_classes=3, image_size=50):
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        # The 3x3 convolutions use padding=1 and keep the side length; each
        # MaxPool2d(2) halves it, rounding down (50 -> 25 -> 12).
        pooled_size = image_size // 2 // 2
        if pooled_size < 1:
            raise ValueError(f"image_size must be >= 4, got {image_size}")
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            # 32 channels x pooled_size x pooled_size values per image after flattening.
            nn.Linear(32 * pooled_size * pooled_size, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return one row of ``num_classes`` raw scores per input image."""
        return self.fc(self.conv(x))

model = GestureCNN()

# -------------------- TRAINING --------------------
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the mean over the batch. Weight it by the batch size
        # so a short final batch does not count as much as a full one in the
        # epoch average.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    # max(..., 1) keeps the report from dividing by zero if the loader yielded nothing.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/max(samples_seen, 1):.4f}")

print("Training complete!")

# -------------------- SAVE MODEL --------------------
# Only the learned weights are written; loading them requires rebuilding GestureCNN first.
torch.save(model.state_dict(), "gesture_cnn.pth")

# -------------------- PREDICTION FUNCTION --------------------
def predict_image(image_path, model):
    """Classify a single image file and return the predicted class index.

    The image is preprocessed with the module-level ``transform`` and run
    through ``model`` as a batch of one.

    Args:
        image_path: Path of the image file to open.
        model: Network that maps the preprocessed batch to one score per class.

    Returns:
        int: Index of the highest-scoring class.

    Note:
        The model is switched to eval mode and left that way.
    """
    model.eval()
    # Send the input to wherever this model's weights live, so a model on a
    # different device than the global `device` still works. A model with no
    # parameters falls back to the global `device`, as before.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device
    # Preprocess inside the `with` so the file is closed as soon as the pixels
    # have been turned into a tensor.
    with Image.open(image_path) as img:
        batch = transform(img).unsqueeze(0)
    batch = batch.to(target_device)
    with torch.no_grad():
        output = model(batch)
    return output.argmax(dim=1).item()

# -------------------- TEST ON SAMPLE IMAGE --------------------
def get_first_image_path(folder_path):
    """Return the path of the alphabetically first image file in ``folder_path``.

    A file counts as an image when its name ends in ``.png``, ``.jpg`` or
    ``.jpeg``, compared case-insensitively.

    Args:
        folder_path: Directory to search (not recursive).

    Returns:
        The joined path of the first matching entry, or ``None`` when the
        folder contains no matching file.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    # os.listdir order is arbitrary; sorting makes "first" mean the same file every run.
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.lower().endswith(image_suffixes):
            return os.path.join(folder_path, file_name)
    return None

# Pick one class folder and classify the first image found in it.
test_folder = os.path.join(data_dir, "01_palm")
test_image = get_first_image_path(test_folder)

if not test_image:
    print("No image found to test.")
else:
    # Human-readable names for the integer labels the model predicts.
    reverse_label_map = {0: "L", 1: "fist", 2: "palm"}
    prediction = predict_image(test_image, model)
    print(f"Predicted class: {reverse_label_map[prediction]}")


Epoch [1/10], Loss: 0.9811
Epoch [2/10], Loss: 0.6362
Epoch [3/10], Loss: 0.2375
Epoch [4/10], Loss: 0.0635
Epoch [5/10], Loss: 0.0133
Epoch [6/10], Loss: 0.0046
Epoch [7/10], Loss: 0.0023
Epoch [8/10], Loss: 0.0016
Epoch [9/10], Loss: 0.0013
Epoch [10/10], Loss: 0.0010
Training complete!
Predicted class: palm


In [19]:
# Write the model's state_dict to gesture_model.pth in the working directory.
torch.save(obj=model.state_dict(), f='gesture_model.pth')
print("Model saved successfully.")


Model saved successfully.
