In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision.utils import save_image
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import imageio
import matplotlib.image as mpimg
from scipy import ndimage
import cv2
from sklearn.model_selection import train_test_split


In [3]:
from google.colab import drive
import sys

# Mount Google Drive
drive.mount('/content/drive')

# Define the file path
data_path = "/content/drive/My Drive/Cmpt_419_Project/dataset_green/"

batch_size = 32
num_epochs = 100
learning_rate = 0.0002


Mounted at /content/drive


In [None]:
data_path = 'dataset_green/'
batch_size = 32
num_epochs = 100
learning_rate = 0.0002


In [5]:
# Load the data
from image_dataloader import GestureImageDataset

transform = transforms.Compose([
    transforms.RandomRotation(degrees=15),  # Rotate within ±15 degrees
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Shift up to 10%
    transforms.RandomResizedCrop(28, scale=(0.9, 1.1)),  # Slight zoom in/out
    transforms.Resize((28, 28)),
    transforms.ToTensor(),  # Converts to (C, H, W) where C=1 for grayscale
])


dataset = GestureImageDataset(data_path, transform=transform)

print("Dataset size: ", len(dataset))

# Split the data into training and validation sets

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

print("Training set size: ", len(train_dataset))
print("Validation set size: ", len(val_dataset))

# Create the dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)




Loaded 88 images from 3 classes.
Dataset size:  88
Training set size:  70
Validation set size:  18


In [6]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
# Create a Convolutional Neural Network
import torch
import torch.nn as nn

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(64 * 7 * 7, 1000)  # Corrected input size (3136)
        self.fc2 = nn.Linear(1000, 3)  # Assuming 3 classes

    def forward(self, x):
        out = self.layer1(x)  # (batch, 32, 14, 14)
        out = self.layer2(out)  # (batch, 64, 7, 7)
        out = out.view(out.size(0), -1)  # Flatten to (batch, 3136)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

# Test with a random input tensor
# model = ConvNet()
# x = torch.randn(8, 1, 28, 28)  # Batch size = 8, Grayscale image (1, 28, 28)
# output = model(x)
# print(output.shape)  # Expected: (8, 26)


In [8]:
# Create the model
model = ConvNet().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
# Train the model
total_step = len(train_loader)
train_loss = []
val_loss = []
train_acc = []
val_acc = []

for epoch in range(num_epochs):
    correct_train = 0
    total_train = 0
    model.train()  # Set model to training mode

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        train_loss.append(loss.item())

        # Compute training accuracy
        _, predicted = torch.max(outputs, 1)  # Get class with highest probability
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


    # Compute overall training accuracy
    train_accuracy = 100 * correct_train / total_train
    train_acc.append(train_accuracy)

    # Validation
    model.eval()  # Set model to evaluation mode
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for i, (images, labels) in enumerate(val_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss.append(loss.item())

            # Compute validation accuracy
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_accuracy = 100 * correct_val / total_val
    val_acc.append(val_accuracy)

    print(f'Epoch [{epoch+1}/{num_epochs}] - '
          f'Train Loss: {loss.item():.4f}, Train Accuracy: {train_accuracy:.2f}% - '
          f'Validation Loss: {loss.item():.4f}, Validation Accuracy: {val_accuracy:.2f}%')



Epoch [1/100] - Train Loss: 1.1018, Train Accuracy: 27.14% - Validation Loss: 1.1018, Validation Accuracy: 27.78%
Epoch [2/100] - Train Loss: 1.1114, Train Accuracy: 30.00% - Validation Loss: 1.1114, Validation Accuracy: 33.33%
Epoch [3/100] - Train Loss: 1.1202, Train Accuracy: 31.43% - Validation Loss: 1.1202, Validation Accuracy: 38.89%
Epoch [4/100] - Train Loss: 1.1119, Train Accuracy: 38.57% - Validation Loss: 1.1119, Validation Accuracy: 27.78%
Epoch [5/100] - Train Loss: 1.1057, Train Accuracy: 38.57% - Validation Loss: 1.1057, Validation Accuracy: 27.78%
Epoch [6/100] - Train Loss: 1.1041, Train Accuracy: 31.43% - Validation Loss: 1.1041, Validation Accuracy: 33.33%
Epoch [7/100] - Train Loss: 1.0919, Train Accuracy: 34.29% - Validation Loss: 1.0919, Validation Accuracy: 38.89%
Epoch [8/100] - Train Loss: 1.0896, Train Accuracy: 37.14% - Validation Loss: 1.0896, Validation Accuracy: 33.33%
Epoch [9/100] - Train Loss: 1.1481, Train Accuracy: 28.57% - Validation Loss: 1.1481, Va

In [12]:
# Save the final model after training is complete
final_model_path = "/content/drive/MyDrive/Cmpt_419_Project/arm_gesture_model_100.pth"
torch.save(model.state_dict(), final_model_path)


In [11]:
import torch
from torchvision import transforms
from PIL import Image

def predict_image(model, image_path, device):
    """
    Given a trained model and an image path, predict the label of the image.
    """
    # Define the same transformations used during training
    transform = transforms.Compose([
        transforms.Grayscale(),  # Ensure image is grayscale (if applicable)
        transforms.Resize((28, 28)),  # Resize to match model input size
        transforms.ToTensor(),  # Convert to tensor
    ])

    # Load image
    image = Image.open(image_path)
    image = transform(image).unsqueeze(0)  # Add batch dimension: (1, 1, 28, 28)

    # Move to device (CPU/GPU)
    image = image.to(device)

    # Set model to evaluation mode
    model.eval()

    # Disable gradient calculation for inference
    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)  # Get predicted class

    return predicted.item()  # Return the predicted label

# Example Usage
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

#image_path = "unused_data/right6.jpg"  # Replace with your image path
image_path = "right6.jpg"  # Replace with your image path
predicted_label = predict_image(model, image_path, device)

print(f"Predicted label: {predicted_label}")


Predicted label: 1
