In [39]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import torchvision
import pandas as pd
import numpy as np
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image


### Loading the Data

First, I will load the data and convert the images to tensors so that PyTorch can work with them. The transformations are defined as follows.

In [40]:
# Preprocessing settings shared by the training and test splits.
IMAGE_SIZE = (224, 224)               # target (height, width) handed to Resize
CHANNEL_MEAN = [0.485, 0.456, 0.406]  # per-channel mean for Normalize (the ImageNet values quoted in the torchvision docs)
CHANNEL_STD = [0.229, 0.224, 0.225]   # per-channel std for Normalize
BATCH_SIZE = 32

# Resize -> tensor conversion -> per-channel normalisation, applied to every image.
transformations = Compose([
    Resize(IMAGE_SIZE),
    ToTensor(),
    Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
])

# Build both datasets with the same pipeline (training split first, then test split).
train_dataset, test_dataset = (
    ImageFolder(root=split_dir, transform=transformations)
    for split_dir in ('human_data/train_data', 'human_data/test_data')
)

# Batch the data: only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

### Defining the model

I will create a simple CNN with this architecture:
- Two convolutional layers
- ReLU as the activation function
- Max pooling after each convolutional layer
- Two fully connected layers
- Raw class scores (logits) as the output; the softmax is applied inside the cross-entropy loss during training

In [49]:
class SimpleCNN(nn.Module):
    """Small two-block convolutional classifier that returns raw class logits.

    Layout: conv(3->32) -> ReLU -> max-pool -> conv(32->64) -> ReLU -> max-pool
    -> flatten -> linear(512) -> ReLU -> linear(num_classes).

    No softmax is applied in ``forward``; the output is meant to be fed to a
    loss that works on logits (the notebook uses ``nn.CrossEntropyLoss``).

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the images passed to ``forward``, either a
            single int (square images) or a ``(height, width)`` pair. The
            default ``(224, 224)`` yields the same ``fc1`` input width
            (200704) as the previous hard-coded value, so existing
            checkpoints keep loading.

    Raises:
        ValueError: If ``input_size`` is too small to survive the two 2x2
            pooling steps (either side smaller than 4 pixels).
    """

    def __init__(self, num_classes = 15, input_size=(224, 224)):
        super().__init__()
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        # The 3x3 / stride-1 / padding-1 convolutions keep height and width;
        # each 2x2 / stride-2 pool halves them (rounding down), and the pool
        # is applied twice in forward().
        pooled_h = height // 2 // 2
        pooled_w = width // 2 // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(f"input_size {input_size} is too small for two 2x2 pooling steps")

        # Flattened feature count = conv2 channels * pooled height * pooled width
        # (64 * 56 * 56 = 200704 for 224x224 inputs).
        self.flat_features = self.conv2.out_channels * pooled_h * pooled_w
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``.

        ``H`` and ``W`` must match the ``input_size`` given at construction,
        otherwise the flattened features will not fit ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.flatten(start_dim=1)  # keep the batch dimension, merge C/H/W
        x = F.relu(self.fc1(x))
        return self.fc2(x)
    

### Optimizer and Loss Function
I will now define the optimizer and the loss function. I will use cross-entropy loss as the loss function and Adam as the optimizer, with a learning rate of 0.001. I will also add an accuracy check that prints the training and validation accuracy after each epoch.

In [52]:
# Pick the best available backend: CUDA first, then Apple-silicon MPS, then CPU.
# The getattr guard keeps this working on PyTorch builds without an MPS backend.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Training on device: {device}")

# Seed PyTorch before the model is built so the weight initialisation is
# repeatable from run to run. Some accelerator kernels may still be
# non-deterministic, so treat this as best-effort reproducibility.
SEED = 42
torch.manual_seed(SEED)

model = SimpleCNN(num_classes=15).to(device)
criterion = nn.CrossEntropyLoss()  # takes the raw logits returned by the model
# NOTE(review): the run recorded in this notebook stalls at a loss of about
# 2.71 (roughly ln 15) with 6.67% (= 1/15) accuracy, which is what a constant
# prediction would score. Consider trying a smaller learning rate (e.g. 1e-4);
# this suggestion is untested.
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

def evaluate_accuracy(model, data_loader, device):
    """Return the top-1 accuracy of ``model`` on ``data_loader`` as a percentage.

    The model is switched to evaluation mode and is left in that mode, so the
    caller must call ``model.train()`` again before resuming training.

    Args:
        model: Module mapping a batch of inputs to per-class scores of shape
            ``(batch, num_classes)``.
        data_loader: Iterable of batches whose first element is the input
            tensor and whose second element is the integer label tensor.
        device: Device the inputs and labels are moved to before inference.

    Returns:
        Percentage in ``[0, 100]`` of samples whose highest-scoring class
        equals the label; ``0.0`` when ``data_loader`` yields no samples.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in data_loader:
            images, labels = batch[0].to(device), batch[1].to(device)
            # Index of the largest score along the class dimension.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # An empty loader has no defined accuracy; report 0 instead of
        # failing with a division by zero.
        return 0.0
    return 100 * correct / total

def train_one_epoch(model, data_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Network being trained; it is put into training mode first.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device each batch is moved to.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    model.train()
    running_loss = 0.0
    for inputs, labels in data_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss / len(data_loader)


for epoch in range(num_epochs):
    epoch_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)

    # Both accuracies need a full inference pass; the test split doubles as
    # the validation set in this notebook.
    train_accuracy = evaluate_accuracy(model, train_loader, device)
    validation_accuracy = evaluate_accuracy(model, test_loader, device)
    print(f'Epoch {epoch + 1}, Loss: {epoch_loss}, Train Acc: {train_accuracy:.2f}, Validation Acc: {validation_accuracy:.2f}')

print('Finished Training')

# Persist only the learned weights (state dict), not the whole module object.
model_path = 'human_action_model.pth'
torch.save(model.state_dict(), model_path)


Training on device: mps
Epoch 1, Loss: 8.375637081386184, Train Acc: 6.67, Validation Acc: 6.67
Epoch 2, Loss: 2.7101482521496347, Train Acc: 6.67, Validation Acc: 6.67
Epoch 3, Loss: 2.709926695203476, Train Acc: 6.67, Validation Acc: 6.67
Epoch 4, Loss: 2.709820199368605, Train Acc: 6.67, Validation Acc: 6.67


KeyboardInterrupt: 

### Predicting Custom Images
I will now write a simple function that predicts the action shown in a single custom image. This function will be adjusted as the project evolves.

In [None]:
def predict_image(image_path, model, device):
    """Predict the class index for a single image file.

    Args:
        image_path: Path of the image to classify.
        model: Trained network returning per-class scores for a batch of
            images; it is switched to evaluation mode.
        device: Device the image tensor is moved to (should be the device the
            model lives on).

    Returns:
        Integer index of the highest-scoring class. The index can be mapped
        back to a label via the training dataset's class list
        (e.g. ``train_dataset.classes[index]``).
    """
    model.eval()  # Set model to evaluation mode
    # Must mirror the preprocessing used for the training data.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # The context manager makes sure the file handle is released even when
    # decoding fails; convert() returns a new, fully loaded RGB image, so it
    # stays usable after the file is closed.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')

    # Add the batch dimension the model expects and move the tensor to the device.
    batch = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(batch)
    return output.argmax(dim=1).item()