In [None]:
# Import required lib

import pandas as pd
from google.colab import drive
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.utils import resample
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.optim as optim
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F


In [None]:
# Connect to Google Drive

drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Load Data

train_labels_path = '/content/drive/MyDrive/Human Action Recognition/Training_set.csv'
train_images_path = '/content/drive/MyDrive/Human Action Recognition/train'


In [None]:
# Read CSV File

train_labels = pd.read_csv(train_labels_path)


In [None]:
# Select randomly 100 images from each class with their respective labels

def balanced_subset(df, n_samples):
  dfs = []
  for label in df['label'].unique():
    subset = df[df['label'] == label]
    sampled_subset = resample(subset, replace=False, n_samples=n_samples, random_state=42)
    dfs.append(sampled_subset)
  return pd.concat(dfs)

balanced_train_labels = balanced_subset(train_labels, 100)
print(balanced_train_labels['label'].value_counts())


label
sitting               100
using_laptop          100
hugging               100
sleeping              100
drinking              100
clapping              100
dancing               100
cycling               100
calling               100
laughing              100
eating                100
fighting              100
listening_to_music    100
running               100
texting               100
Name: count, dtype: int64


In [None]:
# Convert label into numeric

le = LabelEncoder()
balanced_train_labels['label'] = le.fit_transform(balanced_train_labels['label'])
print(balanced_train_labels['label'].value_counts())


label
11    100
14    100
7     100
12    100
4     100
1     100
3     100
2     100
0     100
8     100
5     100
6     100
9     100
10    100
13    100
Name: count, dtype: int64


In [None]:
# Split data into training and testing

class HumanActionDataset(Dataset):
  def __init__(self, labels_df, images_path, transform=None):
    self.labels_df = labels_df
    self.images_path = images_path
    self.transform = transform

  def __len__(self):
    return len(self.labels_df)

  def __getitem__(self, idx):
    if torch.is_tensor(idx):
      idx = idx.tolist()

    img_name = self.labels_df.iloc[idx, 0]
    image = Image.open(f'{self.images_path}/{img_name}')
    label = self.labels_df.iloc[idx, 1]

    if self.transform:
      image = self.transform(image)

    return image, label

data_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset = HumanActionDataset(balanced_train_labels, train_images_path, transform=data_transform)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=2)


In [None]:
# Define CNN Model

class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(16)
    self.relu1 = nn.ReLU()
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(32)
    self.relu2 = nn.ReLU()
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.bn3 = nn.BatchNorm2d(64)
    self.relu3 = nn.ReLU()
    self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.fc1 = nn.Linear(64 * 8 * 8, 512)
    self.relu4 = nn.ReLU()
    self.dropout = nn.Dropout(p=0.5)
    self.fc2 = nn.Linear(512, 15)

  def forward(self, x):
    x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
    x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
    x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
    x = x.view(-1, 64 * 8 * 8)
    x = self.dropout(self.relu4(self.fc1(x)))
    x = self.fc2(x)
    return x

model = CNN()


In [None]:
# Select loss function and optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 25
for epoch in range(num_epochs):
  running_loss = 0.0
  correct = 0
  total = 0
  for i, data in enumerate(train_dataloader, 0):
    inputs, labels = data

    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  epoch_loss = running_loss / len(train_dataloader)
  epoch_acc = 100 * correct / total
  print(f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')

print('Finished Training')


Epoch 1, Loss: 1.7710, Accuracy: 41.19%
Epoch 2, Loss: 1.6754, Accuracy: 40.71%
Epoch 3, Loss: 1.5997, Accuracy: 43.33%
Epoch 4, Loss: 1.5874, Accuracy: 43.81%
Epoch 5, Loss: 1.4327, Accuracy: 49.52%
Epoch 6, Loss: 1.5453, Accuracy: 44.29%
Epoch 7, Loss: 1.4784, Accuracy: 48.10%
Epoch 8, Loss: 1.3795, Accuracy: 50.71%
Epoch 9, Loss: 1.3392, Accuracy: 49.76%
Epoch 10, Loss: 1.3440, Accuracy: 51.67%
Epoch 11, Loss: 1.2939, Accuracy: 51.19%
Epoch 12, Loss: 1.2338, Accuracy: 55.24%
Epoch 13, Loss: 1.2591, Accuracy: 54.52%
Epoch 14, Loss: 1.2835, Accuracy: 52.62%
Epoch 15, Loss: 1.1073, Accuracy: 60.95%
Epoch 16, Loss: 1.1366, Accuracy: 58.81%
Epoch 17, Loss: 1.1424, Accuracy: 56.90%
Epoch 18, Loss: 1.0984, Accuracy: 59.29%
Epoch 19, Loss: 1.1633, Accuracy: 57.62%
Epoch 20, Loss: 1.0717, Accuracy: 58.57%
Epoch 21, Loss: 1.0696, Accuracy: 60.48%
Epoch 22, Loss: 0.9589, Accuracy: 64.52%
Epoch 23, Loss: 0.9394, Accuracy: 65.71%
Epoch 24, Loss: 0.9409, Accuracy: 63.57%
Epoch 25, Loss: 0.9983, A

In [None]:
# Define the hyperparameter search space

learning_rates = [1e-3, 1e-4, 1e-5]
batch_sizes = [4, 8, 16]

best_accuracy = 0
best_params = {}

# Iterate over the hyperparameter combinations
for lr in learning_rates:
  for batch_size in batch_sizes:
    # Create a new model instance
    model = CNN()

    # Define the optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Train the model
    num_epochs = 25
    for epoch in range(num_epochs):
      running_loss = 0.0
      for i, data in enumerate(train_dataloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

      epoch_loss = running_loss / len(train_dataloader)
      print(f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}')

    # Evaluate the model on the test set
    correct = 0
    total = 0
    with torch.no_grad():
      for data in test_dataloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Learning Rate: {lr}, Batch Size: {batch_size}')

    # Update the best parameters if the current accuracy is higher
    if accuracy > best_accuracy:
      best_accuracy = accuracy
      best_params = {'lr': lr, 'batch_size': batch_size}

print(f'Best Hyperparameters: {best_params}')


Epoch 1, Loss: 3.8670
Epoch 2, Loss: 2.7629
Epoch 3, Loss: 2.6331
Epoch 4, Loss: 2.5512
Epoch 5, Loss: 2.4492
Epoch 6, Loss: 2.4680
Epoch 7, Loss: 2.4013
Epoch 8, Loss: 2.2838
Epoch 9, Loss: 2.2377
Epoch 10, Loss: 2.1428
Epoch 11, Loss: 2.0980
Epoch 12, Loss: 1.9617
Epoch 13, Loss: 1.7862
Epoch 14, Loss: 1.6961
Epoch 15, Loss: 1.6602
Epoch 16, Loss: 1.4866
Epoch 17, Loss: 1.3734
Epoch 18, Loss: 1.3643
Epoch 19, Loss: 1.1661
Epoch 20, Loss: 1.2634
Epoch 21, Loss: 1.0857
Epoch 22, Loss: 1.0001
Epoch 23, Loss: 1.0005
Epoch 24, Loss: 1.0170
Epoch 25, Loss: 0.7851
Learning Rate: 0.001, Batch Size: 4
Epoch 1, Loss: 4.5899
Epoch 2, Loss: 2.8229
Epoch 3, Loss: 2.6204
Epoch 4, Loss: 2.5632
Epoch 5, Loss: 2.5352
Epoch 6, Loss: 2.3736
Epoch 7, Loss: 2.3667
Epoch 8, Loss: 2.1664
Epoch 9, Loss: 2.1270
Epoch 10, Loss: 2.0274
Epoch 11, Loss: 2.0045
Epoch 12, Loss: 1.8979
Epoch 13, Loss: 1.7180
Epoch 14, Loss: 1.6721
Epoch 15, Loss: 1.5046
Epoch 16, Loss: 1.3752
Epoch 17, Loss: 1.3525
Epoch 18, Loss: 

In [None]:
# Evaluate the model on the test set with the best parameters

model = CNN()
optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])

num_epochs = 25
for epoch in range(num_epochs):
  running_loss = 0.0
  for i, data in enumerate(train_dataloader, 0):
    inputs, labels = data

    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  epoch_loss = running_loss / len(train_dataloader)
  print(f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}')

print(f'Final Accuracy: {epoch_acc:.2f}%')


Epoch 1, Loss: 2.8902
Epoch 2, Loss: 2.5921
Epoch 3, Loss: 2.3931
Epoch 4, Loss: 2.2716
Epoch 5, Loss: 2.0722
Epoch 6, Loss: 1.8744
Epoch 7, Loss: 1.6518
Epoch 8, Loss: 1.5268
Epoch 9, Loss: 1.3604
Epoch 10, Loss: 1.1748
Epoch 11, Loss: 1.0575
Epoch 12, Loss: 0.9309
Epoch 13, Loss: 0.7957
Epoch 14, Loss: 0.6400
Epoch 15, Loss: 0.6018
Epoch 16, Loss: 0.4909
Epoch 17, Loss: 0.3836
Epoch 18, Loss: 0.3433
Epoch 19, Loss: 0.3232
Epoch 20, Loss: 0.2880
Epoch 21, Loss: 0.2380
Epoch 22, Loss: 0.2028
Epoch 23, Loss: 0.1676
Epoch 24, Loss: 0.1672
Epoch 25, Loss: 0.1365
Final Accuracy: 82.92%


In [None]:
# Fine-tuning the model architecture

class CNN_FineTuned(nn.Module):
  def __init__(self):
    super(CNN_FineTuned, self).__init__()
    self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(32)
    self.relu1 = nn.ReLU()
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(64)
    self.relu2 = nn.ReLU()
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
    self.bn3 = nn.BatchNorm2d(128)
    self.relu3 = nn.ReLU()
    self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.fc1 = nn.Linear(128 * 8 * 8, 1024)
    self.relu4 = nn.ReLU()
    self.dropout = nn.Dropout(p=0.5)
    self.fc2 = nn.Linear(1024, 15)

  def forward(self, x):
    x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
    x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
    x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
    x = x.view(-1, 128 * 8 * 8)
    x = self.dropout(self.relu4(self.fc1(x)))
    x = self.fc2(x)
    return x

model = CNN_FineTuned()

# Trying a different optimizer
optimizer = optim.SGD(model.parameters(), lr=best_params['lr'], momentum=0.9)

# Implementing learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [None]:
for epoch in range(num_epochs):
  running_loss = 0.0
  for i, data in enumerate(train_dataloader, 0):
    inputs, labels = data

    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  epoch_loss = running_loss / len(train_dataloader)
  print(f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}')
  scheduler.step()

print(f'Final Accuracy: {epoch_acc:.2f}%')


Epoch 1, Loss: 1.0694
Epoch 2, Loss: 1.0904
Epoch 3, Loss: 1.0536
Epoch 4, Loss: 1.0420
Epoch 5, Loss: 1.0800
Epoch 6, Loss: 1.0417
Epoch 7, Loss: 1.0871
Epoch 8, Loss: 1.0368
Epoch 9, Loss: 1.0519
Epoch 10, Loss: 1.0681
Epoch 11, Loss: 1.0558
Epoch 12, Loss: 1.0470
Epoch 13, Loss: 1.0265
Epoch 14, Loss: 1.0414
Epoch 15, Loss: 1.0325
Epoch 16, Loss: 1.0770
Epoch 17, Loss: 1.0949
Epoch 18, Loss: 1.0516
Epoch 19, Loss: 1.0279
Epoch 20, Loss: 1.0535
Epoch 21, Loss: 1.0322
Epoch 22, Loss: 1.0443
Epoch 23, Loss: 1.0494
Epoch 24, Loss: 1.0196
Epoch 25, Loss: 1.0646
Final Accuracy: 82.92%


In [None]:

# Evaluate the model on the test set
correct = 0
total = 0
with torch.no_grad():
  for data in test_dataloader:
    images, labels = data
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Accuracy: {epoch_acc:.2f}%')


Final Accuracy: 82.67%


In [None]:

# Reference: https://medium.com/bitgrit-data-science-publication/building-an-image-classification-model-with-pytorch-from-scratch-f10452073212
# Reference: https://www.kaggle.com/code/arnoldyanga/image-classification-using-pytorch
# Reference: https://medium.com/thecyphy/train-cnn-model-with-pytorch-21dafb918f48
# Reference: https://towardsdatascience.com/pytorch-vision-multiclass-image-classification-531025193aa
# Reference: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
