In [1]:
import os
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from tqdm import tqdm

In [2]:
# --- Configuration -----------------------------------------------------------
# Directory that holds the split description files read by the loading cell
# (train.txt, test.txt and actions.txt).
folder_path = r"ImageSplits"

# Fixed seed so that augmentation noise, DataLoader shuffling and the random
# initialisation of the classifier head are repeatable across
# "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Containers filled in place by the split-loading cell:
#   train / test -> lists of entries read from train.txt / test.txt
#   actions      -> maps the first token of each actions.txt row to an index
train = []
test = []
actions = {}

In [3]:
# Empty the containers in place first so that re-running this cell does not
# append the same entries a second time (the objects themselves are kept, so
# any other reference to them stays valid).
train.clear()
test.clear()
actions.clear()

# Split files whose non-blank lines are collected verbatim (after stripping).
split_targets = {"train.txt": train, "test.txt": test}

for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if filename in split_targets:
        with open(file_path, "r") as handle:
            stripped_lines = (line.strip() for line in handle)
            # Blank lines would otherwise become "" samples that cannot be
            # resolved to an image or a label later on.
            split_targets[filename].extend(entry for entry in stripped_lines if entry)
    elif filename == "actions.txt":
        with open(file_path, "r") as handle:
            # The first line is skipped (treated as a header); the default
            # keeps an empty file from raising StopIteration.
            next(handle, None)
            action_names = [line.split()[0] for line in handle if line.strip()]
        # The label index is the position of the action among the data rows.
        for idx, action_name in enumerate(action_names):
            actions[action_name] = idx

In [4]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that loads an image and derives its label from the file name.

    The base class is referenced through its fully qualified name because this
    class is itself called ``Dataset``; inheriting from the bare name would make
    a re-run of the cell subclass the previous definition of this class instead
    of the torch base class.

    Args:
        file_list: Sequence of image file names (relative to ``image_dir``).
        actions: Mapping from class name to integer label.
        image_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, file_list, actions, image_dir, transform=None):
        self.file_list = file_list
        self.actions = actions
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per entry of ``file_list``)."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        Raises:
            KeyError: If the class name derived from the file name is not a
                key of ``actions``.
        """
        file_name = self.file_list[idx]
        # The class name is everything before the last underscore of the file
        # name; the trailing piece (after the last "_") is discarded.
        class_name = "_".join(file_name.split("_")[:-1])
        label = self.actions[class_name]
        image_path = os.path.join(self.image_dir, file_name)
        # Open inside a context manager so the underlying file handle is
        # released immediately; convert() returns an independent, fully
        # loaded image that stays valid after the file is closed.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label
        

In [5]:
def salt_pepper(image, amount=0.02, s_vs_p=0.5):
    """Return a copy of ``image`` corrupted with salt-and-pepper noise.

    Randomly chosen pixels are set to 255 ("salt") and then other randomly
    chosen pixels are set to 0 ("pepper"). For multi-channel input every
    channel of a chosen pixel is overwritten. Positions are drawn with
    replacement, so the number of distinct affected pixels can be lower than
    the number of draws, and pepper overwrites salt where they coincide.

    Args:
        image: Anything ``np.array`` can convert to an ``(H, W)`` or
            ``(H, W, C)`` array, e.g. a PIL image.
        amount: Fraction of the image's pixels to draw as noise positions.
        s_vs_p: Share of the noise draws that are salt; the rest are pepper.

    Returns:
        A PIL image built from the noisy ``uint8`` array.
    """
    # np.array() copies, so the caller's image is never modified.
    noisy = np.array(image)
    rows, cols = noisy.shape[:2]
    # Count pixels, not array elements: each draw below overwrites a whole
    # pixel, so including the channel axis would multiply the noise density
    # by the number of channels.
    num_pixels = rows * cols

    num_salt = int(np.ceil(amount * s_vs_p * num_pixels))
    # randint's upper bound is exclusive, so the indices are already in range.
    salt_rows = np.random.randint(0, rows, num_salt)
    salt_cols = np.random.randint(0, cols, num_salt)
    noisy[salt_rows, salt_cols] = 255

    num_pepper = int(np.ceil(amount * (1 - s_vs_p) * num_pixels))
    pepper_rows = np.random.randint(0, rows, num_pepper)
    pepper_cols = np.random.randint(0, cols, num_pepper)
    noisy[pepper_rows, pepper_cols] = 0

    return Image.fromarray(noisy.astype(np.uint8))

In [6]:
# Per-channel normalisation; these are the widely used ImageNet mean/std
# values, in line with the pretrained backbone used later in the notebook.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Random training-time augmentations, ending with the conversion to a tensor.
# Salt-and-pepper noise is applied to half of the samples; the blur step is
# applied to every sample with a randomly drawn sigma.
augmentations = transforms.Compose(
    [
        transforms.RandomRotation(degrees=15),
        transforms.RandomResizedCrop(size=128, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([transforms.Lambda(salt_pepper)], p=0.5),
        transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 2.0)),
        transforms.ToTensor(),
    ]
)

In [7]:
# Training pipeline: the random augmentations (which already end in
# ToTensor) followed by channel normalisation.
train_transform = transforms.Compose([augmentations, normalize])

# Evaluation pipeline: deterministic resize to the training resolution,
# tensor conversion and the same normalisation -- no random augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        normalize,
    ]
)

In [8]:
# Directory containing the image files named in the train/test split lists.
image_dir = "JPEGImages"

# One dataset per split; only the transform pipeline differs between them.
train_dataset = Dataset(
    file_list=train, actions=actions, image_dir=image_dir, transform=train_transform
)
test_dataset = Dataset(
    file_list=test, actions=actions, image_dir=image_dir, transform=test_transform
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [9]:
class ModifiedResNet(nn.Module):
    """ResNet-50 (default pretrained weights) with a custom MLP classifier head.

    The backbone's original ``fc`` layer is replaced by three hidden stages of
    Linear -> ReLU -> BatchNorm1d -> Dropout(0.3) with widths 512, 128 and 64,
    followed by a final Linear layer producing ``num_classes`` logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        backbone = models.resnet50(weights="ResNet50_Weights.DEFAULT")

        # Build the head stage by stage; the layer order (and therefore the
        # Sequential indices used in state_dict keys) is fixed by this loop.
        head_layers = []
        width_in = backbone.fc.in_features
        for width_out in (512, 128, 64):
            head_layers += [
                nn.Linear(width_in, width_out),
                nn.ReLU(inplace=True),
                nn.BatchNorm1d(width_out),
                nn.Dropout(0.3),
            ]
            width_in = width_out
        head_layers.append(nn.Linear(width_in, num_classes))

        backbone.fc = nn.Sequential(*head_layers)
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and the replacement head."""
        return self.model(x)

In [10]:
# Size of the final classification layer.
num_classes = 40

# Build the network, show its architecture, then place it on the GPU when one
# is available (CPU otherwise).
model = ModifiedResNet(num_classes)
print(model)
if torch.cuda.is_available():
    model = model.to("cuda")
else:
    model = model.to("cpu")

ModifiedResNet(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
     

In [None]:
def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_batch_loss, accuracy_percent)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one the model is put in eval mode and the pass runs
    with gradients disabled.

    Raises:
        ValueError: If ``loader`` yields no batches, since neither metric
            would be defined.
    """
    num_batches = len(loader)
    if num_batches == 0:
        raise ValueError(f"{desc} loader is empty; cannot compute loss or accuracy.")

    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with tqdm(total=num_batches, desc=desc, unit="batch") as pbar:
        with torch.set_grad_enabled(training):
            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                if training:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                loss_sum += loss.item()
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

                pbar.set_postfix({"Loss": f"{loss.item():.4f}"})
                pbar.update()

    # The loss is averaged over batches (not samples), so a smaller final
    # batch carries the same weight as a full one.
    return loss_sum / num_batches, 100 * correct / total


def train_and_evaluate(model, train_loader, test_loader, num_epochs, save_path,
                       lr=0.001, momentum=0.9, weight_decay=0.0001):
    """Train ``model`` with SGD, evaluate it after every epoch, then save its weights.

    Each epoch runs one training pass over ``train_loader`` followed by one
    evaluation pass over ``test_loader``; loss and accuracy of both passes are
    printed. The learning rate is reduced by a factor of 10 when the
    evaluation loss has not improved for more than 2 consecutive epochs.

    Args:
        model: Network to train; moved to CUDA when available, else CPU.
        train_loader: Iterable of ``(images, labels)`` batches supporting ``len()``.
        test_loader: Iterable of ``(images, labels)`` batches supporting ``len()``.
        num_epochs: Number of train/evaluate rounds.
        save_path: Destination for the final ``state_dict`` (written once,
            after the last epoch).
        lr: SGD learning rate.
        momentum: SGD momentum.
        weight_decay: SGD weight decay (L2 penalty).

    Raises:
        ValueError: If a loader is empty when an epoch is run.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
        model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay
    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=2)

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion, device, "Training", optimizer
        )
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")

        test_loss, test_accuracy = _run_epoch(model, test_loader, criterion, device, "Testing")
        # NOTE(review): the scheduler is driven by the loss on test_loader, so
        # the reported test metrics influence training; a separate validation
        # split would keep them unbiased.
        scheduler.step(test_loss)
        print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

    torch.save(model.state_dict(), save_path)
    print(f"Model saved to {save_path}")



In [36]:
# Where the trained weights are written once all epochs have finished.
save_path = "ResNet50.pth"

# Fine-tune for 15 epochs, evaluating on the test loader after each one.
train_and_evaluate(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    num_epochs=15,
    save_path=save_path,
)

Epoch 1/15


Training: 100%|██████████| 125/125 [00:33<00:00,  3.75batch/s, Loss=3.4076]


Train Loss: 3.6429, Train Accuracy: 6.47%


Testing: 100%|██████████| 173/173 [00:23<00:00,  7.36batch/s, Loss=2.7333]


Test Loss: 2.9156, Test Accuracy: 30.73%
Epoch 2/15


Training: 100%|██████████| 125/125 [00:55<00:00,  2.25batch/s, Loss=2.8153]


Train Loss: 3.1939, Train Accuracy: 17.35%


Testing: 100%|██████████| 173/173 [00:51<00:00,  3.34batch/s, Loss=2.4871]


Test Loss: 2.5062, Test Accuracy: 40.53%
Epoch 3/15


Training: 100%|██████████| 125/125 [01:03<00:00,  1.97batch/s, Loss=2.5847]


Train Loss: 2.8280, Train Accuracy: 28.12%


Testing: 100%|██████████| 173/173 [00:49<00:00,  3.51batch/s, Loss=2.4197]


Test Loss: 2.2370, Test Accuracy: 49.37%
Epoch 4/15


Training: 100%|██████████| 125/125 [00:48<00:00,  2.57batch/s, Loss=2.3086]


Train Loss: 2.5219, Train Accuracy: 37.17%


Testing: 100%|██████████| 173/173 [00:21<00:00,  8.24batch/s, Loss=2.2345]


Test Loss: 1.9688, Test Accuracy: 55.71%
Epoch 5/15


Training: 100%|██████████| 125/125 [00:32<00:00,  3.87batch/s, Loss=1.8881]


Train Loss: 2.2690, Train Accuracy: 43.42%


Testing: 100%|██████████| 173/173 [00:21<00:00,  8.23batch/s, Loss=2.0427]


Test Loss: 1.7159, Test Accuracy: 61.32%
Epoch 6/15


Training: 100%|██████████| 125/125 [00:54<00:00,  2.29batch/s, Loss=2.5348]


Train Loss: 2.0762, Train Accuracy: 48.60%


Testing: 100%|██████████| 173/173 [00:48<00:00,  3.58batch/s, Loss=2.0192]


Test Loss: 1.5338, Test Accuracy: 63.88%
Epoch 7/15


Training: 100%|██████████| 125/125 [00:52<00:00,  2.38batch/s, Loss=2.0716]


Train Loss: 1.8755, Train Accuracy: 53.30%


Testing: 100%|██████████| 173/173 [00:46<00:00,  3.76batch/s, Loss=1.8591]


Test Loss: 1.4061, Test Accuracy: 65.67%
Epoch 8/15


Training: 100%|██████████| 125/125 [00:46<00:00,  2.71batch/s, Loss=1.7223]


Train Loss: 1.7060, Train Accuracy: 58.38%


Testing: 100%|██████████| 173/173 [00:20<00:00,  8.38batch/s, Loss=1.7014]


Test Loss: 1.2896, Test Accuracy: 67.68%
Epoch 9/15


Training: 100%|██████████| 125/125 [00:32<00:00,  3.80batch/s, Loss=1.8430]


Train Loss: 1.5683, Train Accuracy: 60.50%


Testing: 100%|██████████| 173/173 [00:21<00:00,  8.12batch/s, Loss=1.8958]


Test Loss: 1.2401, Test Accuracy: 68.40%
Epoch 10/15


Training: 100%|██████████| 125/125 [00:33<00:00,  3.77batch/s, Loss=1.0493]


Train Loss: 1.4737, Train Accuracy: 62.98%


Testing: 100%|██████████| 173/173 [00:21<00:00,  7.99batch/s, Loss=1.9357]


Test Loss: 1.1817, Test Accuracy: 68.87%
Epoch 11/15


Training: 100%|██████████| 125/125 [00:32<00:00,  3.82batch/s, Loss=1.5761]


Train Loss: 1.3694, Train Accuracy: 65.22%


Testing: 100%|██████████| 173/173 [00:21<00:00,  8.20batch/s, Loss=1.8506]


Test Loss: 1.1349, Test Accuracy: 69.61%
Epoch 12/15


Training: 100%|██████████| 125/125 [00:32<00:00,  3.79batch/s, Loss=1.0873]


Train Loss: 1.2366, Train Accuracy: 67.78%


Testing: 100%|██████████| 173/173 [00:21<00:00,  8.20batch/s, Loss=1.6734]


Test Loss: 1.0834, Test Accuracy: 70.55%
Epoch 13/15


Training: 100%|██████████| 125/125 [00:32<00:00,  3.84batch/s, Loss=1.3866]


Train Loss: 1.1594, Train Accuracy: 70.40%


Testing: 100%|██████████| 173/173 [00:21<00:00,  8.20batch/s, Loss=1.5725]


Test Loss: 1.0581, Test Accuracy: 71.13%
Epoch 14/15


Training: 100%|██████████| 125/125 [00:32<00:00,  3.81batch/s, Loss=0.8161]


Train Loss: 1.0963, Train Accuracy: 71.60%


Testing: 100%|██████████| 173/173 [00:20<00:00,  8.32batch/s, Loss=1.7736]


Test Loss: 1.0450, Test Accuracy: 71.37%
Epoch 15/15


Training: 100%|██████████| 125/125 [00:32<00:00,  3.81batch/s, Loss=0.8924]


Train Loss: 1.0244, Train Accuracy: 73.72%


Testing: 100%|██████████| 173/173 [00:21<00:00,  8.06batch/s, Loss=1.4583]


Test Loss: 1.0272, Test Accuracy: 71.62%
Model saved to ResNet50.pth
