In [None]:
import os
import zipfile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.transforms import Compose, ToTensor, Resize, Normalize
from PIL import Image
import scipy.io as sio
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# using google drive due to the size of the dataset and for easy integration with Google Colab
from google.colab import drive
drive.mount('/content/drive')

dataset_zip = '/content/drive/My Drive/data.zip'
output_dir = '/content/data'

with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
    zip_ref.extractall(output_dir)

!git clone https://github.com/jakariaemon/CNN-KAN.git
os.chdir('CNN-KAN')

from cnn_KAN import KANLinear

Mounted at /content/drive
Cloning into 'CNN-KAN'...
remote: Enumerating objects: 21, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 21 (delta 4), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (21/21), 11.22 KiB | 11.22 MiB/s, done.
Resolving deltas: 100% (4/4), done.
CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=4096, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
conv1.weight: 864
conv1.bias: 32
conv2.weight: 18432
conv2.bias: 64
fc1.weight: 1048576
fc1.bias: 256
fc2.weight: 2560
fc2.bias: 10
Total trainable parameters: 1070794
-------

100%|██████████| 170M/170M [00:01<00:00, 86.8MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data

Test set: Average loss: 0.0047, Accuracy: 5933/10000 (59%)



In [None]:
from cnn_KAN import KANLinear

# Define Label Mapping
label_mapping = {
    'tennis_serve': 0,
    'golf_swing': 1,
    'baseball_pitch': 2,
    'bench_press': 3,
    'pullup': 4,
    'pushup': 5,
    'situp': 6,
    'jumping_jacks': 7,
    'strum_guitar': 8,
    'bowl': 9,
    'tennis_forehand': 10,
    'squat': 11,
    'jump_rope': 12,
    'clean_and_jerk': 13,
    'baseball_swing': 14
}

# Custom Dataset for Penn Action
class PennActionDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.frames_dir = os.path.join(root_dir, 'frames')
        self.labels_dir = os.path.join(root_dir, 'labels')
        self.samples = []

        for label_file in os.listdir(self.labels_dir):
            label_path = os.path.join(self.labels_dir, label_file)
            mat = sio.loadmat(label_path)
            action_label = mat['action'][0]
            if action_label in label_mapping:
                action_index = label_mapping[action_label]
                sequence_id = label_file.split('.')[0]
                frame_dir = os.path.join(self.frames_dir, sequence_id)
                frames = sorted(os.listdir(frame_dir))
                self.samples.extend([(os.path.join(frame_dir, frame), action_index) for frame in frames])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        frame_path, label = self.samples[idx]
        image = Image.open(frame_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

transform = Compose([
    Resize((224, 224)),  #ResNet requires input size of 224x224
    ToTensor(),
    Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # ImageNet mean and std
])

train_dataset = PennActionDataset(root_dir='/content/data/data/Penn_Action/Penn_Action', transform=transform)
test_dataset = PennActionDataset(root_dir='/content/data/data/Penn_Action/Penn_Action', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Define the CNN-KAN Model with ResNet Backbone
class ResNetKAN(nn.Module):
    def __init__(self, num_classes=15):
        super(ResNetKAN, self).__init__()
        # Load a pretrained ResNet model
        resnet = models.resnet101(pretrained=True)
        self.resnet_features = nn.Sequential(*list(resnet.children())[:-2])  # Remove the last FC layer

        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))

        # KAN Layers
        self.kan1 = KANLinear(2048, 256)
        self.kan2 = KANLinear(256, num_classes)

    def forward(self, x):
        x = self.resnet_features(x)
        x = self.adaptive_pool(x)
        x = x.view(x.size(0), -1)  # Flatten for KAN input
        x = self.kan1(x)
        x = self.kan2(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNetKAN(num_classes=len(label_mapping)).to(device)

optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-3)
criterion = nn.CrossEntropyLoss()

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

def evaluate(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n')

for epoch in range(3):
    train(model, device, train_loader, optimizer, epoch)
    evaluate(model, device, test_loader)


Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 205MB/s]



Test set: Average loss: 0.0084, Accuracy: 141217/163841 (86%)


Test set: Average loss: 0.0003, Accuracy: 162807/163841 (99%)


Test set: Average loss: 0.0002, Accuracy: 163132/163841 (100%)



In [None]:
def calculate_metrics(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    all_targets = []
    all_predictions = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)

            all_targets.extend(target.cpu().numpy())
            all_predictions.extend(pred.cpu().numpy().flatten())

    accuracy = 100. * correct / total

    all_targets = np.array(all_targets)
    all_predictions = np.array(all_predictions)
    rmse = np.sqrt(mean_squared_error(all_targets, all_predictions))
    mae = mean_absolute_error(all_targets, all_predictions)

    print(f'\nOverall Accuracy: {accuracy:.2f}%')
    print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')
    print(f'Mean Absolute Error (MAE): {mae:.4f}')

    return accuracy, rmse, mae

accuracy, rmse, mae = calculate_metrics(model, device, test_loader)


Overall Accuracy: 99.57%
Root Mean Squared Error (RMSE): 0.4423
Mean Absolute Error (MAE): 0.0239
