In [None]:
import os
import zipfile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.transforms import Compose, ToTensor, Resize, Normalize, RandomHorizontalFlip, RandomRotation
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
os.environ['KAGGLE_USERNAME'] = 'yourkaggleusername'
os.environ['KAGGLE_KEY'] = 'yourkaggleapikey'

# Download dataset from Kaggle
!kaggle datasets download -d meetnagadia/human-action-recognition-har-dataset --unzip

# Clone the repository containing CNN-KAN code
!git clone https://github.com/jakariaemon/CNN-KAN.git
os.chdir('CNN-KAN')

Dataset URL: https://www.kaggle.com/datasets/meetnagadia/human-action-recognition-har-dataset
License(s): ODbL-1.0
Downloading human-action-recognition-har-dataset.zip to /content/CNN-KAN/CNN-KAN
100% 296M/297M [00:13<00:00, 25.0MB/s]
100% 297M/297M [00:13<00:00, 22.3MB/s]
Cloning into 'CNN-KAN'...
remote: Enumerating objects: 21, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 21 (delta 4), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (21/21), 11.22 KiB | 11.22 MiB/s, done.
Resolving deltas: 100% (4/4), done.


In [None]:
data_path = '/content/CNN-KAN/Human Action Recognition'
train_csv_path = os.path.join(data_path, 'Training_set.csv')

In [None]:
from cnn_KAN import KANLinear

# custom dataset class for HAR dataset
class HARDataset(Dataset):
    def __init__(self, root_dir, csv_file, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.labels = []

        df = pd.read_csv(csv_file)
        for _, row in df.iterrows():
            img_path = os.path.join(self.root_dir, 'train', row['filename'])
            if os.path.isfile(img_path):
                self.data.append(img_path)
                self.labels.append(row['label'])

        # Label mapping
        self.label_mapping = {label: idx for idx, label in enumerate(sorted(set(self.labels)))}
        self.labels = [self.label_mapping[label] for label in self.labels]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label


CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=4096, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
conv1.weight: 864
conv1.bias: 32
conv2.weight: 18432
conv2.bias: 64
fc1.weight: 1048576
fc1.bias: 256
fc2.weight: 2560
fc2.bias: 10
Total trainable parameters: 1070794
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
         MaxPool2d-2           [-1, 32, 16, 16]               0
            Conv2d-3           [-1, 64, 16, 16]          18,496
         MaxPool2d-4             [-1, 64, 8, 8]        

100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data

Test set: Average loss: 0.0047, Accuracy: 5833/10000 (58%)



In [None]:
transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Prepare Dataset and DataLoader
train_dataset = HARDataset(root_dir=data_path, csv_file=train_csv_path, transform=transform)
test_dataset = HARDataset(root_dir=data_path, csv_file=train_csv_path, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

In [None]:
class ResNetKAN(nn.Module):
    def __init__(self, num_classes=15):
        super(ResNetKAN, self).__init__()
        # Load a pretrained ResNet model
        resnet = models.resnet101(pretrained=True)
        self.resnet_features = nn.Sequential(*list(resnet.children())[:-2])  # Remove the last FC layer

        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))

        # KAN Layers
        self.kan1 = KANLinear(2048, 256)
        self.kan2 = KANLinear(256, num_classes)

    def forward(self, x):
        x = self.resnet_features(x)
        x = self.adaptive_pool(x)
        x = x.view(x.size(0), -1)  # Flatten for KAN input
        x = self.kan1(x)
        x = self.kan2(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNetKAN(num_classes=len(train_dataset.label_mapping)).to(device)

optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.1)
criterion = nn.CrossEntropyLoss()



In [None]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 10 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')
    return running_loss / len(train_loader)

def evaluate(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n')
    return test_loss

In [None]:
for epoch in range(3):
    train_loss = train(model, device, train_loader, optimizer, epoch)
    test_loss = evaluate(model, device, test_loader)
    scheduler.step(test_loss)


Test set: Average loss: 0.0124, Accuracy: 11190/12600 (89%)


Test set: Average loss: 0.0047, Accuracy: 12117/12600 (96%)


Test set: Average loss: 0.0016, Accuracy: 12444/12600 (99%)



In [None]:
def calculate_metrics(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    all_targets = []
    all_predictions = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)

            all_targets.extend(target.cpu().numpy())
            all_predictions.extend(pred.cpu().numpy().flatten())

    accuracy = 100. * correct / total

    all_targets = np.array(all_targets)
    all_predictions = np.array(all_predictions)
    rmse = np.sqrt(mean_squared_error(all_targets, all_predictions))
    mae = mean_absolute_error(all_targets, all_predictions)

    print(f'\nOverall Accuracy: {accuracy:.2f}%')
    print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')
    print(f'Mean Absolute Error (MAE): {mae:.4f}')

    return accuracy, rmse, mae


In [None]:
accuracy, rmse, mae = calculate_metrics(model, device, test_loader)


Overall Accuracy: 98.76%
Root Mean Squared Error (RMSE): 0.7234
Mean Absolute Error (MAE): 0.0690
