In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error

In [None]:
os.environ['KAGGLE_USERNAME'] = 'yourkaggleusername'
os.environ['KAGGLE_KEY'] = 'yourkaggleapikey'

# Download HAR dataset
!kaggle datasets download -d meetnagadia/human-action-recognition-har-dataset --unzip

Dataset URL: https://www.kaggle.com/datasets/meetnagadia/human-action-recognition-har-dataset
License(s): ODbL-1.0


In [None]:
!git clone https://github.com/KindXiaoming/pykan.git
!pip install ./pykan

Cloning into 'pykan'...
remote: Enumerating objects: 4221, done.[K
remote: Counting objects: 100% (661/661), done.[K
remote: Compressing objects: 100% (240/240), done.[K
remote: Total 4221 (delta 573), reused 421 (delta 421), pack-reused 3560 (from 3)[K
Receiving objects: 100% (4221/4221), 114.77 MiB | 45.25 MiB/s, done.
Resolving deltas: 100% (1580/1580), done.
Processing ./pykan
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pykan
  Building wheel for pykan (setup.py) ... [?25l[?25hdone
  Created wheel for pykan: filename=pykan-0.2.8-py3-none-any.whl size=78235 sha256=b02e7596740392097b5d754ea11f94bda6c94df865f5267a081758bcf5fa2796
  Stored in directory: /tmp/pip-ephem-wheel-cache-r1cn7mc4/wheels/05/9b/6c/6f9f5a9927ba27c99b92cf0cbdd57f190932c31289c49eded1
Successfully built pykan
Installing collected packages: pykan
Successfully installed pykan-0.2.8


In [None]:
data_path = '/content/Human Action Recognition'
csv_path = os.path.join(data_path, 'Training_set.csv')

In [None]:
class HARCSVImageDataset(Dataset):
    def __init__(self, root_dir, csv_file, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.labels = []

        df = pd.read_csv(csv_file)
        for _, row in df.iterrows():
            img_path = os.path.join(self.root_dir, 'train', row['filename'])
            if os.path.isfile(img_path):
                self.data.append(img_path)
                self.labels.append(row['label'])

        self.label_mapping = {label: idx for idx, label in enumerate(sorted(set(self.labels)))}
        self.labels = [self.label_mapping[label] for label in self.labels]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# Transformations
transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])


In [None]:
train_dataset = HARCSVImageDataset(root_dir=data_path, csv_file=csv_path, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=2)  # Reuse same CSV for test


In [None]:
from pykan.kan import KANLayer

# ViT-KAN model definition
vit_weights = models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1
vit = models.vit_b_16(weights=vit_weights)

for param in vit.parameters():
    param.requires_grad = False

# KAN classification head
class KANHead(nn.Module):
    def __init__(self, embedding_dim, hidden_dims, num_classes):
        super(KANHead, self).__init__()
        self.norm = nn.LayerNorm(embedding_dim)
        self.kan1 = KANLayer(in_dim=embedding_dim, out_dim=hidden_dims[0])
        self.kan2 = KANLayer(in_dim=hidden_dims[0], out_dim=hidden_dims[1])
        self.kan3 = KANLayer(in_dim=hidden_dims[1], out_dim=hidden_dims[2])
        self.out = KANLayer(in_dim=hidden_dims[2], out_dim=num_classes)

    def forward(self, x):
        x = self.norm(x)
        x = self.kan1(x)[0]
        x = F.relu(x)
        x = self.kan2(x)[0]
        x = F.relu(x)
        x = self.kan3(x)[0]
        x = F.relu(x)
        x = self.out(x)[0]
        return x

vit.heads = KANHead(embedding_dim=768, hidden_dims=[128, 64, 32], num_classes=len(train_dataset.label_mapping))


Downloading: "https://download.pytorch.org/models/vit_b_16_swag-9ac1b537.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16_swag-9ac1b537.pth
100%|██████████| 331M/331M [00:01<00:00, 215MB/s]


In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
vit.to(device)

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vit.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 10 == 0:
            print(f'Epoch {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}] Loss: {loss.item():.4f}')
    return running_loss / len(train_loader)

In [None]:
def evaluate(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'\nTest Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%\n')
    return test_loss

In [None]:
for epoch in range(10):
    train_loss = train(vit, device, train_loader, optimizer, epoch)
    test_loss = evaluate(vit, device, test_loader)
    scheduler.step(test_loss)

Epoch 0 [0/12600] Loss: 2.7097
Epoch 0 [320/12600] Loss: 2.7070
Epoch 0 [640/12600] Loss: 2.6772
Epoch 0 [960/12600] Loss: 2.5766
Epoch 0 [1280/12600] Loss: 2.2665
Epoch 0 [1600/12600] Loss: 1.9802
Epoch 0 [1920/12600] Loss: 1.8342
Epoch 0 [2240/12600] Loss: 1.5472
Epoch 0 [2560/12600] Loss: 1.5649
Epoch 0 [2880/12600] Loss: 1.2773
Epoch 0 [3200/12600] Loss: 1.3831
Epoch 0 [3520/12600] Loss: 1.0775
Epoch 0 [3840/12600] Loss: 0.9282
Epoch 0 [4160/12600] Loss: 1.0041
Epoch 0 [4480/12600] Loss: 0.7744
Epoch 0 [4800/12600] Loss: 1.1212
Epoch 0 [5120/12600] Loss: 1.0406
Epoch 0 [5440/12600] Loss: 0.7520
Epoch 0 [5760/12600] Loss: 0.9868
Epoch 0 [6080/12600] Loss: 1.2061
Epoch 0 [6400/12600] Loss: 0.7668
Epoch 0 [6720/12600] Loss: 0.8787
Epoch 0 [7040/12600] Loss: 0.6573
Epoch 0 [7360/12600] Loss: 0.7075
Epoch 0 [7680/12600] Loss: 0.9792
Epoch 0 [8000/12600] Loss: 0.9607
Epoch 0 [8320/12600] Loss: 0.5484
Epoch 0 [8640/12600] Loss: 0.7819
Epoch 0 [8960/12600] Loss: 0.2990
Epoch 0 [9280/12600]



Epoch 1 [0/12600] Loss: 0.3558
Epoch 1 [320/12600] Loss: 0.5088
Epoch 1 [640/12600] Loss: 0.3287
Epoch 1 [960/12600] Loss: 0.6450
Epoch 1 [1280/12600] Loss: 0.2953
Epoch 1 [1600/12600] Loss: 0.5228
Epoch 1 [1920/12600] Loss: 0.5685
Epoch 1 [2240/12600] Loss: 0.5311
Epoch 1 [2560/12600] Loss: 0.3088
Epoch 1 [2880/12600] Loss: 0.5532
Epoch 1 [3200/12600] Loss: 0.4052
Epoch 1 [3520/12600] Loss: 0.6004
Epoch 1 [3840/12600] Loss: 0.3198
Epoch 1 [4160/12600] Loss: 0.8032
Epoch 1 [4480/12600] Loss: 0.4545
Epoch 1 [4800/12600] Loss: 0.3839
Epoch 1 [5120/12600] Loss: 0.6890
Epoch 1 [5440/12600] Loss: 0.4581
Epoch 1 [5760/12600] Loss: 0.5380
Epoch 1 [6080/12600] Loss: 0.6418
Epoch 1 [6400/12600] Loss: 0.9909
Epoch 1 [6720/12600] Loss: 0.5877
Epoch 1 [7040/12600] Loss: 0.5184
Epoch 1 [7360/12600] Loss: 0.5475
Epoch 1 [7680/12600] Loss: 0.4848
Epoch 1 [8000/12600] Loss: 0.9601
Epoch 1 [8320/12600] Loss: 1.1520
Epoch 1 [8640/12600] Loss: 0.5965
Epoch 1 [8960/12600] Loss: 0.6424
Epoch 1 [9280/12600]

In [None]:
def calculate_metrics(model, device, loader):
    model.eval()
    all_targets, all_preds = [], []
    with torch.no_grad():
        for data, targets in loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            preds = outputs.argmax(dim=1).cpu().numpy()
            all_preds.extend(preds)
            all_targets.extend(targets.cpu().numpy())

    accuracy = accuracy_score(all_targets, all_preds)
    rmse = np.sqrt(mean_squared_error(all_targets, all_preds))
    mae = mean_absolute_error(all_targets, all_preds)

    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    return accuracy, rmse, mae

In [None]:
calculate_metrics(vit, device, test_loader)

Accuracy: 98.98%
RMSE: 0.7081
MAE: 0.0592


(0.9898412698412699, np.float64(0.7081162132224988), 0.05920634920634921)