In [5]:
%pip install kaconv

Collecting kaconv
  Using cached kaconv-0.1.0-py3-none-any.whl.metadata (5.3 kB)
Collecting torch>=2.3.0 (from kaconv)
  Using cached torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
INFO: pip is looking at multiple versions of kaconv to determine which version is compatible with other requirements. This could take a while.
[31mERROR: Ignored the following yanked versions: 0.1.6, 0.1.7, 0.1.8, 0.1.9, 0.2.0, 0.2.1, 0.2.2, 0.2.2.post2, 0.2.2.post3, 0.15.0[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement torchvision>=1.18.0 (from kaconv) (from versions: 0.12.0, 0.13.0, 0.13.1, 0.14.0, 0.14.1, 0.15.1, 0.15.2, 0.16.0, 0.16.1, 0.16.2, 0.17.0, 0.17.1, 0.17.2, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.20.0, 0.20.1)[0m[31m
[0m[31mERROR: No matching distribution found for torchvision>=1.18.0[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [1]:
import torch
from torch import nn
from torchvision import datasets, transforms
from tqdm import tqdm
import random
import numpy as np
import time

In [4]:
# Import FastKANConvLayer and ConvKAN classes
from kaconv.convkan import ConvKAN
from kaconv.kaconv import FastKANConvLayer
from torch.nn import Conv2d, BatchNorm2d

ModuleNotFoundError: No module named 'kaconv'

In [3]:
def set_seed(seed: int):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    Also switches the cuDNN backend to deterministic mode and turns off its
    `benchmark` flag.

    Args:
        seed: Integer handed to each random number generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # These two flags configure the cuDNN backend.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [4]:
class CIFAR10_KAN_Model(nn.Module):
    """Small convolutional classifier built from FastKAN convolution layers.

    The network stacks three stages of FastKANConvLayer -> BatchNorm2d -> ReLU
    (3->32 channels at stride 1, 32->64 at stride 2, 64->128 at stride 2),
    then pools each feature map to 1x1, flattens, and maps the 128 features
    to 10 outputs with a linear layer.

    Args:
        kan_type: Forwarded unchanged as `kan_type` to every
            FastKANConvLayer. Defaults to "RBF".
    """

    def __init__(self, kan_type="RBF"):
        super().__init__()

        # (in_channels, out_channels, stride) of each convolutional stage.
        stage_specs = ((3, 32, 1), (32, 64, 2), (64, 128, 2))

        layers = []
        for in_ch, out_ch, stride in stage_specs:
            layers.append(
                FastKANConvLayer(in_ch, out_ch, padding=1, kernel_size=3, stride=stride, kan_type=kan_type)
            )
            layers.append(BatchNorm2d(out_ch))
            layers.append(nn.ReLU())

        # Classification head: global average pool, flatten, linear projection.
        layers.extend([nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(128, 10)])

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the sequential network and return its output."""
        return self.network(x)

In [5]:
# Main training function (plus its private evaluation helper)
def _evaluate_accuracy(model, loader, device):
    """Return the top-1 accuracy of `model` over `loader`, in percent."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            predicted = model(x).argmax(dim=1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    return 100 * correct / total


def train_model(num_epochs=150, batch_size=128, lr=1e-2, weight_decay=1e-4, seed=44, data_root='./data'):
    """Train a CIFAR10_KAN_Model on CIFAR-10 and report per-epoch accuracy.

    Every parameter defaults to the value that was previously hard-coded, so
    calling `train_model()` with no arguments runs the same experiment.

    Args:
        num_epochs: Number of passes over the training set. Also used as the
            cosine schedule length so the learning rate reaches `eta_min` on
            the final epoch.
        batch_size: Batch size for both the train and test loaders.
        lr: Initial AdamW learning rate.
        weight_decay: AdamW weight decay.
        seed: Value passed to `set_seed` before anything random happens.
        data_root: Directory where CIFAR-10 is stored / downloaded.

    Returns:
        The best accuracy (percent) observed on the CIFAR-10 test split
        (`train=False`) across all epochs.

    Raises:
        ValueError: If `num_epochs` is smaller than 1.
    """
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be >= 1, got {num_epochs}")

    # Set seed
    set_seed(seed)

    # Check device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model initialization
    model = CIFAR10_KAN_Model(kan_type="RBF").to(device)

    # Define transformations and load dataset
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))

    # Augmentation is applied to the training split only.
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomAffine(0, translate=(0.1, 0.1)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.RandomResizedCrop(32, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        normalize,
    ])

    transform_val = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_train)
    test_dataset = datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform_val)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    # T_max is tied to num_epochs so the schedule and the loop cannot drift apart.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.0001)

    # Training loop
    best_acc = 0
    for epoch in tqdm(range(num_epochs), desc="Training"):
        model.train()
        running_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = model(x)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Evaluation
        acc = _evaluate_accuracy(model, test_loader, device)
        best_acc = max(best_acc, acc)

        # tqdm.write prints the line without breaking the active progress bar.
        tqdm.write(
            f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, Accuracy: {acc:.2f}%, Best Accuracy: {best_acc:.2f}%')

        # Update learning rate
        scheduler.step()

    print(f"Training complete. Best accuracy: {best_acc:.2f}%")
    return best_acc

In [None]:

# Start training when this cell is executed. The guard keeps the call from
# firing if this code is imported as a module rather than run directly.
if __name__ == '__main__':
    train_model()

Files already downloaded and verified
Files already downloaded and verified


Training:   0%|                                                                                | 0/150 [00:00<?, ?it/s]