## Pulling the repo

In [None]:
import os
import sys

# --- CONFIGURATION ---
REPO_NAME = "FusionKAN"
# Tip: If private, use: https://<your_token>@github.com/yourname/FusionKAN.git
REPO_URL = "https://github.com/DonKamilo00/FusionKAN.git" 
BRANCH = "main" 

# --- SETUP LOGIC ---
if not os.path.exists(REPO_NAME):
    print(f"üöÄ Cloning {REPO_NAME}...")
    !git clone {REPO_URL}
    %cd {REPO_NAME}
else:
    print(f"üîÑ Repo exists. Updating...")
    %cd {REPO_NAME}
    !git fetch origin
    !git reset --hard origin/{BRANCH} # Force overwrite local changes to match remote

# --- INSTALL BUILD DEPENDENCIES ---
# Ninja makes C++ compilation much faster
!pip install ninja 

# --- COMPILE & INSTALL FUSIONKAN ---
print("‚öôÔ∏è Compiling CUDA Kernels (this may take a moment)...")
# --no-deps: Don't waste time checking torch/numpy installation every time
# --force-reinstall: Ensures the C++ extension is actually rebuilt
!pip install . --verbose --no-deps --force-reinstall

print("‚úÖ Setup Complete. FusionKAN is ready.")

# Saving changes to the .Cu

In [11]:
!git add .
!git commit -m "update kernel"
!git push

Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@a4fdbe0801af.(none)')
fatal: could not read Username for 'https://github.com': No such device or address


# Updating

In [22]:
# Run this cell whenever you push changes to GitHub
%cd /content/FusionKAN
!git pull
!pip install . --no-deps --force-reinstall
print("‚úÖ Library Updated")

/content/FusionKAN
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 4 (delta 2), reused 4 (delta 2), pack-reused 0 (from 0)[K
Unpacking objects: 100% (4/4), 841 bytes | 841.00 KiB/s, done.
From https://github.com/DonKamilo00/FusionKAN
   d878856..f1087f0  main       -> origin/main
Updating d878856..f1087f0
Fast-forward
 csrc/fusion_kan.cu | 112 [32m++++++++++++++++++++++[m[31m-------------------------------[m
 1 file changed, 47 insertions(+), 65 deletions(-)
Processing /content/FusionKAN
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fusion_kan
  Building wheel for fusion_kan (setup.py) ... [?25l[?25hdone
  Created wheel for fusion_kan: filename=fusion_kan-1.0.0-cp312-cp312-linux_x86_64.whl size=119675 sha256=8d9ab1a8d48ec083177d073170962374da769bd4590558712ffc1d8069e19395
  Stored in directory: /tmp/pip-ephem-wheel-cache-oftrljh9/

‚úÖ Library Updated


# Verification

In [2]:
import torch
import fusion_kan
from fusion_kan import FusionKANLayer

# Verify the C++ backend loaded correctly
try:
    from fusion_kan.functional import _backend
    print("CUDA Backend Loaded Successfully")
except ImportError:
    print("‚ùå Error: CUDA Backend not found. Did compilation fail?")

# --- Your Benchmark / Test Code Here ---
batch_size = 4096
in_features = 32
out_features = 64

layer = FusionKANLayer(in_features, out_features).cuda()
x = torch.randn(batch_size, in_features).cuda()

# Forward pass
y = layer(x)
print(f"Forward pass output shape: {y.shape}")

# Backward pass (Critical for testing your new kernel gradients)
loss = y.sum()
loss.backward()
print("Backward pass successful")

‚ùå Error: CUDA Backend not found. Did compilation fail?


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [8]:
!nvidia-smi

Sun Dec  7 13:20:09 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   62C    P0             32W /   72W |     267MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [24]:
# Run this in a Jupyter Cell to recompile
%cd /content/FusionKAN
!pip install . --no-deps --force-reinstall

/content/FusionKAN
Processing /content/FusionKAN
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fusion_kan
  Building wheel for fusion_kan (setup.py) ... [?25l[?25hdone
  Created wheel for fusion_kan: filename=fusion_kan-1.0.0-cp312-cp312-linux_x86_64.whl size=119675 sha256=b9442d3ce2e44181933f1e80408647a9a01c45079cbcac081c43b1035a82242d
  Stored in directory: /tmp/pip-ephem-wheel-cache-ex67f069/wheels/ba/ee/1a/c6dba8d3add4302b13e8353459dd01ef403bc9ab63abe170dd
Successfully built fusion_kan
Installing collected packages: fusion_kan
  Attempting uninstall: fusion_kan
    Found existing installation: fusion_kan 1.0.0
    Uninstalling fusion_kan-1.0.0:
      Successfully uninstalled fusion_kan-1.0.0
Successfully installed fusion_kan-1.0.0


In [25]:
import torch
import torch.nn as nn
from torch.autograd import gradcheck
import time
import importlib
import fusion_kan
importlib.reload(fusion_kan) # Force reload of the module
from fusion_kan.functional import FusionKANFunction
from fusion_kan.layer import FusionKANLayer

print(f"FusionKAN Version: {fusion_kan.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")

def run_gradient_check():
    print("\n--- 1. NUMERICAL CORRECTNESS (GradCheck) ---")
    # gradcheck requires double precision (float64) for numerical stability
    device = torch.device('cuda')
    
    # Small dimensions for valid numerical check
    B, In, Out = 4, 4, 8
    grid_size = 5
    
    # Inputs
    inputs = torch.randn(B, In, dtype=torch.float64, device=device, requires_grad=True)
    weights = torch.randn(Out, In, grid_size + 3, dtype=torch.float64, device=device, requires_grad=True)
    
    # Grid bounds (scalar tensors)
    grid_min = torch.tensor(-1.0, dtype=torch.float64, device=device, requires_grad=True)
    grid_max = torch.tensor(1.0, dtype=torch.float64, device=device, requires_grad=True)
    
    print("Running torch.autograd.gradcheck...")
    try:
        # We test the custom Function directly
        test = gradcheck(
            FusionKANFunction.apply, 
            (inputs, weights, grid_size, grid_min, grid_max), 
            eps=1e-6, 
            atol=1e-4
        )
        print(f"‚úÖ Gradient Check PASSED: {test}")
    except Exception as e:
        print(f"‚ùå Gradient Check FAILED")
        print(e)

def run_stress_test():
    print("\n--- 2. PERFORMANCE STRESS TEST (Shared Memory Tiling) ---")
    # This tests if the Block/Grid logic holds up under heavy load
    device = torch.device('cuda')
    
    # Large dimensions typical for 3D/NeRF
    B = 16384     # Large Batch
    In = 32       # Typical Coordinate Encoding width
    Out = 64      # Typical Hidden width
    grid_size = 100 # High resolution grid
    
    print(f"Config: Batch={B}, In={In}, Out={Out}, Grid={grid_size}")
    
    layer = FusionKANLayer(In, Out, grid_size=grid_size).to(device)
    x = torch.randn(B, In, device=device)
    
    # Warmup
    for _ in range(10):
        y = layer(x)
        loss = y.sum()
        loss.backward()
        
    torch.cuda.synchronize()
    start = time.time()
    
    steps = 100
    for _ in range(steps):
        # Forward
        y = layer(x)
        # Backward
        loss = y.sum()
        loss.backward()
        
    torch.cuda.synchronize()
    end = time.time()
    
    avg_time = (end - start) / steps * 1000 # ms
    print(f"‚úÖ Stress Test Complete.")
    print(f"Avg Time (Forward+Backward): {avg_time:.2f} ms per step")
    print(f"Throughput: {B * steps / (end - start):,.0f} samples/sec")

def run_convergence_test():
    print("\n--- 3. SANITY CHECK (Training Loop) ---")
    # Simple task: Learn Identity function y = x
    # If gradients are wrong, loss will explode or stay flat.
    
    model = FusionKANLayer(1, 1, grid_size=10).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    
    # Data: y = sin(x)
    x_train = torch.linspace(-2, 2, 1000).view(-1, 1).cuda()
    y_train = torch.sin(x_train)
    
    print("Training 50 steps...")
    for i in range(50):
        optimizer.zero_grad()
        pred = model(x_train)
        loss = torch.nn.functional.mse_loss(pred, y_train)
        loss.backward()
        optimizer.step()
        
        if i % 10 == 0:
            print(f"Step {i}: Loss {loss.item():.6f}")
            
    if loss.item() < 0.1:
        print("‚úÖ Convergence Check PASSED (Loss < 0.1)")
    else:
        print("‚ö†Ô∏è Convergence Check SUSPICIOUS (Loss is high)")

if __name__ == "__main__":
    run_gradient_check()
    run_stress_test()
    run_convergence_test()

FusionKAN Version: 1.0.0
CUDA Available: True

--- 1. NUMERICAL CORRECTNESS (GradCheck) ---
Running torch.autograd.gradcheck...
‚ùå Gradient Check FAILED
Jacobian mismatch for output 0 with respect to input 2,
numerical:tensor([[ 3.5091e+00, -6.2894e-02,  1.5061e+00, -7.9844e-03,  1.4906e+00,
         -4.4650e-01,  1.0797e+00, -5.4398e-01,  3.2259e+00, -2.7854e-01,
          1.7888e+00, -1.2435e+00,  1.5045e+00,  3.4104e-01, -1.7525e-01,
          9.4578e-01,  3.1987e+00, -1.6599e+00,  2.0668e+00,  6.2391e-01,
          1.3972e+00,  7.5625e-01,  1.5156e-03, -2.5742e+00, -1.1103e+00,
         -9.3917e-02, -2.8047e-01,  6.4728e-01,  5.4296e-01,  5.0253e-01,
         -1.4386e+00,  1.1189e+00]], device='cuda:0', dtype=torch.float64)
analytical:tensor([[ 1.0527e+01, -1.8871e-01,  4.5184e+00, -2.3943e-02,  4.4718e+00,
         -1.3395e+00,  3.2390e+00, -1.6320e+00,  9.6777e+00, -8.3565e-01,
          5.3665e+00, -3.7306e+00,  4.5136e+00,  1.0231e+00, -5.2573e-01,
          2.8373e+00,  9.596