## Pulling the repo

In [5]:
import os
import sys

# --- CONFIGURATION ---
REPO_NAME = "FusionKAN"
# Tip: If private, use: https://<your_token>@github.com/yourname/FusionKAN.git
REPO_URL = "https://github.com/DonKamilo00/FusionKAN.git" 
BRANCH = "main" 

# --- SETUP LOGIC ---
if not os.path.exists(REPO_NAME):
    print(f"üöÄ Cloning {REPO_NAME}...")
    !git clone {REPO_URL}
    %cd {REPO_NAME}
else:
    print(f"üîÑ Repo exists. Updating...")
    %cd {REPO_NAME}
    !git fetch origin
    !git reset --hard origin/{BRANCH} # Force overwrite local changes to match remote

# --- INSTALL BUILD DEPENDENCIES ---
# Ninja makes C++ compilation much faster
!pip install ninja 

# --- COMPILE & INSTALL FUSIONKAN ---
print("‚öôÔ∏è Compiling CUDA Kernels (this may take a moment)...")
# --no-deps: Don't waste time checking torch/numpy installation every time
# --force-reinstall: Ensures the C++ extension is actually rebuilt
!pip install . --verbose --no-deps --force-reinstall

print("‚úÖ Setup Complete. FusionKAN is ready.")

üöÄ Cloning FusionKAN...
Cloning into 'FusionKAN'...
remote: Enumerating objects: 223, done.[K
remote: Counting objects: 100% (223/223), done.[K
remote: Compressing objects: 100% (155/155), done.[K
remote: Total 223 (delta 119), reused 166 (delta 63), pack-reused 0 (from 0)[K
Receiving objects: 100% (223/223), 201.71 KiB | 25.21 MiB/s, done.
Resolving deltas: 100% (119/119), done.
/content/FusionKAN/FusionKAN
‚öôÔ∏è Compiling CUDA Kernels (this may take a moment)...
Using pip 24.1.2 from /usr/local/lib/python3.12/dist-packages/pip (python 3.12)
Processing /content/FusionKAN/FusionKAN
  Running command python setup.py egg_info
  running egg_info
  creating /tmp/pip-pip-egg-info-2qmh005m/fusion_kan.egg-info
  writing /tmp/pip-pip-egg-info-2qmh005m/fusion_kan.egg-info/PKG-INFO
  writing dependency_links to /tmp/pip-pip-egg-info-2qmh005m/fusion_kan.egg-info/dependency_links.txt
  writing requirements to /tmp/pip-pip-egg-info-2qmh005m/fusion_kan.egg-info/requires.txt
  writing top-leve

‚úÖ Setup Complete. FusionKAN is ready.


# Saving changes to the .cu kernel source

In [11]:
# Stage every change in the working tree, commit it, and push it to the remote.
# NOTE(review): the recorded output of this cell shows the commit failing because no
# git author identity is configured, and the push failing because no GitHub credentials
# are available. Set user.name / user.email and authenticate before running this cell.
!git add .
!git commit -m "update kernel"
!git push

Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@a4fdbe0801af.(none)')
fatal: could not read Username for 'https://github.com': No such device or address


# Updating

In [12]:
# Run this cell whenever you push changes to GitHub
%cd /content/FusionKAN
!git pull
!pip install . --no-deps --force-reinstall
print("‚úÖ Library Updated")

/content/FusionKAN
remote: Enumerating objects: 8, done.[K
remote: Counting objects: 100% (8/8), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 5 (delta 2), reused 5 (delta 2), pack-reused 0 (from 0)[K
Unpacking objects: 100% (5/5), 5.24 KiB | 5.24 MiB/s, done.
From https://github.com/DonKamilo00/FusionKAN
   9d9d885..545f754  main       -> origin/main
Updating 9d9d885..545f754
Fast-forward
 csrc/fusion_kan.cu  | 136 [32m++++++++++[m[31m------------[m
 csrc/fusion_kan.txt | 325 [32m++++++++++++++++++++++++++++++++++++++++[m[31m------------[m
 dev_fusion.ipynb    | 299 [32m+++++++++++++++++++++++++++++++++++++++++++++++[m
 3 files changed, 612 insertions(+), 148 deletions(-)
 create mode 100644 dev_fusion.ipynb
Processing /content/FusionKAN
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fusion_kan
  Building wheel for fusion_kan (setup.py) ... [?25l[?25hdone
  Created wheel for fusion_kan: filename=fus

‚úÖ Library Updated


# Verification

In [13]:
import torch
import fusion_kan
from fusion_kan import FusionKANLayer

# Confirm that the compiled backend module can be imported before using the layer
try:
    from fusion_kan.functional import _backend
except ImportError:
    print("‚ùå Error: CUDA Backend not found. Did compilation fail?")
else:
    print("CUDA Backend Loaded Successfully")

# --- Your Benchmark / Test Code Here ---
batch_size, in_features, out_features = 4096, 32, 64

# Build the layer and a random input batch, then move both onto the GPU
layer = FusionKANLayer(in_features, out_features).to("cuda")
x = torch.randn(batch_size, in_features).to("cuda")

# Forward pass
y = layer(x)
print(f"Forward pass output shape: {y.shape}")

# Backward pass: reduce to a scalar so autograd runs the kernel's gradient code
loss = y.sum()
loss.backward()
print("Backward pass successful")

CUDA Backend Loaded Successfully
Forward pass output shape: torch.Size([4096, 64])
Backward pass successful


In [8]:
# Show the GPU model, driver / CUDA version and current memory usage of this runtime
!nvidia-smi

Sun Dec  7 13:20:09 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   62C    P0             32W /   72W |     267MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [14]:
# Run this in a Jupyter Cell to recompile
%cd /content/FusionKAN
!pip install . --no-deps --force-reinstall

/content/FusionKAN
Processing /content/FusionKAN
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fusion_kan
  Building wheel for fusion_kan (setup.py) ... [?25l[?25hdone
  Created wheel for fusion_kan: filename=fusion_kan-1.0.0-cp312-cp312-linux_x86_64.whl size=120082 sha256=1133668c93c6c0df9a2ae16cfea2c17ec20ea8125862fd48961874db8d163f0a
  Stored in directory: /tmp/pip-ephem-wheel-cache-rbttj1d5/wheels/ba/ee/1a/c6dba8d3add4302b13e8353459dd01ef403bc9ab63abe170dd
Successfully built fusion_kan
Installing collected packages: fusion_kan
  Attempting uninstall: fusion_kan
    Found existing installation: fusion_kan 1.0.0
    Uninstalling fusion_kan-1.0.0:
      Successfully uninstalled fusion_kan-1.0.0
Successfully installed fusion_kan-1.0.0


In [15]:
import torch
import torch.nn as nn
from torch.autograd import gradcheck
import time
import importlib
import fusion_kan
# NOTE(review): importlib.reload re-executes only the top-level `fusion_kan` package;
# submodules that are already imported and any compiled extension stay as loaded.
# After rebuilding the CUDA extension, restarting the runtime is the reliable way to
# pick up the new binary — TODO confirm for this package.
importlib.reload(fusion_kan) # Force reload of the module
from fusion_kan.functional import FusionKANFunction
from fusion_kan.layer import FusionKANLayer

# Report the package version and whether this session can see a CUDA device
print(f"FusionKAN Version: {fusion_kan.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")

def run_gradient_check():
    """Check FusionKANFunction's analytical gradients against finite differences.

    Builds small float64 CUDA tensors (inputs, weights and the two scalar grid
    bounds, all with requires_grad=True) and passes them to
    torch.autograd.gradcheck. Prints a PASSED line with gradcheck's result, or a
    FAILED line followed by the exception text. Nothing is returned and no
    exception propagates to the caller.
    """
    print("\n--- 1. NUMERICAL CORRECTNESS (GradCheck) ---")

    # gradcheck needs double precision for numerical stability; every tensor
    # below shares these options.
    tensor_opts = dict(
        dtype=torch.float64,
        device=torch.device('cuda'),
        requires_grad=True,
    )

    # Tiny problem size so the numerical Jacobian stays cheap to evaluate
    batch, n_in, n_out = 4, 4, 8
    grid_size = 5

    inputs = torch.randn(batch, n_in, **tensor_opts)
    weights = torch.randn(n_out, n_in, grid_size + 3, **tensor_opts)

    # Grid bounds are passed as 0-dim tensors
    grid_min = torch.tensor(-1.0, **tensor_opts)
    grid_max = torch.tensor(1.0, **tensor_opts)

    print("Running torch.autograd.gradcheck...")
    try:
        # The custom autograd Function is exercised directly, not through the layer
        passed = gradcheck(
            FusionKANFunction.apply,
            (inputs, weights, grid_size, grid_min, grid_max),
            eps=1e-6,
            atol=1e-4,
        )
    except Exception as err:
        print(f"‚ùå Gradient Check FAILED")
        print(err)
    else:
        print(f"‚úÖ Gradient Check PASSED: {passed}")

def run_stress_test(batch_size=16384, in_features=32, out_features=64,
                    grid_size=100, steps=100, warmup=10):
    """Time repeated forward+backward passes of a FusionKANLayer on the GPU.

    Args:
        batch_size: Number of rows in the random input batch.
        in_features: Input width of the layer.
        out_features: Output width of the layer.
        grid_size: Value forwarded to the layer's ``grid_size`` argument.
        steps: Number of timed forward+backward iterations (must be >= 1).
        warmup: Number of untimed iterations run before timing starts.

    Raises:
        ValueError: If ``steps`` is smaller than 1.

    Prints the configuration, the average time per step in milliseconds and the
    throughput in samples per second. Returns nothing.
    """
    if steps < 1:
        raise ValueError("steps must be at least 1")

    print("\n--- 2. PERFORMANCE STRESS TEST (Shared Memory Tiling) ---")
    device = torch.device('cuda')

    print(f"Config: Batch={batch_size}, In={in_features}, Out={out_features}, Grid={grid_size}")

    layer = FusionKANLayer(in_features, out_features, grid_size=grid_size).to(device)
    x = torch.randn(batch_size, in_features, device=device)

    def _forward_backward():
        # One forward pass followed by a backward pass through a scalar reduction.
        # Gradients are never zeroed between calls; only the timing matters here.
        y = layer(x)
        loss = y.sum()
        loss.backward()

    # Warmup iterations are excluded from the measurement
    for _ in range(warmup):
        _forward_backward()

    # Synchronize so pending GPU work from the warmup is not billed to the timed loop
    torch.cuda.synchronize()
    # perf_counter is monotonic and high resolution; time.time() follows the wall
    # clock and can jump if the system clock is adjusted.
    start = time.perf_counter()

    for _ in range(steps):
        _forward_backward()

    # Wait for all queued kernels to finish before reading the clock
    torch.cuda.synchronize()
    elapsed = time.perf_counter() - start

    avg_time = elapsed / steps * 1000 # ms
    print(f"‚úÖ Stress Test Complete.")
    print(f"Avg Time (Forward+Backward): {avg_time:.2f} ms per step")
    print(f"Throughput: {batch_size * steps / elapsed:,.0f} samples/sec")

def run_convergence_test():
    """Train a 1-in / 1-out FusionKANLayer on sin(x) and report whether loss drops.

    Fits 1000 evenly spaced points on [-2, 2] with Adam (lr=0.01) for 50 steps,
    printing the MSE loss every 10th step. Afterwards prints PASSED when the
    last step's loss is below 0.1 and SUSPICIOUS otherwise. Returns nothing.
    """
    print("\n--- 3. SANITY CHECK (Training Loop) ---")
    # If the gradients are wrong, the loss is expected to explode or stay flat.

    model = FusionKANLayer(1, 1, grid_size=10).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    mse = torch.nn.functional.mse_loss

    # Regression target: y = sin(x), with x as a column vector on the GPU
    x_train = torch.linspace(-2, 2, 1000).view(-1, 1).cuda()
    y_train = torch.sin(x_train)

    print("Training 50 steps...")
    for step in range(50):
        optimizer.zero_grad()
        loss = mse(model(x_train), y_train)
        loss.backward()
        optimizer.step()

        if not step % 10:
            print(f"Step {step}: Loss {loss.item():.6f}")

    # Judge by the loss computed in the final iteration
    final_loss = loss.item()
    if final_loss >= 0.1:
        print("‚ö†Ô∏è Convergence Check SUSPICIOUS (Loss is high)")
    else:
        print("‚úÖ Convergence Check PASSED (Loss < 0.1)")

if __name__ == "__main__":
    # Run the checks in order: gradient correctness, throughput, then training sanity
    for check in (run_gradient_check, run_stress_test, run_convergence_test):
        check()

FusionKAN Version: 1.0.0
CUDA Available: True

--- 1. NUMERICAL CORRECTNESS (GradCheck) ---
Running torch.autograd.gradcheck...
‚ùå Gradient Check FAILED
Jacobian mismatch for output 0 with respect to input 2,
numerical:tensor([[-1.1805,  1.4696, -0.1648,  0.9229,  0.2591, -0.5335, -0.4045, -0.7504,
         -0.2629, -4.3979, -4.6565,  0.1414,  1.4779,  2.7036,  4.1581, -2.6717,
          0.4072, -0.3723,  1.4248,  2.4468, -0.3104,  0.6066, -1.8865, -0.3712,
         -1.5217,  2.0443,  0.8747, -0.3724,  1.8979,  0.2793, -1.0910,  0.2961]],
       device='cuda:0', dtype=torch.float64)
analytical:tensor([[-2.3610,  2.9392, -0.3296,  1.8459,  0.5182, -1.0669, -0.8090, -1.5009,
         -0.5259, -8.7959, -9.3129,  0.2829,  2.9558,  5.4072,  8.3163, -5.3433,
          0.8145, -0.7446,  2.8496,  4.8936, -0.6209,  1.2132, -3.7729, -0.7425,
         -3.0434,  4.0885,  1.7494, -0.7448,  3.7958,  0.5585, -2.1820,  0.5923]],
       device='cuda:0', dtype=torch.float64)


--- 2. PERFORMANCE STRESS