In [872]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import math
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [873]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
import Kan_NN
import importlib
importlib.reload(Kan_NN)
import KAN_NN_fast
importlib.reload(KAN_NN_fast)
import KAN_NN_fast_repeat
importlib.reload(KAN_NN_fast_repeat)

<module 'KAN_NN_fast_repeat' from 'c:\\Users\\JP\\Documents\\TU Berlin\\Master\\Code_clean\\AAAA\\KAN_NN_fast_repeat.py'>

In [874]:
@torch.no_grad()
def compute_test_loss(test_loader, model):
    criterion = torch.nn.MSELoss()
    running_loss = 0.
    for batch, target in test_loader:
        outputs = model(batch)
        loss = criterion(target, outputs)
        running_loss += loss.item()
    return running_loss / len(test_loader)

In [875]:
#### FRIEDMANN 1
from sklearn.datasets import make_friedman1
def get_loader(in_dim, noise, n_samples = 20000):
    # Set the seed for reproducibility
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Generate the Friedmann dataset
    X_train, y = make_friedman1(n_samples=int(n_samples * 0.8), n_features= in_dim, random_state=seed, noise=noise)
    y_train = np.expand_dims(y, axis=1)
    # Split into train and test sets (80% train, 20% test)
    X_test, y = make_friedman1(n_samples=int(n_samples * 0.2), n_features= in_dim, random_state=seed, noise=0.0)
    y_test = np.expand_dims(y, axis=1)
    # Convert numpy arrays to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

    # Create TensorDataset for train and test sets
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    # Set batch size and create DataLoader for training and testing
    batch_size = 1024
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=10000, shuffle=False)
    return train_loader, test_loader



In [876]:
model = KAN_NN_fast.Neural_Kan(shape = [5,256,1], h = [16])
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainable_params)

5 256
256 1
73985


In [877]:
in_dim = 5
model = KAN_NN_fast.Neural_Kan(shape = [in_dim,256,1], h = [16])  # replace with KAN_NN_fast.Neural_Kan(...) or any model
model.train()

inputs = torch.randn(32, in_dim)  # adjust input size as needed
targets = torch.randn(32, 1)      # adjust target shape as needed
criterion = nn.MSELoss()
optimizer = torch.optim.RAdam(model.parameters(), lr=0.001)

with torch.profiler.profile(
    activities=[torch.profiler.ProfilerActivity.CPU],  # or add CUDA if using GPU
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof:
    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Print top 20 most expensive ops (including backward)
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=20))

5 256
256 1
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                              Optimizer.step#RAdam.step        18.58%      11.725ms        29.55%      18.645ms      18.645ms     578.04 Kb    -578.01 Kb             1  
                                              aten::bmm        20.63%      13.018ms        20.64%      13.019ms       2.170ms       3.13 Mb       3.13 Mb             6  
      autograd::engine::evaluate_function: BmmBackward0         0.14%      91.000us        12.80%       8.077ms       4.038ms       2.56 M

In [None]:
import time
import dill
n_samples = 20000
widths = [[128]]
in_dims = [5]
noises = [1.]
train_losses = []
test_losses = []
for width in widths:
    for in_dim in in_dims:
        for noise in noises:
            print(in_dim, noise, width)
            shape = [in_dim, width[0], 1]
            train_loader, test_loader = get_loader(in_dim, noise,n_samples)
            model = KAN_NN_fast.Neural_Kan(shape = shape, h = [32])
            print(model)
            epochs = 1000
            model.train()
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            criterion = torch.nn.MSELoss()
            for epoch in range(epochs):
                running_loss = 0.0 
                for batch, target in train_loader:
                    start_time = time.time()
                    optimizer.zero_grad()
                    outputs = model(batch)
                    loss = criterion(target, outputs)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item()
                avg_loss = running_loss / len(train_loader)
                model.train_loss.append(avg_loss)
                if not ((epoch + 1) % 100):
                    test_l = compute_test_loss(test_loader, model)
                    model.test_loss.append(test_l)
                else:
                    test_l = 1
                print(f"Epoch [{epoch + 1}/{epochs}], Loss: {avg_loss:.6f}, test: {test_l:.6f}, lr: {optimizer.param_groups[0]['lr']:6f}")
            plt.plot(model.train_loss[-50:])
            plt.title(f'train_loss')
            plt.legend()
            plt.show()
            print("Training Complete!")
            with open(f"models/Friedmann_1_KAN_arbwidth{noise}_{in_dim}.dill", "wb") as f:
                dill.dump(model, f)
            #with open(f"models/KAN_{width[0]}_{noise}_{in_dim}.dill", "wb") as f:
            #    dill.dump(model, f)



5 1.0 [128]
5 128
128 1
Neural_Kan(
  (layers): Sequential(
    (0): KAN_layer(
      (layers): Sequential(
        (0): Input_Linear()
        (1): ReLU()
        (2): Output_Linear()
      )
    )
    (1): KAN_layer(
      (layers): Sequential(
        (0): Input_Linear()
        (1): ReLU()
        (2): Output_Linear()
      )
    )
  )
)
Epoch [1/1000], Loss: 93.828364, test: 1.000000, lr: 0.001000
Epoch [2/1000], Loss: 31.260930, test: 1.000000, lr: 0.001000
Epoch [3/1000], Loss: 25.139443, test: 1.000000, lr: 0.001000
Epoch [4/1000], Loss: 24.765115, test: 1.000000, lr: 0.001000
Epoch [5/1000], Loss: 24.428970, test: 1.000000, lr: 0.001000
Epoch [6/1000], Loss: 24.316508, test: 1.000000, lr: 0.001000
Epoch [7/1000], Loss: 24.330039, test: 1.000000, lr: 0.001000
Epoch [8/1000], Loss: 24.133897, test: 1.000000, lr: 0.001000
Epoch [9/1000], Loss: 23.898387, test: 1.000000, lr: 0.001000
Epoch [10/1000], Loss: 23.495801, test: 1.000000, lr: 0.001000
Epoch [11/1000], Loss: 22.667371, t