## Required Libraries

In [None]:
#to install KAN library
#pip install pykan

In [None]:
import pandas as pd
import numpy as np
import torch
from kan import *  
import matplotlib.pyplot as plt
#from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import moviepy.video.io.ImageSequenceClip

# Device on which the notebook places its tensors and models
# (read by load_dataset, the KAN constructor and the MLP baseline).
device="cpu"

In [None]:
# Read the raw table; load_dataset later expects it to contain a 'Label'
# column (class target) next to the feature columns.
data = pd.read_csv('dataset_v3.csv')
# Last expression of the cell: displays the first rows as a quick sanity check.
data.head()

## Data Pre-processing

In [None]:
def load_dataset(data_set, batch_size=512):
    """Shuffle, split and tensorise a labelled table into a KAN-style dataset dict.

    Args:
        data_set: DataFrame holding a 'Label' column (class target); every
            other column is used as a numeric input feature.
        batch_size: Batch size of the intermediate DataLoaders that stream
            the split data onto the module-level ``device``.

    Returns:
        dict with the keys 'train_input', 'test_input' (float32, one row per
        sample) and 'train_label', 'test_label' (int64), all on ``device``.
        The split is 80/20 with a fixed random_state of 42; training rows
        additionally come out in the DataLoader's shuffled order.
    """
    frame = shuffle(data_set, random_state=42)
    features = frame.drop('Label', axis=1).values
    target = frame['Label'].values

    # Split dataset into train and test sets
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Convert to PyTorch tensors
    train_x = torch.tensor(train_x, dtype=torch.float32)
    train_y = torch.tensor(train_y, dtype=torch.long)
    test_x = torch.tensor(test_x, dtype=torch.float32)
    test_y = torch.tensor(test_y, dtype=torch.long)

    # Create data loaders. `shuffle` must be the Python bool `False`; the
    # lowercase `false` is not a Python name.
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(train_x, train_y),
        batch_size=batch_size, shuffle=True,
    )
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(test_x, test_y),
        batch_size=batch_size, shuffle=False,
    )

    # Take the feature width from the data instead of hard-coding 4.
    n_features = train_x.shape[1]

    def _gather(loader):
        """Concatenate every batch of ``loader`` into one tensor pair on ``device``."""
        # Seeding with empty tensors keeps the result well-formed (0 rows)
        # even if the loader yields nothing.
        input_parts = [torch.empty(0, n_features, device=device)]
        label_parts = [torch.empty(0, dtype=torch.long, device=device)]
        for batch_inputs, batch_labels in loader:
            input_parts.append(batch_inputs.to(device))
            label_parts.append(batch_labels.to(device))
        # One concatenation at the end instead of re-allocating per batch.
        return torch.cat(input_parts, dim=0), torch.cat(label_parts, dim=0)

    train_inputs, train_labels = _gather(train_loader)
    test_inputs, test_labels = _gather(test_loader)

    return {
        'train_input': train_inputs,
        'test_input': test_inputs,
        'train_label': train_labels,
        'test_label': test_labels,
    }

# Build the train/test tensor dictionary that the KAN cells read via
# dataset['train_input'], dataset['train_label'], etc.
dataset = load_dataset(data,batch_size=512)

In [None]:
# Report the shape of each tensor stored in the dataset dictionary.
for template, key in (
    ("Train data shape: {}", 'train_input'),
    ("Train target shape: {}", 'train_label'),
    ("Test data shape: {}", 'test_input'),
    ("Test target shape: {}", 'test_label'),
):
    print(template.format(dataset[key].shape))

In [None]:
# Display names passed to model.plot() as in_vars / out_vars.
# Full-length feature names, kept for reference:
# column_names = ["Variance", "Skewness", "Kurtosis", "Entropy"]
# Abbreviated feature names actually used as plot labels.
column_names = ["Var", "Skew", "Kurt", "Entp"]
# NOTE(review): presumably Label 0 -> "Real" and Label 1 -> "Fake" — confirm
# against the dataset's label encoding.
target_names = ["Real", "Fake"]

## Model Initialisation

In [None]:
# Folder handed to fit(..., img_folder=...) and later listed by the video cell.
image_folder = 'video_img'

# Layer widths: 4 inputs, 4 hidden nodes, 2 outputs (matching the lengths of
# column_names and target_names).
model = KAN(width=[4, 4, 2], grid=3, k=3, seed=0, device=device)
# Per the pykan documentation, `grid` is the number of grid intervals and
# `k` is the order of the piecewise-polynomial spline on each edge — TODO
# confirm against the installed pykan version.

# Forward pass before plotting; NOTE(review): pykan's plot() presumably draws
# the activations cached by the most recent forward call — confirm.
model(dataset['train_input'])
model.plot(beta=100, scale=1, in_vars=column_names, out_vars=target_names)

## Metrics for accuracy

In [None]:
def train_acc():
    """Return the fraction of training samples the global ``model`` classifies correctly (0-dim tensor)."""
    logits = model(dataset['train_input'])
    predicted = torch.argmax(logits, dim=1)
    hits = (predicted == dataset['train_label']).float()
    return torch.mean(hits)

def test_acc():
    """Return the fraction of test samples the global ``model`` classifies correctly (0-dim tensor)."""
    logits = model(dataset['test_input'])
    predicted = torch.argmax(logits, dim=1)
    hits = (predicted == dataset['test_label']).float()
    return torch.mean(hits)

## Model Training and Evaluation

In [None]:
# Train the KAN for 100 Adam steps with cross-entropy loss. The two metric
# callbacks are read back below under results['train_acc'] / results['test_acc'].
# save_fig/img_folder make pykan write training figures into image_folder,
# which the video cell later turns into a clip.
# NOTE(review): lamb / lamb_entropy look like regularisation weights — confirm
# their exact meaning in the pykan docs.
results = model.fit(dataset, opt="Adam", metrics=(train_acc, test_acc),
                      loss_fn=torch.nn.CrossEntropyLoss(), steps=100, lamb=0.01, lamb_entropy=10., save_fig=True, img_folder=image_folder)
# Re-plot the network after training.
model.plot(beta=100, scale=1, in_vars=column_names, out_vars=target_names)

In [None]:
# Display the last recorded (train, test) accuracy of the most recent fit.
results['train_acc'][-1], results['test_acc'][-1]

In [None]:
# Same plot as before but with sample=True.
# NOTE(review): presumably this overlays the sample points on each
# activation curve — confirm in the pykan plot() docs.
model.plot(beta=100, scale=1,sample=True, in_vars=column_names, out_vars=target_names)

In [None]:
# Size of the trained KAN: every parameter element, then only those that
# receive gradients.
total_params = sum(map(torch.numel, model.parameters()))
trainable_params = sum(
    torch.numel(p) for p in model.parameters() if p.requires_grad
)

print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")

## Video Generation

In [None]:
# This file never imports `os` by name; import it here rather than relying
# on a wildcard import to provide it.
import os

video_name='fin_video'
fps=5

# Frames are the files named '<integer>.jpg' inside image_folder. Sort the
# real file names by their numeric value so the clip plays in step order.
# (The previous code indexed the sorted id array by its own values, which
# only works when the ids are exactly 0..N-1 with no gaps.)
frame_names = sorted(
    (
        name for name in os.listdir(image_folder)
        if name.endswith('.jpg') and name[:-4].isdecimal()
    ),
    key=lambda name: int(name[:-4]),
)
image_files = [image_folder+'/'+name for name in frame_names]

# Fail with a clear message instead of an obscure error from moviepy.
if not image_files:
    raise FileNotFoundError(
        f"no numbered .jpg frames found in {image_folder!r}"
    )

clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(image_files, fps=fps)
clip.write_videofile(video_name+'.mp4')

## Pruning, Assigning Symbols and Optimization

In [None]:
# Keep the pruned network under a separate name; `model` itself stays bound
# to the unpruned network.
model1 = model.prune()

# Forward pass, then plot the pruned network.
# NOTE(review): plot() presumably needs the activations from this forward
# call — confirm in the pykan docs.
model1(dataset['train_input'])
model1.plot(beta=1000,scale=1, in_vars=column_names, out_vars=target_names)

In [None]:
# Rebind `model` to an earlier saved version of itself.
# NOTE(review): '0.1' presumably identifies a checkpoint pykan saved during
# the steps above — confirm it exists before running this cell.
model = model.rewind('0.1') 
# Change the value above to rewind to a different past model.

In [None]:
# Size of the pruned KAN (model1): every parameter element, then only those
# that receive gradients.
total_params = sum(map(torch.numel, model1.parameters()))
trainable_params = sum(
    torch.numel(p) for p in model1.parameters() if p.requires_grad
)

print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")

In [None]:
def _accuracy_metrics(net):
    """Build (train_acc, test_acc) callbacks that evaluate ``net``.

    The module-level train_acc/test_acc always evaluate the global ``model``,
    so passing them while fitting ``model1`` records the accuracy of the wrong
    network. The inner functions keep the names 'train_acc' and 'test_acc'
    because the results are read back under exactly those keys.
    """
    def train_acc():
        predicted = torch.argmax(net(dataset['train_input']), dim=1)
        return torch.mean((predicted == dataset['train_label']).float())

    def test_acc():
        predicted = torch.argmax(net(dataset['test_input']), dim=1)
        return torch.mean((predicted == dataset['test_label']).float())

    return train_acc, test_acc


# Re-train the pruned network, measuring accuracy on model1 itself.
results = model1.fit(dataset, opt="Adam", metrics=_accuracy_metrics(model1),
                     loss_fn=torch.nn.CrossEntropyLoss(), steps=100, lamb=0.01, lamb_entropy=10., save_fig=True, img_folder=image_folder)
# The final accuracies are displayed by the next cell; a bare expression in
# the middle of a cell shows nothing, so it is not repeated here.
model1.plot(scale=1, in_vars=column_names, out_vars=target_names)

In [None]:
# Display the last recorded (train, test) accuracy of the model1 fit above.
results['train_acc'][-1], results['test_acc'][-1]

In [None]:
# Candidate symbolic functions handed to auto_symbolic.
lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','tan','abs']
# NOTE(review): this operates on `model` (rebound by the rewind cell), not on
# the pruned and re-trained `model1` — confirm that is the intended network.
model.auto_symbolic(lib=lib)
# Forward pass, then plot the network after the symbolic assignment.
model(dataset['train_input'])
model.plot(beta=100,scale=1, in_vars=column_names, out_vars=target_names)

In [None]:
# Size of `model` after the symbolic step: every parameter element, then only
# those that receive gradients.
total_params = sum(map(torch.numel, model.parameters()))
trainable_params = sum(
    torch.numel(p) for p in model.parameters() if p.requires_grad
)

print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")

In [None]:

# Fine-tune `model` for 50 more Adam steps after the symbolic assignment.
# train_acc/test_acc evaluate the global `model`, i.e. the network being fit.
results =model.fit(dataset, opt="Adam", metrics=(train_acc, test_acc),
                    loss_fn=torch.nn.CrossEntropyLoss(), steps=50, lamb=0.01, lamb_entropy=10., save_fig=True, img_folder=image_folder)
# Last expression of the cell: displays the final (train, test) accuracy.
results['train_acc'][-1], results['test_acc'][-1]

## Sample Comparison

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected baseline classifier: 4 inputs -> 6 -> 6 -> 2 logits.

    A single ReLU module is shared by both hidden layers.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 6)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(6, 6)
        self.fc3 = nn.Linear(6, 2)

    def forward(self, x):
        """Map a batch of 4-feature rows to 2 unnormalised class scores."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

def train_model(model, train_loader, criterion, optimizer, num_epochs=100):
    """Train ``model`` in place with mini-batch gradient descent.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` yields no batches.

    Prints the loss of the last batch after every epoch.
    """
    # Move batches to wherever the model lives instead of depending on a
    # module-level `device`; a parameter-free model leaves batches untouched.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.train()
    for epoch in range(num_epochs):
        loss = None
        for inputs, labels in train_loader:
            if target_device is not None:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
        # Without this guard an empty loader would hit an unbound `loss` below.
        if loss is None:
            raise ValueError("train_loader yielded no batches; nothing to train on")
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

def test_model(model, test_loader):
    """Evaluate classification accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module producing one score per class for each input row; it is
            switched to evaluation mode.
        test_loader: Iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        Accuracy in percent (0-100) as a float; the value is also printed.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Move batches to wherever the model lives instead of depending on a
    # module-level `device`; a parameter-free model leaves batches untouched.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            if target_device is not None:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            # Under no_grad the legacy `.data` detour is unnecessary.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Accuracy is undefined without samples; fail clearly instead of
    # dividing by zero.
    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy}%')
    return accuracy

In [None]:
def load_dataset(data_set, batch_size=512):
    """Shuffle, split and wrap a labelled table into train/test DataLoaders.

    NOTE: this definition rebinds the name of the earlier ``load_dataset`` in
    this file; unlike that one, it returns DataLoaders, not a tensor dict.

    Args:
        data_set: DataFrame holding a 'Label' column (class target); every
            other column is used as a numeric input feature.
        batch_size: Batch size of both returned loaders.

    Returns:
        ``(train_loader, test_loader)`` over an 80/20 split made with a fixed
        random_state of 42. Features are float32 and labels int64. The
        training loader reshuffles on each pass; the test loader keeps a
        fixed order.
    """
    frame = shuffle(data_set, random_state=42)
    features = frame.drop('Label', axis=1).values
    target = frame['Label'].values

    # Split dataset into train and test sets
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Convert to PyTorch tensors
    train_set = torch.utils.data.TensorDataset(
        torch.tensor(train_x, dtype=torch.float32),
        torch.tensor(train_y, dtype=torch.long),
    )
    test_set = torch.utils.data.TensorDataset(
        torch.tensor(test_x, dtype=torch.float32),
        torch.tensor(test_y, dtype=torch.long),
    )

    # Create data loaders. `shuffle` must be the Python bool `False`; the
    # lowercase `false` is not a Python name.
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

# Build the mini-batch loaders consumed by train_model / test_model.
train_loader, test_loader = load_dataset(data,batch_size=512)

In [None]:
# NOTE: this rebinds the global `model` from the KAN to the MLP baseline, so
# train_acc/test_acc (which read the global `model`) refer to this network
# from here on.
model = NeuralNetwork().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Uses train_model's default of 100 epochs.
train_model(model, train_loader, criterion, optimizer)

In [None]:
# Print the baseline's accuracy on the held-out split.
test_model(model, test_loader)

In [None]:
# Size of the MLP baseline, for comparison with the KAN counts above: every
# parameter element, then only those that receive gradients.
total_params = sum(map(torch.numel, model.parameters()))
trainable_params = sum(
    torch.numel(p) for p in model.parameters() if p.requires_grad
)

print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")