# Implementation of KANBoost
## Initialisations



In [None]:
!pip install pykan



In [None]:
import torch
from kan import *
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



cuda


## Loading the Dataset

In [None]:
class DeltaPrefetcherModel:
    """Turn a raw memory-access trace into (features, label) pairs for delta prediction.

    Every load address is viewed as a 48-bit binary string that is split, from
    the most significant bit downwards, into ``page`` (``page_size`` bits),
    ``block`` (``block_size`` bits) and ``block_offset`` (all remaining bits).
    """

    # Width of the binary address string produced by ensure_48bit_address.
    ADDRESS_BITS = 48
    # Shift added to a signed block delta to obtain a non-negative class index.
    DELTA_OFFSET = 64
    # Number of delta classes / length of the one-hot vector.
    NUM_DELTA_CLASSES = 128

    def __init__(self, page_size, block_size):
        """Store the number of address bits used for the page and the block fields."""
        self.page_size = page_size
        self.block_size = block_size

    def ensure_48bit_address(self, load_address):
        """Return ``load_address`` (a hex string) as a 48-character binary string.

        Short addresses are left-padded with zeros. Addresses wider than 48
        bits are reduced to their low 48 bits so the page/block slices taken
        by ``split_load_address`` always cover the same bit positions.
        """
        value = int(load_address, 16) & ((1 << self.ADDRESS_BITS) - 1)
        return bin(value)[2:].zfill(self.ADDRESS_BITS)

    def calculate_delta(self, block1, block2):
        """Return ``block1 - block2`` where both are binary strings."""
        return int(block1, 2) - int(block2, 2)

    def split_load_address(self, line):
        """Split one trace record into ``(instr_id, page, block, block_offset)``.

        ``line`` must hold exactly five fields:
        ``(instr_id, cycle_count, load_address, instr_ptr, llc_hit_miss)``.
        Only ``instr_id`` and ``load_address`` are used here.
        """
        instr_id, _, load_address, _, _ = line
        binary_address = self.ensure_48bit_address(load_address)

        block_end = self.page_size + self.block_size
        page = binary_address[:self.page_size]            # top `page_size` bits
        block = binary_address[self.page_size:block_end]  # next `block_size` bits
        block_offset = binary_address[block_end:]         # remaining low bits

        return (instr_id, page, block, block_offset)

    def delta_to_one_hot(self, delta):
        """Return a 128-element one-hot list with the 1 at index ``64 + delta``.

        Raises:
            IndexError: if ``64 + delta`` falls outside ``[0, 127]``. Without
                this check a position below zero would wrap around through
                Python's negative indexing and mark the wrong class.
        """
        position = self.DELTA_OFFSET + delta
        if not 0 <= position < self.NUM_DELTA_CLASSES:
            raise IndexError(
                "delta {} maps to one-hot position {}, outside [0, {})".format(
                    delta, position, self.NUM_DELTA_CLASSES))
        one_hot = [0] * self.NUM_DELTA_CLASSES
        one_hot[position] = 1
        return one_hot

    def preprocess_data(self, data):
        """Build model inputs and labels from consecutive trace records.

        For every record except the last one this produces:

        * a feature tuple ``(instr_id, current_block, delta1, delta2, delta3)``
          where ``current_block`` is the block number as an int and the deltas
          are differences between the most recent blocks already recorded for
          the current page (each defaults to 1 while the history is too short);
        * a label ``next_delta + 64``, i.e. the block delta to the next record
          shifted to a non-negative class index.

        Returns:
            ``(input_features, output_labels)`` as two parallel lists.
        """
        input_features = []
        output_labels = []
        if len(data) < 2:
            # No consecutive pair exists, so there is nothing to emit.
            return input_features, output_labels

        # Split every record once; each one is used both as "current" and as "next".
        parsed = [self.split_load_address(data[i]) for i in range(len(data))]
        page_blocks = {}

        for i in range(len(parsed) - 1):
            instr_id, current_page, current_block, _ = parsed[i]
            next_block = parsed[i + 1][2]

            # Per-page block history, seeded with block '000001' on first sight of a page.
            history = page_blocks.setdefault(current_page, ['000001'])

            # Deltas over the history recorded *before* the current access.
            delta1 = delta2 = delta3 = 1
            if len(history) > 1:
                delta1 = self.calculate_delta(history[-1], history[-2])
            if len(history) > 2:
                delta2 = self.calculate_delta(history[-2], history[-3])
            if len(history) > 3:
                delta3 = self.calculate_delta(history[-3], history[-4])

            # NOTE(review): the next record may belong to a different page; the
            # delta is still taken between the two block fields — confirm intended.
            next_delta = self.calculate_delta(next_block, current_block)

            input_features.append((instr_id, int(current_block, 2), delta1, delta2, delta3))

            # The label is a class index (delta shifted by 64), not a one-hot vector.
            output_labels.append(next_delta + self.DELTA_OFFSET)

            history.append(current_block)

        return input_features, output_labels

# Read the access trace from a comma-separated text file
def read_data_from_file(filename):
    """Parse a comma-separated trace file into a list of 5-tuples.

    Blank lines are skipped. Every other line is split on commas, each field
    is stripped of surrounding whitespace, and the record is returned as
    ``(instr_id, cycle_count, load_address, instr_ptr, llc_hit_miss)`` with
    the first, second and fifth fields converted to ``int`` and the two
    address fields kept as strings.
    """
    records = []
    with open(filename, 'r') as handle:
        for raw in handle:
            text = raw.strip()
            if not text:
                continue
            parts = [piece.strip() for piece in text.split(',')]
            records.append(
                (int(parts[0]), int(parts[1]), parts[2], parts[3], int(parts[4]))
            )
    return records

# Load the raw access trace (Colab path; point this at your own trace file).
filename = '/content/new_DS.txt'
data = read_data_from_file(filename)

# Address layout in bits: the page field comes first, then the block field.
page_size, block_size = 36, 6

# Preprocessor that turns trace records into features and labels.
model = DeltaPrefetcherModel(page_size=page_size, block_size=block_size)

# Build the feature tuples and class labels from the whole trace.
input_features, output_labels = model.preprocess_data(data)


In [None]:
def load_dataset(features=None, labels=None, test_size=0.2, random_state=42):
    """Split the preprocessed trace into train/test tensors placed on ``device``.

    Args:
        features: sequence of numeric feature rows. Defaults to the
            notebook-level ``input_features``.
        labels: sequence of integer class labels. Defaults to the
            notebook-level ``output_labels``.
        test_size: fraction of samples held out for testing.
        random_state: seed handed to ``train_test_split`` so the split is
            reproducible.

    Returns:
        dict with keys ``'train_input'``, ``'test_input'`` (float32 tensors)
        and ``'train_label'``, ``'test_label'`` (int64 tensors), all moved to
        the notebook-level ``device``.
    """
    if features is None:
        features = input_features
    if labels is None:
        labels = output_labels

    feature_tensor = torch.tensor(features, dtype=torch.float32)
    # Labels are class indices, so they stay integer (torch.long); the notebook
    # trains with torch.nn.CrossEntropyLoss.
    label_tensor = torch.tensor(labels, dtype=torch.long)

    train_data, test_data, train_target, test_target = train_test_split(
        feature_tensor, label_tensor, test_size=test_size, random_state=random_state)

    # Move each split to the device in one transfer. Concatenating one sample
    # at a time copies the growing tensor on every step (quadratic work), and
    # an unseeded shuffle would make the row order differ between runs.
    return {
        'train_input': train_data.to(device),
        'test_input': test_data.to(device),
        'train_label': train_target.to(device),
        'test_label': test_target.to(device),
    }

# Build the train/test tensors once; later cells read them from `traces_dataset`.
traces_dataset = load_dataset()

In [None]:
# Sanity-check the split sizes and tensor shapes.
print(f"Train data shape: {traces_dataset['train_input'].shape}")
print(f"Train target shape: {traces_dataset['train_label'].shape}")
print(f"Test data shape: {traces_dataset['test_input'].shape}")
print(f"Test target shape: {traces_dataset['test_label'].shape}")


Train data shape: torch.Size([16605, 5])
Train target shape: torch.Size([16605])
Test data shape: torch.Size([4152, 5])
Test target shape: torch.Size([4152])


## Creating and Training the KAN

In [None]:
# KAN with 5 inputs, one hidden layer of 64 nodes and 128 outputs.
model = KAN(
    width=[5, 64, 128],
    grid=3,
    k=3,
    seed=0,
    device=device,
)
# Forward pass over the training inputs; the resulting tensor is the cell output.
model(traces_dataset['train_input'])

checkpoint directory created: ./model
saving model version 0.0


tensor([[ 14320.0703,  43107.1406,  10604.2236,  ...,  47133.6484,
          26611.8516, -11398.8232],
        [ 21378.7598,  64362.2109,  15829.4883,  ...,  70367.2188,
          39730.7109, -17019.0059],
        [ 24095.5332,  72544.8438,  17840.0410,  ...,  79309.6875,
          44780.3125, -19181.7090],
        ...,
        [  7284.3921,  21917.9727,   5397.5762,  ...,  23977.3926,
          13536.5020,  -5797.0791],
        [ 47148.5469, 141947.6250,  34909.1484,  ..., 155187.0938,
          87622.2188, -37534.2734],
        [ 13047.6621,  39279.3945,   9661.4541,  ...,  42946.3633,
          24248.1523, -10386.2217]], device='cuda:0', grad_fn=<AddBackward0>)

In [None]:
def train_acc():
    """Return the training accuracy of the notebook-level ``model`` as a 0-dim tensor.

    Takes no arguments because it is handed to ``model.fit`` through
    ``metrics=``; it reads ``model`` and ``traces_dataset`` from the notebook.
    """
    # This is only a metric, so skip autograd bookkeeping for the
    # full-dataset forward pass.
    with torch.no_grad():
        predictions = torch.argmax(model(traces_dataset['train_input']), dim=1)
        return torch.mean((predictions == traces_dataset['train_label']).float())

def test_acc():
    """Return the test accuracy of the notebook-level ``model`` as a 0-dim tensor.

    Takes no arguments because it is handed to ``model.fit`` through
    ``metrics=``; it reads ``model`` and ``traces_dataset`` from the notebook.
    """
    # This is only a metric, so skip autograd bookkeeping for the
    # full-dataset forward pass.
    with torch.no_grad():
        predictions = torch.argmax(model(traces_dataset['test_input']), dim=1)
        return torch.mean((predictions == traces_dataset['test_label']).float())

# Train the KAN as a 128-way classifier with Adam, tracking both accuracies.
# The former `img_folder=image_folder` argument is gone: `image_folder` is not
# defined anywhere in this notebook, so the call raised NameError on a fresh
# kernel. Figure saving is disabled (save_fig=False), so no folder is needed.
results = model.fit(
    traces_dataset,
    opt="Adam",
    metrics=(train_acc, test_acc),
    loss_fn=torch.nn.CrossEntropyLoss(),
    steps=5000,
    lamb=0.01,
    lamb_entropy=10.05,
    save_fig=False,
)


| train_loss: 2.16e+01 | test_loss: 2.20e+01 | reg: 3.85e+02 | : 100%|█| 5000/5000 [50:39<00:00,  1.

saving model version 0.1





In [None]:
# Final (last-step) train and test accuracy recorded by model.fit.
results['train_acc'][-1], results['test_acc'][-1]

(0.15850646793842316, 0.15751445293426514)

In [None]:
# Prune the trained KAN and rebind `model` to the pruned network.
model = model.prune()
# Forward pass on the training inputs with the pruned model (shown as the cell output).
# NOTE(review): the recorded output of this cell is an all-zero tensor — check
# whether pruning removed every active connection before fine-tuning.
model(traces_dataset['train_input'])
# model.plot(scale=1)

saving model version 0.2


tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0',
       grad_fn=<AddBackward0>)

In [None]:
# Fine-tune the pruned model for a few more Adam steps.
# NOTE(review): the recorded run of this cell ended in a CUDA OutOfMemoryError.
results_1 = model.fit(
    traces_dataset,
    opt="Adam",
    metrics=(train_acc, test_acc),
    loss_fn=torch.nn.CrossEntropyLoss(),
    steps=50,
    lamb=0.01,
    lamb_entropy=10.,
)
results_1['train_acc'][-1], results_1['test_acc'][-1]

description:   0%|                                                           | 0/50 [00:00<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.95 GiB. GPU 0 has a total capacity of 14.75 GiB of which 1.62 GiB is free. Process 105292 has 13.13 GiB memory in use. Of the allocated memory 12.03 GiB is allocated by PyTorch, and 954.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Candidate symbolic functions offered to auto_symbolic.
lib = [
    'x', 'x^2', 'x^3', 'x^4',
    'exp', 'log', 'sqrt', 'tanh',
    'sin', 'tan', 'abs',
]
model.auto_symbolic(lib=lib)

In [None]:
# Element [0] of symbolic_formula() is taken as the list of output formulas.
# The KAN built above has 128 outputs, so unpacking that list directly into
# three names would fail; keep only the first three for inspection.
formulas = model.symbolic_formula()[0]
formula1, formula2, formula3 = formulas[:3]

In [None]:
# Display the symbolic expression bound to formula1.
formula1