In [1]:
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import math
import seaborn as sns
import time


from kan_convolutional.KANLinear import KANLinear
from kan_convolutional.KANConv import KAN_Convolutional_Layer
from kan_convolutional import convolution 

In [2]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms

class Malimg(Dataset):
    """Grayscale image dataset laid out as one subfolder per class.

    Every immediate subdirectory of ``root_dir`` is treated as a class and
    every ``*.png`` file directly inside it as one sample of that class.

    Class indices are assigned in **sorted** subfolder-name order, so two
    instances built from directories containing the same subfolder names
    always agree on the label -> class mapping (``os.listdir`` alone returns
    entries in arbitrary order).

    Args:
        root_dir: Directory containing one subfolder per class.
        transform: Optional callable applied to the PIL image in
            ``__getitem__``.

    Attributes set by ``__init__``:
        image_files: List of sample file paths.
        labels: List of integer class indices, parallel to ``image_files``.
        class_names: List of subfolder names; index == class label.
    """

    def __init__(self, root_dir, transform=None):
        self.transform = transform
        self.image_files = []
        self.labels = []
        self.class_names = []

        # Sort so that label indices are deterministic across runs, machines
        # and dataset roots instead of depending on directory-listing order.
        for subfolder in sorted(os.listdir(root_dir)):
            subfolder_path = os.path.join(root_dir, subfolder)
            if not os.path.isdir(subfolder_path):
                # Stray files next to the class folders are not classes.
                continue

            # A folder is registered as a class even if it holds no .png file.
            label = len(self.class_names)
            self.class_names.append(subfolder)

            # Sorted as well, so sample order (and therefore any seeded split
            # over this dataset) is reproducible.
            for img_file in sorted(os.listdir(subfolder_path)):
                if img_file.endswith('.png'):
                    self.image_files.append(os.path.join(subfolder_path, img_file))
                    self.labels.append(label)

    def __len__(self):
        """Return the number of samples found under ``root_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened with PIL and converted to mode ``'L'``; if a
        transform was supplied, its result is returned instead of the PIL
        image. ``label`` is the integer index into ``class_names``.
        """
        img_name = self.image_files[idx]
        # The context manager closes the underlying file as soon as the
        # converted copy has been made.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('L')
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        return image, label

    def get_class_names(self):
        """Return the list of class names; position ``i`` is label ``i``."""
        return self.class_names

# Preprocessing applied to every image: resize to 64x64, convert to a tensor
# (ToTensor scales pixel values to [0, 1]) and then normalize with
# mean 0.5 / std 0.5, which maps the single channel to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Dataset locations. The defaults are the original local paths; set the
# MALIMG_ROOT / GAN_GENERATED_ROOT environment variables to run elsewhere
# without editing the notebook.
malimg_root_dir = os.environ.get(
    "MALIMG_ROOT",
    "C:\\Users\\Anurag Dutta\\Desktop\\Essentials\\Research\\malware\\malimg",
)
gan_generated_root_dir = os.environ.get(
    "GAN_GENERATED_ROOT",
    "C:\\Users\\Anurag Dutta\\Desktop\\Essentials\\Research\\malware\\gan_generated",
)

# Seed for the train/test split so the held-out set (and the metrics computed
# on it) is the same on every run.
SPLIT_SEED = 42
BATCH_SIZE = 64
TEST_FRACTION = 0.2

malimg_dataset = Malimg(root_dir=malimg_root_dir, transform=transform)
gan_generated_dataset = Malimg(root_dir=gan_generated_root_dir, transform=transform)

# Both datasets are concatenated below and share one label space, so label i
# must name the same class in each. GAN labels are 0..k-1, hence the GAN class
# list has to equal the first k Malimg class names. Fail loudly otherwise:
# a mismatch would silently train on wrong targets.
malimg_classes = malimg_dataset.get_class_names()
gan_classes = gan_generated_dataset.get_class_names()
if gan_classes != malimg_classes[:len(gan_classes)]:
    raise ValueError(
        "Label mapping mismatch between datasets: "
        f"malimg classes {malimg_classes} vs gan_generated classes {gan_classes}"
    )

total_malimg_size = len(malimg_dataset)
test_size = int(TEST_FRACTION * total_malimg_size)
train_size = total_malimg_size - test_size

# Only real Malimg samples are held out for testing; GAN-generated samples are
# used for training exclusively.
malimg_train_dataset, malimg_test_dataset = random_split(
    malimg_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_dataset = torch.utils.data.ConcatDataset([malimg_train_dataset, gan_generated_dataset])
test_dataset = malimg_test_dataset

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [3]:
class SimpleLinear(nn.Module):
    """Single-layer KAN classifier.

    The input batch is flattened, passed through one ``KANLinear`` layer and
    returned as log-probabilities (``log_softmax`` over dim 1).

    Args:
        in_features: Number of values per sample after flattening.
            Defaults to ``64 * 64`` (one 64x64 single-channel image).
        num_classes: Number of output classes. Defaults to 25.

    The submodule names ``kan`` and ``flatten`` are part of the saved
    ``state_dict`` layout; keep them stable so existing checkpoints load.
    """

    def __init__(self, in_features=64 * 64, num_classes=25):
        super().__init__()
        self.kan = KANLinear(
            in_features=in_features,
            out_features=num_classes,
            grid_size=10,
            spline_order=3,
            scale_noise=0.01,
            scale_base=1,
            scale_spline=1,
            # The class itself is passed, not an instance.
            base_activation=nn.SiLU,
            grid_eps=0.02,
            # NOTE(review): the notebook's transform normalizes pixels to
            # [-1, 1] while the spline grid is declared over [0, 1] --
            # confirm this range is intended.
            grid_range=[0, 1]
        )
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return per-class log-probabilities for a batch ``x``.

        ``x`` is flattened from dim 1 onward (``nn.Flatten`` default), so each
        sample must contain ``in_features`` values in total.
        """
        x = self.flatten(x)
        x = self.kan(x)
        x = F.log_softmax(x, dim=1)
        return x

model = SimpleLinear()

In [4]:
# Training hyper-parameters, named once so the loop and the log line cannot
# drift apart.
NUM_EPOCHS = 10
LEARNING_RATE = 0.001

# The model already returns log-probabilities. CrossEntropyLoss applies
# log_softmax again, which leaves log-probabilities unchanged, so the loss is
# the same as it would be on raw logits.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

start_time = time.time()

for epoch in range(NUM_EPOCHS):
    epoch_start_time = time.time()
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `loss` is the mean over this batch; weight it by the batch size so
        # a shorter final batch does not distort the epoch average.
        batch_samples = labels.size(0)
        running_loss += loss.item() * batch_samples
        samples_seen += batch_samples

    epoch_time = time.time() - epoch_start_time
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {epoch_loss:.4f}, Time elapsed: {epoch_time:.2f} seconds')

total_time = time.time() - start_time
print(f"Training completed in: {total_time:.2f} seconds")

# ---- Evaluation on the held-out test loader ----
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        preds = outputs.argmax(dim=1)
        # .tolist() yields plain Python ints and works regardless of which
        # device the tensors live on.
        all_preds.extend(preds.tolist())
        all_labels.extend(labels.tolist())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Macro averaging gives every class equal weight. zero_division=0 scores a
# class with no predicted/true samples as 0 (the value the default uses)
# without emitting an UndefinedMetricWarning.
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)

print(f'Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')
torch.save(model.state_dict(), 'kan_l_gan_t_malware_classifier_64x64.pth')

Epoch [1/10], Loss: 0.1175, Time elapsed: 147.62 seconds
Epoch [2/10], Loss: 0.0120, Time elapsed: 132.43 seconds
Epoch [3/10], Loss: 0.0060, Time elapsed: 149.44 seconds
Epoch [4/10], Loss: 0.0030, Time elapsed: 127.31 seconds
Epoch [5/10], Loss: 0.0021, Time elapsed: 129.00 seconds
Epoch [6/10], Loss: 0.0011, Time elapsed: 137.73 seconds
Epoch [7/10], Loss: 0.0006, Time elapsed: 150.35 seconds
Epoch [8/10], Loss: 0.0004, Time elapsed: 131.38 seconds
Epoch [9/10], Loss: 0.0002, Time elapsed: 129.08 seconds
Epoch [10/10], Loss: 0.0002, Time elapsed: 130.47 seconds
Training completed in: 1364.81 seconds
Accuracy: 0.9877, Precision: 0.9724, Recall: 0.9650, F1 Score: 0.9667
