In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, f1_score, precision_score, recall_score, accuracy_score
from collections import Counter
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import torch.nn as nn
import torch.nn.functional as F
import time, os, random, copy
import joblib
import math
import pickle as p
import seaborn as sns

The WADI dataset can be downloaded (or added on Kaggle) from this [Link](https://www.kaggle.com/datasets/giovannimonco/wadi-data)

In [None]:
import pandas as pd

# Load the labelled attack CSV. header=1 takes the column names from the second
# file row, i.e. the first row (extra text / header) is skipped.
data_path_attack = "/kaggle/input/wadi-data/WADI_attackdataLABLE.csv"
attack_csv = pd.read_csv(data_path_attack, header=1)

# Column names carry stray whitespace; strip it so the lookups below match.
attack_csv.columns = attack_csv.columns.str.strip()

# Index by 'Row', discard the timestamp columns and give the label a short name.
df_attack = (
    attack_csv
    .set_index('Row')
    .drop(columns=["Date", "Time"])
    .rename(columns={"Attack LABLE (1:No Attack, -1:Attack)": "target"})
)

# Per-column NaN counts; every column with more than two missing values is removed.
num_nans = df_attack.isna().sum()
df_attack = df_attack.drop(columns=num_nans[num_nans > 2].index)

# Discard the final two rows of the frame.
df_attack = df_attack.iloc[:-2]

In [None]:
# (rows, columns) of the cleaned frame; the label column is still included here.
df_attack.shape

In [None]:
# Split the label column off the features; pop() also removes "target" from df_attack in place.
label_attack = df_attack.pop("target")

In [None]:
# Re-encode the labels as class indices: 1 (no attack) -> 0, -1 (attack) -> 1.
# Values missing from the mapping become NaN, so re-running this cell on its own
# output corrupts the labels; re-run the pop cell above first.
label_attack = label_attack.map({1: 0, -1: 1})

In [None]:
# Class balance after re-encoding (0 = no attack, 1 = attack).
Counter(label_attack)

In [None]:
# Feature frame and label series consumed by train_test_split below.
X = df_attack
y = label_attack

print(X.shape, y.shape)

In [None]:
# Mini-batch size handed to Create_Dataloader for both the training and validation loaders.
batch_size = 1200

In [None]:
# SEED feeds set_seed() and the train/test split; test_ratio is the held-out fraction.
SEED = 24
test_ratio = 0.2

In [None]:
def set_seed(seed_value):
    """Seed the Python, NumPy and PyTorch (CPU + CUDA) RNGs and pin cuDNN to deterministic mode.

    Args:
        seed_value: Integer seed applied to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed_value)

    # NOTE(review): the backend object is created but not used as a `with` block;
    # confirm this has the intended effect on joblib's global backend.
    joblib.parallel_backend('threading', n_jobs=1)

    # Deterministic kernels only; the autotuner is off because it may pick
    # different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Set seed for reproducibility
set_seed(SEED)

In [None]:
# Shuffled hold-out split; random_state=SEED makes the partition reproducible.
# NOTE(review): no `stratify=` is passed, so class proportions in the two splits
# are only approximately equal — confirm this is acceptable for the attack class.
x_train , x_test , y_train , y_test = train_test_split(X, y, test_size=test_ratio, random_state=SEED, shuffle=True)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

In [None]:
# Fit the scaler on the training split only, then apply the same statistics to
# both splits so no test-set information leaks into the preprocessing.
scaler = StandardScaler()
scaler.fit(x_train)

x_train_standardized = scaler.transform(x_train)
x_test_standardized = scaler.transform(x_test)

# Sanity check: mean and std taken over every element of each standardized array.
print(np.mean(x_train_standardized), np.mean(x_test_standardized))
print(np.std(x_train_standardized), np.std(x_test_standardized))

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


def dkd_loss(logits_student, logits_teacher, target, config):
    """Decoupled distillation loss: weighted sum of a target-class and a non-target-class KL term.

    Args:
        logits_student: Student logits, indexed as (sample, class).
        logits_teacher: Teacher logits with the same layout.
        target: Ground-truth class indices (flattened internally by the mask helpers).
        config: Object exposing `beta` (weight of the target-class term),
            `gammar` (weight of the non-target term) and `temperature`.

    Returns:
        Scalar tensor `config.beta * tckd + config.gammar * nckd`.
    """
    tckd_weight = config.beta
    nckd_weight = config.gammar
    T = config.temperature
    n_samples = target.shape[0]

    is_target = _get_gt_mask(logits_student, target)
    is_other = _get_other_mask(logits_student, target)

    # Target-class term: collapse each distribution to [p(target), p(everything else)].
    student_binary = cat_mask(F.softmax(logits_student / T, dim=1), is_target, is_other)
    teacher_binary = cat_mask(F.softmax(logits_teacher / T, dim=1), is_target, is_other)
    tckd_loss = (
        F.kl_div(torch.log(student_binary), teacher_binary, reduction='sum')
        * (T**2)
        / n_samples
    )

    # Non-target term: subtracting a large constant from the target logit pushes
    # its probability to ~0, so the softmax renormalises over the other classes.
    target_penalty = 1000.0 * is_target
    teacher_rest = F.softmax(logits_teacher / T - target_penalty, dim=1)
    student_rest_log = F.log_softmax(logits_student / T - target_penalty, dim=1)
    nckd_loss = (
        F.kl_div(student_rest_log, teacher_rest, reduction='sum')
        * (T**2)
        / n_samples
    )

    return tckd_weight * tckd_loss + nckd_weight * nckd_loss

def kd_loss(logits_student, logits_teacher, config):
    """Classic temperature-scaled distillation loss KL(teacher || student).

    Args:
        logits_student: Student logits, indexed as (sample, class).
        logits_teacher: Teacher logits with the same layout.
        config: Object exposing `temperature`.

    Returns:
        Scalar tensor: per-sample KL divergence averaged over the batch and
        multiplied by `temperature ** 2`.
    """
    T = config.temperature
    student_log_probs = F.log_softmax(logits_student / T, dim=1)
    teacher_probs = F.softmax(logits_teacher / T, dim=1)
    per_sample_kl = F.kl_div(student_log_probs, teacher_probs, reduction="none").sum(1)
    return per_sample_kl.mean() * T**2

def _get_gt_mask(logits, target):
    target = target.reshape(-1)
    mask = torch.zeros_like(logits).scatter_(1, target.unsqueeze(1), 1).bool()
    return mask


def _get_other_mask(logits, target):
    target = target.reshape(-1)
    mask = torch.ones_like(logits).scatter_(1, target.unsqueeze(1), 0).bool()
    return mask


def cat_mask(t, mask1, mask2):
    """Sum `t` along dim 1 under each mask and return the two sums as a two-column tensor."""
    masked_sums = [(t * mask).sum(dim=1, keepdim=True) for mask in (mask1, mask2)]
    return torch.cat(masked_sums, dim=1)

In [None]:
def count_parameters(model):
    """Return the total number of scalar entries across the trainable parameters of `model`."""
    total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are excluded from the count.
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
class KANLinear(torch.nn.Module):
    """One KAN layer: a base linear path on an activation plus a learnable B-spline path.

    The output is `linear(base_activation(x), base_weight)` plus a linear map of
    the B-spline basis values of `x`, weighted by `spline_weight` (optionally
    multiplied per (out, in) pair by `spline_scaler`).

    Args:
        in_features: Size of the last input dimension.
        out_features: Size of the last output dimension.
        grid_size: Number of grid intervals inside `grid_range`.
        spline_order: Order of the B-spline basis; the grid is extended by this
            many knots on each side.
        scale_noise: Magnitude of the random noise used to initialise the spline.
        scale_base: Multiplies the `a` argument of the Kaiming init of `base_weight`.
        scale_spline: Spline scale; with the standalone scaler enabled it
            multiplies the `a` argument of the Kaiming init of `spline_scaler`.
        enable_standalone_scale_spline: Whether to learn a separate per-edge
            `spline_scaler` parameter.
        base_activation: Activation class (instantiated with no arguments).
        grid_eps: Blend factor between uniform and data-adaptive grids in `update_grid`.
        grid_range: Two-element sequence `[low, high]` covered by the initial grid.
    """

    def __init__(
        self,
        in_features,
        out_features,
        grid_size=5,
        spline_order=3,
        scale_noise=0.1,
        scale_base=1.0,
        scale_spline=1.0,
        enable_standalone_scale_spline=True,
        base_activation=torch.nn.SiLU,
        grid_eps=0.02,
        grid_range=[-1, 1],
    ):
        super(KANLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.grid_size = grid_size
        self.spline_order = spline_order

        # Uniform knot vector with `spline_order` extra knots on each side of
        # grid_range, replicated for every input feature.
        h = (grid_range[1] - grid_range[0]) / grid_size
        grid = (
            (
                torch.arange(-spline_order, grid_size + spline_order + 1) * h
                + grid_range[0]
            )
            .expand(in_features, -1)
            .contiguous()
        )
        # Buffer: moves with .to(device) and is saved in state_dict, but is not trained.
        self.register_buffer("grid", grid)

        self.base_weight = torch.nn.Parameter(torch.Tensor(out_features, in_features))
        self.spline_weight = torch.nn.Parameter(
            torch.Tensor(out_features, in_features, grid_size + spline_order)
        )
        if enable_standalone_scale_spline:
            self.spline_scaler = torch.nn.Parameter(
                torch.Tensor(out_features, in_features)
            )

        self.scale_noise = scale_noise
        self.scale_base = scale_base
        self.scale_spline = scale_spline
        self.enable_standalone_scale_spline = enable_standalone_scale_spline
        self.base_activation = base_activation()
        self.grid_eps = grid_eps

        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the base weight, the spline coefficients and (if enabled) the spline scaler."""
        torch.nn.init.kaiming_uniform_(self.base_weight, a=math.sqrt(5) * self.scale_base)
        with torch.no_grad():
            # Small random curve values at the in-range grid points; the spline
            # coefficients are fitted to interpolate this noise.
            noise = (
                (
                    torch.rand(self.grid_size + 1, self.in_features, self.out_features)
                    - 1 / 2
                )
                * self.scale_noise
                / self.grid_size
            )
            self.spline_weight.data.copy_(
                (self.scale_spline if not self.enable_standalone_scale_spline else 1.0)
                * self.curve2coeff(
                    self.grid.T[self.spline_order : -self.spline_order],
                    noise,
                )
            )
            if self.enable_standalone_scale_spline:
                torch.nn.init.kaiming_uniform_(self.spline_scaler, a=math.sqrt(5) * self.scale_spline)

    def b_splines(self, x: torch.Tensor):
        """
        Compute the B-spline bases for the given input tensor.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, in_features).

        Returns:
            torch.Tensor: B-spline bases tensor of shape (batch_size, in_features, grid_size + spline_order).
        """
        assert x.dim() == 2 and x.size(1) == self.in_features

        grid: torch.Tensor = (
            self.grid
        )  # (in_features, grid_size + 2 * spline_order + 1)
        x = x.unsqueeze(-1)
        # Order-0 bases: indicator of the knot interval containing x.
        bases = ((x >= grid[:, :-1]) & (x < grid[:, 1:])).to(x.dtype)
        # Raise the order one step at a time; each step drops one basis function.
        for k in range(1, self.spline_order + 1):
            bases = (
                (x - grid[:, : -(k + 1)])
                / (grid[:, k:-1] - grid[:, : -(k + 1)])
                * bases[:, :, :-1]
            ) + (
                (grid[:, k + 1 :] - x)
                / (grid[:, k + 1 :] - grid[:, 1:(-k)])
                * bases[:, :, 1:]
            )

        assert bases.size() == (
            x.size(0),
            self.in_features,
            self.grid_size + self.spline_order,
        )
        return bases.contiguous()

    def curve2coeff(self, x: torch.Tensor, y: torch.Tensor):
        """
        Compute the coefficients of the curve that interpolates the given points.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, in_features).
            y (torch.Tensor): Output tensor of shape (batch_size, in_features, out_features).

        Returns:
            torch.Tensor: Coefficients tensor of shape (out_features, in_features, grid_size + spline_order).
        """
        assert x.dim() == 2 and x.size(1) == self.in_features
        assert y.size() == (x.size(0), self.in_features, self.out_features)

        A = self.b_splines(x).transpose(
            0, 1
        )  # (in_features, batch_size, grid_size + spline_order)
        B = y.transpose(0, 1)  # (in_features, batch_size, out_features)
        # One least-squares problem per input feature: basis values @ coeffs ~= curve values.
        solution = torch.linalg.lstsq(
            A, B
        ).solution  # (in_features, grid_size + spline_order, out_features)
        result = solution.permute(
            2, 0, 1
        )  # (out_features, in_features, grid_size + spline_order)

        assert result.size() == (
            self.out_features,
            self.in_features,
            self.grid_size + self.spline_order,
        )
        return result.contiguous()

    @property
    def scaled_spline_weight(self):
        """Spline coefficients, multiplied by the per-edge scaler when it is enabled."""
        return self.spline_weight * (
            self.spline_scaler.unsqueeze(-1)
            if self.enable_standalone_scale_spline
            else 1.0
        )

    def forward(self, x: torch.Tensor):
        """Apply the layer to the last dimension of `x`; leading dimensions are preserved.

        Args:
            x: Tensor whose last dimension equals `in_features`.

        Returns:
            Tensor with the same leading dimensions and last dimension `out_features`.
        """
        assert x.size(-1) == self.in_features
        original_shape = x.shape
        # reshape (not view): permuted/transposed inputs are non-contiguous and
        # `view` raises on them, while reshape copies only when necessary.
        x = x.reshape(-1, self.in_features)

        base_output = F.linear(self.base_activation(x), self.base_weight)
        spline_output = F.linear(
            self.b_splines(x).view(x.size(0), -1),
            self.scaled_spline_weight.view(self.out_features, -1),
        )
        output = base_output + spline_output

        output = output.reshape(*original_shape[:-1], self.out_features)
        return output

    @torch.no_grad()
    def update_grid(self, x: torch.Tensor, margin=0.01):
        """Re-fit the knot grid to the distribution of `x` and re-solve the spline coefficients.

        The coefficients are re-fitted so that the spline output on `x` under
        the new grid matches the output under the old grid.

        Args:
            x: Tensor of shape (batch, in_features).
            margin: Padding added on both ends of the observed range for the uniform grid.
        """
        assert x.dim() == 2 and x.size(1) == self.in_features
        batch = x.size(0)

        # Spline outputs under the current grid, kept per (sample, in, out) edge.
        splines = self.b_splines(x)  # (batch, in, coeff)
        splines = splines.permute(1, 0, 2)  # (in, batch, coeff)
        orig_coeff = self.scaled_spline_weight  # (out, in, coeff)
        orig_coeff = orig_coeff.permute(1, 2, 0)  # (in, coeff, out)
        unreduced_spline_output = torch.bmm(splines, orig_coeff)  # (in, batch, out)
        unreduced_spline_output = unreduced_spline_output.permute(
            1, 0, 2
        )  # (batch, in, out)

        # sort each channel individually to collect data distribution
        x_sorted = torch.sort(x, dim=0)[0]
        # Adaptive grid: evenly spaced order statistics (quantile-like knots).
        grid_adaptive = x_sorted[
            torch.linspace(
                0, batch - 1, self.grid_size + 1, dtype=torch.int64, device=x.device
            )
        ]

        # Uniform grid spanning [min - margin, max + margin] per feature.
        uniform_step = (x_sorted[-1] - x_sorted[0] + 2 * margin) / self.grid_size
        grid_uniform = (
            torch.arange(
                self.grid_size + 1, dtype=torch.float32, device=x.device
            ).unsqueeze(1)
            * uniform_step
            + x_sorted[0]
            - margin
        )

        # Blend the two grids, then extend by `spline_order` knots on each side.
        grid = self.grid_eps * grid_uniform + (1 - self.grid_eps) * grid_adaptive
        grid = torch.cat(
            [
                grid[:1]
                - uniform_step
                * torch.arange(self.spline_order, 0, -1, device=x.device).unsqueeze(1),
                grid,
                grid[-1:]
                + uniform_step
                * torch.arange(1, self.spline_order + 1, device=x.device).unsqueeze(1),
            ],
            dim=0,
        )

        self.grid.copy_(grid.T)
        self.spline_weight.data.copy_(self.curve2coeff(x, unreduced_spline_output))

    def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
        """
        Compute the regularization loss.

        This is a dumb simulation of the original L1 regularization as stated in the
        paper, since the original one requires computing absolutes and entropy from the
        expanded (batch, in_features, out_features) intermediate tensor, which is hidden
        behind the F.linear function if we want an memory efficient implementation.

        The L1 regularization is now computed as mean absolute value of the spline
        weights. The authors implementation also includes this term in addition to the
        sample-based regularization.
        """
        l1_fake = self.spline_weight.abs().mean(-1)
        regularization_loss_activation = l1_fake.sum()
        # Normalised per-edge magnitudes, treated as a distribution for the entropy term.
        p = l1_fake / regularization_loss_activation
        regularization_loss_entropy = -torch.sum(p * p.log())
        return (
            regularize_activation * regularization_loss_activation
            + regularize_entropy * regularization_loss_entropy
        )


class KAN(torch.nn.Module):
    """Feed-forward stack of KANLinear layers.

    Args:
        layers_hidden: Sequence of layer widths; one KANLinear is created per
            consecutive pair (so `[a, b, c]` yields layers a->b and b->c).
        grid_size, spline_order, scale_noise, scale_base, scale_spline,
        base_activation, grid_eps, grid_range: Forwarded unchanged to every
            KANLinear.
    """

    def __init__(
        self,
        layers_hidden,
        grid_size=5,
        spline_order=3,
        scale_noise=0.1,
        scale_base=1.0,
        scale_spline=1.0,
        base_activation=torch.nn.SiLU,
        grid_eps=0.02,
        grid_range=[-1, 1],
    ):
        super(KAN, self).__init__()
        self.grid_size = grid_size
        self.spline_order = spline_order

        # Settings shared by every layer in the stack.
        layer_kwargs = dict(
            grid_size=grid_size,
            spline_order=spline_order,
            scale_noise=scale_noise,
            scale_base=scale_base,
            scale_spline=scale_spline,
            base_activation=base_activation,
            grid_eps=grid_eps,
            grid_range=grid_range,
        )
        width_pairs = zip(layers_hidden, layers_hidden[1:])
        self.layers = torch.nn.ModuleList(
            [KANLinear(n_in, n_out, **layer_kwargs) for n_in, n_out in width_pairs]
        )

    def forward(self, x: torch.Tensor, update_grid=False):
        """Run `x` through every layer; optionally refit each layer's grid on its input first."""
        hidden = x
        for kan_layer in self.layers:
            if update_grid:
                kan_layer.update_grid(hidden)
            hidden = kan_layer(hidden)
        return hidden

    def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
        """Sum of the per-layer regularization losses (0 when there are no layers)."""
        total = 0
        for kan_layer in self.layers:
            total = total + kan_layer.regularization_loss(
                regularize_activation, regularize_entropy
            )
        return total

# Teacher Model


In [None]:
# Spline order used by every KAN layer built in KAN_model
s_o = 6
# Grid size (number of grid intervals) used by every KAN layer built in KAN_model
g_s = 20

In [None]:
class KAN_model(nn.Module):
    """Teacher classifier: two single-layer KAN blocks producing class logits.

    The spline order and grid size come from the module-level `s_o` and `g_s`.

    Args:
        in_features: Number of input features per sample (default 123, the
            width this notebook was written for).
        hidden_features: Width of the intermediate representation.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=123, hidden_features=30, num_classes=2):
        super(KAN_model, self).__init__()
        # The first block sees standardized inputs, hence the wider grid range.
        self.KAN1 = KAN([in_features, hidden_features], grid_size=g_s, spline_order=s_o, grid_range=[-4, 4])
        self.projection = KAN([hidden_features, num_classes], grid_size=g_s, spline_order=s_o, grid_range=[-1, 1])

    def forward(self, x):
        """Map a batch of samples to logits of shape (batch, num_classes).

        Args:
            x: Tensor whose first dimension is the batch; the remaining
                dimensions are flattened into the feature axis.
        """
        batch = x.shape[0]
        # reshape instead of view so non-contiguous inputs are accepted as well.
        x = x.reshape(batch, 1, -1)
        out = self.KAN1(x)
        out = self.projection(out)
        # (batch, 1, num_classes) -> (batch, num_classes)
        out_f = out.reshape(batch, out.size(1) * out.size(2))
        return out_f

In [None]:
# Instantiate the KAN teacher and report its trainable parameter count.
teacher_model = KAN_model()
print(f"Total parameters of KAN teacher_model: {count_parameters(teacher_model)}")

# Student Model

In [None]:
import torch
import torch.nn as nn

class MLP_model(nn.Module):
    """Student classifier: one hidden ReLU layer followed by a linear logit layer.

    Args:
        in_features: Number of input features per sample (default 123, the
            width this notebook was written for).
        hidden_features: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=123, hidden_features=20, num_classes=2):
        super(MLP_model, self).__init__()

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Map a batch of samples to logits of shape (batch, num_classes)."""
        # Flatten everything after the batch dimension; reshape also accepts
        # non-contiguous inputs, where view would raise.
        x = x.reshape(x.shape[0], -1)
        out = torch.relu(self.fc1(x))
        out = self.fc2(out)
        return out


In [None]:
# Instantiate the MLP student and report its trainable parameter count.
student_model = MLP_model()
print(f"Total parameters of MLP student_model: {count_parameters(student_model)}")

In [None]:
def Create_Dataloader(train_data, val_data, train_label, val_label, batch_size = 8):
    """Wrap pre-split features and labels in PyTorch DataLoaders.

    Args:
        train_data: 2-D array-like of training features (samples x features).
        val_data: 2-D array-like of validation features.
        train_label: 1-D array-like of integer training labels.
        val_label: 1-D array-like of integer validation labels.
        batch_size: Mini-batch size for both loaders.

    Returns:
        (train_dataloader, valid_dataloader). Feature batches have a trailing
        singleton axis, i.e. shape (batch, features, 1); label batches are int64.
        Training batches are drawn through a RandomSampler (reshuffled on every
        pass); validation batches keep the input order.
    """

    def _to_dataset(features, labels):
        # (samples, features) -> (samples, features, 1)
        feature_array = np.array(features)
        feature_array = np.reshape(
            feature_array, (feature_array.shape[0], feature_array.shape[1], 1)
        )
        label_tensor = torch.tensor(np.array(labels)).type(torch.LongTensor)
        return TensorDataset(torch.tensor(feature_array), label_tensor)

    train_dataset = _to_dataset(train_data, train_label)
    valid_dataset = _to_dataset(val_data, val_label)

    # The sampler must actually be handed to the loader; previously it was
    # built but unused, so training batches were identical in every epoch.
    train_sampler = RandomSampler(train_dataset)

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
    # Validation order does not affect the metrics, so it stays sequential and deterministic.
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size)

    return train_dataloader, valid_dataloader

In [None]:
def calculate_confusion_matrix(y_true, y_pred, positive, negative):
    """Count binary-classification outcomes.

    Predictions equal to neither `positive` nor `negative` are ignored. A
    positive prediction counts as TP when the true label is `positive` and as
    FP otherwise; a negative prediction counts as TN when the true label is
    `negative` and as FN otherwise.

    Args:
        y_true: Array-like of true labels.
        y_pred: Array-like of predicted labels, same length as `y_true`.
        positive: Label value treated as the positive class.
        negative: Label value treated as the negative class.

    Returns:
        2x2 integer array laid out as [[TP, FN], [FP, TN]].

    Raises:
        ValueError: If `y_true` and `y_pred` differ in length.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {y_true.size} and {y_pred.size}"
        )

    # Vectorized masks replace the per-sample Python loop.
    pred_pos = y_pred == positive
    # Mirrors the original if/elif: a prediction is tested against `negative`
    # only when it did not already match `positive`.
    pred_neg = (y_pred == negative) & ~pred_pos
    true_is_pos = y_true == positive
    true_is_neg = y_true == negative

    TP = int(np.count_nonzero(pred_pos & true_is_pos))
    FP = int(np.count_nonzero(pred_pos & ~true_is_pos))
    TN = int(np.count_nonzero(pred_neg & true_is_neg))
    FN = int(np.count_nonzero(pred_neg & ~true_is_neg))

    return np.array([[TP, FN], [FP, TN]])

In [None]:
def classification_metrics(conf_matrix):
    """Derive accuracy, F1, precision and recall from a [[TP, FN], [FP, TN]] matrix.

    Any metric whose denominator is zero (e.g. precision when nothing was
    predicted positive) is reported as 0.0 instead of NaN, so comparisons such
    as "is this the best F1 so far" keep working.

    Args:
        conf_matrix: 2x2 array indexed as conf_matrix[row, col] with layout
            [[TP, FN], [FP, TN]].

    Returns:
        Tuple (accuracy, f1, precision, recall) of Python floats.
    """

    def _safe_ratio(numerator, denominator):
        # Guard against 0/0, which would otherwise produce NaN and a RuntimeWarning.
        return float(numerator) / float(denominator) if denominator else 0.0

    TP = conf_matrix[0,0]
    FN = conf_matrix[0,1]
    FP = conf_matrix[1,0]
    TN = conf_matrix[1,1]

    acc = _safe_ratio(TP + TN, TP + FN + FP + TN)
    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)
    f1 = _safe_ratio(2 * (precision * recall), precision + recall)
    return acc, f1, precision, recall


# Train teacher model

In [None]:
def _plot_metric_history(val_values, train_values, val_label, train_label, title, ylabel, save_path):
    """Plot one validation/training metric pair over epochs, save the figure and show it."""
    plt.plot(list(range(len(val_values))), val_values, label = val_label)
    plt.plot(list(range(len(train_values))), train_values, label = train_label)
    plt.title(title)
    plt.xlabel('Number of epochs')
    plt.ylabel(ylabel)
    plt.legend()
    plt.savefig(save_path)
    plt.show()


def train_teacher(model, optimizer, ep, train_dataloader_final, val_dataloader_final, evaluation=False):
    """Train the teacher with cross-entropy and checkpoint the best validation-F1 weights.

    Batches are moved to the module-level `device`. When `evaluation` is True
    the model is scored with `evaluate` after every epoch and, whenever the
    validation F1 improves, its state_dict is written to
    /kaggle/working/best_teacher_model.pt. Loss/accuracy/precision/F1/recall
    curves are saved under /kaggle/working/ and shown at the end.

    Args:
        model: Network returning class logits of shape (batch, classes).
        optimizer: Optimizer over `model`'s parameters.
        ep: Number of epochs.
        train_dataloader_final: Loader yielding (features, integer labels).
        val_dataloader_final: Validation loader (used only when `evaluation` is True).
        evaluation: Whether to validate, log and checkpoint each epoch.

    Returns:
        (best_val_recall, best_val_f1): the highest validation recall and the
        highest validation F1 seen over all epochs (both 0 without evaluation).
    """

    # Start training loop
    print("Start training...\n")
    tr_ep_loss = []
    tr_ep_acc = []
    tr_ep_f1 = []
    tr_ep_precision = []
    tr_ep_recall = []

    val_ep_loss = []
    val_ep_acc = []
    val_ep_f1 = []
    val_ep_precision = []
    val_ep_recall = []

    best_val_recall = 0
    best_val_f1 = 0

    loss_fn = nn.CrossEntropyLoss()

    for e in range(1, ep + 1):
        # =======================================
        #               Training
        # =======================================
        # Measure the elapsed time of each epoch
        t0_epoch = time.time()

        # Put the model into the training mode
        model.train()

        # Per-batch results are collected in lists and joined once per epoch
        # (repeated np.append would re-copy the whole history on every batch).
        batch_losses = []
        actual_batches = []
        pred_batches = []

        for batch, (x, y) in enumerate(train_dataloader_final):
            optimizer.zero_grad()

            z = model(x.to(device).float())

            # view(-1) keeps a 1-D target even for a final batch of size 1,
            # where squeeze() would collapse it to a 0-d tensor.
            loss = loss_fn(z, y.to(device).view(-1))
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients".
            # This has to happen after backward() and before step(); clipping
            # after the step (and after zeroing the gradients) has no effect.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            # The arg-max of the logits equals the arg-max of any softmax of
            # them, and avoids the overflow of exponentiating large logits.
            pred_batches.append(z.detach().argmax(dim=1).cpu().numpy().astype('int'))
            actual_batches.append(y.view(-1).cpu().numpy().astype('int'))
            batch_losses.append(loss.item())

        y_actual = np.concatenate(actual_batches) if actual_batches else np.array([])
        y_pred = np.concatenate(pred_batches) if pred_batches else np.array([])

        conf_matrix = calculate_confusion_matrix(y_actual, y_pred,1,0)
        acc, f1, precision, recall = classification_metrics(conf_matrix)

        # Calculate the average loss over the entire training data
        avg_train_loss = np.mean(batch_losses)
        tr_ep_loss.append(avg_train_loss)
        tr_ep_acc.append(acc)
        tr_ep_f1.append(f1)
        tr_ep_precision.append(precision)
        tr_ep_recall.append(recall)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy, val_f1, val_precision, val_recall = evaluate(model, val_dataloader_final)

            if val_f1>best_val_f1:
                # Saving the state_dict snapshots the weights; no model copy is needed.
                model_name_location = f"/kaggle/working/best_teacher_model.pt"
                torch.save(model.state_dict(), model_name_location)
                best_val_f1 = val_f1
                print("This is the best f1 score!")

            # Previously never updated, so the function always returned 0 for it.
            if val_recall > best_val_recall:
                best_val_recall = val_recall

            val_ep_loss.append(val_loss)
            val_ep_acc.append(val_accuracy)
            val_ep_f1.append(val_f1)
            val_ep_precision.append(val_precision)
            val_ep_recall.append(val_recall)

            time_elapsed = time.time() - t0_epoch

            # Print the header of the result table
            print(f"{'Epoch':^7} | {'Train Loss':^12} | {'Train Accuracy':^12} | {'Train F1-score':^12} |{'Train Precision':^12} |{'Train Recall':^12} | {'Val Loss':^12} | {'Val Accuuracy':^12} | {'Val F1-score':^12}|{'Val Precision':^12}|{'Val Recall':^12}| {'Elapsed':^9}")
            print("-"*81)
            print(f"{e:^7} | {avg_train_loss:^12.6f} | {acc:^14.6} | {f1:^14.6} | {precision:^14.6} |{recall:^14.6} |{val_loss:^12.6f} | {val_accuracy:^14.6f} | {val_f1:^14.6f}|{val_precision:^14.6f}|{val_recall:^14.6f}| {time_elapsed:^9.2f}")
            print("-"*81)
        print("\n")

    # One figure per metric: validation curve first, then training curve.
    _plot_metric_history(val_ep_loss, tr_ep_loss, "Validation Loss", "Training Loss",
                         'Loss of Classification', 'Loss', '/kaggle/working/head_loss.png')
    _plot_metric_history(val_ep_acc, tr_ep_acc, "Validation Accuracy", "Training Accuracy",
                         'Accuracy of Classification', 'Accuracy', '/kaggle/working/head_acc.png')
    _plot_metric_history(val_ep_precision, tr_ep_precision, "Validation Precision", "Training Precision",
                         'Precision of Classification', 'Precision', '/kaggle/working/head_precision.png')
    _plot_metric_history(val_ep_f1, tr_ep_f1, "Validation F1 Score", "Training F1 Score",
                         'F1-Score of Classification', 'F1-Score', '/kaggle/working/head_f1.png')
    _plot_metric_history(val_ep_recall, tr_ep_recall, "Validation Recall", "Training Recall",
                         'Recall of Classification', 'Recall', '/kaggle/working/head_recall.png')

    print("Teacher Training complete!")
    # The original printed the literal text "best_val_f1" (missing f-string).
    print(f"Best f1-score: {best_val_f1}")
    return best_val_recall, best_val_f1

In [None]:
def evaluate(model, val_dataloader):
    """Score `model` on a validation loader without tracking gradients.

    Batches are moved to the module-level `device`. The model is left in eval
    mode on return.

    Args:
        model: Network returning class logits of shape (batch, classes).
        val_dataloader: Loader yielding (features, integer labels).

    Returns:
        Tuple (mean of the per-batch cross-entropy losses, accuracy, f1,
        precision, recall), with class 1 treated as positive and class 0 as
        negative.
    """
    model.eval()
    loss_fn = nn.CrossEntropyLoss()

    batch_losses = []
    actual_batches = []
    pred_batches = []

    with torch.no_grad():
        for x, y in val_dataloader:
            z = model(x.to(device).float())

            # view(-1) keeps a 1-D target even for a batch of size 1, where
            # squeeze() would collapse it to a 0-d tensor.
            val_loss = loss_fn(z, y.to(device).view(-1))
            batch_losses.append(val_loss.item())

            # arg-max of the logits == arg-max of their softmax, without the
            # overflow risk of exponentiating large logits by hand.
            pred_batches.append(z.argmax(dim=1).cpu().numpy().astype('int'))
            actual_batches.append(y.cpu().view(-1).numpy().astype('int'))

    y_actual = np.concatenate(actual_batches) if actual_batches else np.array([])
    y_pred = np.concatenate(pred_batches) if pred_batches else np.array([])

    conf_matrix = calculate_confusion_matrix(y_actual, y_pred,1,0)
    acc, f1, precision, recall = classification_metrics(conf_matrix)
    return np.mean(batch_losses), acc, f1, precision, recall

In [None]:
# Use the first GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
# Teacher training budget (number of epochs passed to train_teacher).
epoch = 100
# Move the teacher to the training device before building the optimizer over its parameters.
teacher_model.to(device)
optimizer = torch.optim.AdamW(teacher_model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

In [None]:
# Build the loaders from the standardized splits; the held-out test split doubles
# as the validation set that selects the best teacher checkpoint.
train_dataloader, val_dataloader = Create_Dataloader(x_train_standardized, x_test_standardized, y_train , y_test, batch_size)
# Only the best validation F1 is kept; the first return value is discarded.
_, best_teacher_f1 = train_teacher(teacher_model, optimizer, epoch, train_dataloader, val_dataloader, evaluation=True)

In [None]:
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

# Rebuild the teacher on the CPU and restore the best checkpoint written by
# train_teacher. map_location lets a checkpoint saved from a GPU model load on
# a CPU-only machine as well.
model = KAN_model()
model.load_state_dict(torch.load('/kaggle/working/best_teacher_model.pt', map_location="cpu"))
model.eval()
all_extracted_features = []
all_true_labels = []

# Running sample counter across batches, used to subsample the validation set.
global_index = 0

with torch.no_grad():
    for x, y in val_dataloader:
        x = x.to("cpu").float()
        # The "features" embedded below are the model's output logits.
        extracted_features = model(x)

        for i in range(x.size(0)):  # Loop through the batch
            if global_index % 10 == 0:  # Keep only every 10th sample
                all_extracted_features.append(extracted_features[i].cpu().numpy())
                all_true_labels.append(y[i].item())
            global_index += 1

# NOTE: this rebinds the notebook-level names X and y (previously the feature
# frame and label series); re-run the data cells before re-running the split.
X = np.array(all_extracted_features)
y = np.array(all_true_labels)
y = y.reshape(-1)

# Initialize and fit t-SNE
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)

# Plot the results
plt.figure(figsize=(8, 6))
labels = np.unique(y)
# Loop over the labels to plot each class separately. No `cmap` is passed:
# without `c` matplotlib ignores it and emits a warning; each scatter call
# gets its own colour from the default cycle.
for label in labels:
    plt.scatter(X_tsne[y == label, 0], X_tsne[y == label, 1], label=f'Class {label}', s=50, alpha=0.7)

plt.legend()
# The data visualised here is WADI (the title previously named SWaT).
plt.title('t-SNE Visualization WADI, After Pre-training')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.show()

In [None]:

# Persist the teacher's t-SNE embedding together with the matching true labels.
dict_tSNE = {"tSNE": X_tsne, "labels": all_true_labels}
with open("wadi_teacher.p", "wb") as file:
    p.dump(dict_tSNE, file)


In [None]:
def calculate_confusion_matrix_vis(model, test_dataloader):
    """Compute the model's confusion matrix on a loader and draw it as a heatmap.

    Inputs are moved to whatever device the model's parameters live on (CPU
    when the model has no parameters).

    Args:
        model: Network returning class logits of shape (batch, 2).
        test_dataloader: Loader yielding (features, integer labels in {0, 1}).

    Returns:
        2x2 array from sklearn's `confusion_matrix`: rows are true classes,
        columns are predicted classes, in the order [Normal (0), Attack (1)].
    """
    model.eval()
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    all_predicted_labels = []
    all_true_labels = []

    with torch.no_grad():
        for x, y in test_dataloader:
            z = model(x.to(model_device).float())
            # arg-max of the logits == arg-max of their softmax, without the
            # overflow risk of exponentiating large logits by hand.
            predicted_labels = torch.argmax(z, dim=1)

            all_predicted_labels.extend(predicted_labels.cpu().numpy().tolist())
            all_true_labels.extend(y.view(-1).cpu().numpy().tolist())

    classes = ["Normal", "Attack"]

    # Pin the label set so the matrix is always 2x2 and lines up with `classes`,
    # even if one class is absent from both the labels and the predictions.
    conf_matrix = confusion_matrix(all_true_labels, all_predicted_labels, labels=[0, 1])

    # Plot confusion matrix
    plt.figure(figsize=(10, 10))
    sns.set(font_scale=1.2)  # Adjust font size
    sns.heatmap(conf_matrix, annot=True, fmt='g', cmap='Blues',
                xticklabels=classes, yticklabels=classes,
                cbar_kws={'label': 'Number of samples'})
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()
    return conf_matrix

In [None]:
# Evaluate the best teacher checkpoint (written by train_teacher) on the validation loader.
best_model_path = "/kaggle/working/best_teacher_model.pt"

# Fresh KAN_model instance; its weights are restored from the checkpoint.
best_model = KAN_model()
best_model.load_state_dict(torch.load(best_model_path))

calculate_confusion_matrix_vis(best_model, val_dataloader)

# Train Student model single

In [None]:
def train_student(model, optimizer, ep, train_dataloader_final, val_dataloader_final, evaluation=False):
    """Train ``model`` with plain cross-entropy and record per-epoch metrics.

    Relies on names defined elsewhere in the notebook: ``device``, ``evaluate``,
    ``calculate_confusion_matrix`` and ``classification_metrics``.

    Args:
        model: Network to train; its output is used as per-class scores.
        optimizer: Optimizer already bound to ``model``'s parameters.
        ep: Number of epochs.
        train_dataloader_final: Iterable of ``(x, y)`` training batches.
        val_dataloader_final: Validation batches, passed to ``evaluate``.
        evaluation: When truthy, validate after every epoch, print the metric
            table, and save the weights with the best validation F1 to
            ``/kaggle/working/best_student_model.pt``.

    Returns:
        ``(best_val_recall, best_val_f1)``: the best validation F1 seen and the
        validation recall of that same epoch (both 0 if no epoch improved on 0
        or ``evaluation`` is off).

    Side effects:
        Saves and shows five training-curve figures under ``/kaggle/working/``.
    """
    print("Start training...\n")
    tr_ep_loss, tr_ep_acc, tr_ep_f1, tr_ep_precision, tr_ep_recall = [], [], [], [], []
    val_ep_loss, val_ep_acc, val_ep_f1, val_ep_precision, val_ep_recall = [], [], [], [], []

    best_val_recall = 0
    best_val_f1 = 0

    loss_fn = nn.CrossEntropyLoss()

    for e in range(1, ep + 1):
        # =======================================
        #               Training
        # =======================================
        t0_epoch = time.time()

        # Re-enter training mode every epoch (validation runs between epochs).
        model.train()

        batch_losses = []
        y_actual = np.array([])
        y_pred = np.array([])

        for x, y in train_dataloader_final:
            optimizer.zero_grad()

            z = model(x.to(device).float())

            # reshape(-1) rather than squeeze(): a batch holding a single sample
            # must keep its batch dimension for CrossEntropyLoss.
            loss = loss_fn(z, y.to(device).reshape(-1))
            loss.backward()

            # Clip the gradient norm to 1.0 to prevent "exploding gradients".
            # This has to happen after backward() and before step(); clipping
            # after the step (or after zeroing the gradients) does nothing.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            # Predicted class = argmax of the raw scores (softmax would not change it).
            batch_pred = z.detach().argmax(dim=1).cpu().numpy().astype('int')
            y_actual = np.append(y_actual, y.view(-1).cpu().numpy().astype('int'))
            y_pred = np.append(y_pred, batch_pred)
            batch_losses.append(loss.item())

        conf_matrix = calculate_confusion_matrix(y_actual, y_pred,1,0)
        acc, f1, precision, recall = classification_metrics(conf_matrix)

        # Mean of the per-batch losses over the epoch
        avg_train_loss = np.mean(batch_losses)
        tr_ep_loss.append(avg_train_loss)
        tr_ep_acc.append(acc)
        tr_ep_f1.append(f1)
        tr_ep_precision.append(precision)
        tr_ep_recall.append(recall)

        # =======================================
        #               Evaluation
        # =======================================
        if evaluation:
            val_loss, val_accuracy, val_f1, val_precision, val_recall = evaluate(model, val_dataloader_final)

            if val_f1 > best_val_f1:
                # Checkpoint the weights of the best-F1 epoch and remember its recall.
                torch.save(model.state_dict(), "/kaggle/working/best_student_model.pt")
                best_val_f1 = val_f1
                best_val_recall = val_recall
                print("This is the best f1 score!")

            val_ep_loss.append(val_loss)
            val_ep_acc.append(val_accuracy)
            val_ep_f1.append(val_f1)
            val_ep_precision.append(val_precision)
            val_ep_recall.append(val_recall)

            time_elapsed = time.time() - t0_epoch

            # Print the header of the result table
            print(f"{'Epoch':^7} | {'Train Loss':^12} | {'Train Accuracy':^12} | {'Train F1-score':^12} |{'Train Precision':^12} |{'Train Recall':^12} | {'Val Loss':^12} | {'Val Accuuracy':^12} | {'Val F1-score':^12}|{'Val Precision':^12}|{'Val Recall':^12}| {'Elapsed':^9}")
            print("-"*81)
            print(f"{e:^7} | {avg_train_loss:^12.6f} | {acc:^14.6} | {f1:^14.6} | {precision:^14.6} |{recall:^14.6} |{val_loss:^12.6f} | {val_accuracy:^14.6f} | {val_f1:^14.6f}|{val_precision:^14.6f}|{val_recall:^14.6f}| {time_elapsed:^9.2f}")
            print("-"*81)
        print("\n")

    # One figure per metric: (legend name, title/axis name, file stub, validation history, training history)
    curves = [
        ("Loss", "Loss", "loss", val_ep_loss, tr_ep_loss),
        ("Accuracy", "Accuracy", "acc", val_ep_acc, tr_ep_acc),
        ("Precision", "Precision", "precision", val_ep_precision, tr_ep_precision),
        ("F1 Score", "F1-Score", "f1", val_ep_f1, tr_ep_f1),
        ("Recall", "Recall", "recall", val_ep_recall, tr_ep_recall),
    ]
    for legend_name, axis_name, file_stub, val_hist, tr_hist in curves:
        plt.plot(list(range(len(val_hist))), val_hist, label=f"Validation {legend_name}")
        plt.plot(list(range(len(tr_hist))), tr_hist, label=f"Training {legend_name}")
        plt.title(f"{axis_name} of Classification")
        plt.xlabel('Number of epochs')
        plt.ylabel(axis_name)
        plt.legend()
        plt.savefig(f'/kaggle/working/head_{file_stub}.png')
        plt.show()

    print("Student Training complete!")
    print(f"Best f1-score: {best_val_f1}")
    return best_val_recall, best_val_f1

In [None]:
# Baseline student run: 100 epochs, AdamW at lr 1e-4 with weight decay disabled.
epoch = 100
student_model.to(device)
optimizer = torch.optim.AdamW(
    student_model.parameters(),
    lr=1e-4,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

In [None]:
# Train the baseline student (cross-entropy only) with per-epoch validation;
# only the best validation F1 from the returned pair is kept.
student_model.to(device)
_, best_student_f1 = train_student(
    student_model,
    optimizer,
    epoch,
    train_dataloader,
    val_dataloader,
    evaluation=True,
)

In [None]:
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

# Restore the best baseline-student checkpoint (kept on CPU for this visualisation).
model = MLP_model()
model.load_state_dict(torch.load('/kaggle/working/best_student_model.pt', map_location="cpu"))
# A freshly constructed module starts in training mode; switch to eval mode so that
# any dropout / batch-norm layers behave deterministically during inference.
model.eval()
all_extracted_features = []
all_true_labels = []

global_index = 0  # position of the current batch's first sample in the whole validation set

with torch.no_grad():
    for x, y in val_dataloader:
        x = x.to("cpu").float()
        extracted_features = model(x)  # model outputs used as the t-SNE input

        # Keep only every 10th sample, counted across batches: the first kept row of
        # this batch is the one whose global position is a multiple of 10.
        first_kept = (-global_index) % 10
        all_extracted_features.extend(extracted_features[first_kept::10].cpu().numpy())
        all_true_labels.extend(y.reshape(-1)[first_kept::10].tolist())
        global_index += x.size(0)

X = np.array(all_extracted_features)
y = np.array(all_true_labels)
y = y.reshape(-1)

# Initialize and fit t-SNE
tsne = TSNE(n_components=2, random_state=SEED)
X_tsne = tsne.fit_transform(X)

# Plot the results, one scatter series per class so each gets a legend entry.
# No `cmap` is passed: without a `c=` array matplotlib ignores it and emits a warning.
plt.figure(figsize=(8, 6))
labels = np.unique(y)
for label in labels:
    plt.scatter(X_tsne[y == label, 0], X_tsne[y == label, 1], label=f'Class {label}', s=50, alpha=0.7)

plt.legend()
plt.title('WADI Dataset t-SNE Visualization, Student Model')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.show()

In [None]:
# Bundle the baseline student's 2-D t-SNE embedding with the matching labels and
# pickle the pair so the figure can be re-drawn later without re-running t-SNE.
dict_tSNE = {"tSNE": X_tsne, "labels": all_true_labels}
with open("wadi_student.p", "wb") as file:
    p.dump(dict_tSNE, file)


In [None]:
# Confusion matrix of the best baseline-student checkpoint (highest validation F1)
# on the validation set.
best_model_path = "/kaggle/working/best_student_model.pt"

# Rebuild the student architecture and restore the saved weights.
# map_location="cpu" lets a checkpoint written from a GPU run load on a CPU-only kernel;
# the model is evaluated on CPU here.
best_model = MLP_model()
best_model.load_state_dict(torch.load(best_model_path, map_location="cpu"))

calculate_confusion_matrix_vis(best_model, val_dataloader)

# DKDL train function

In [None]:
def train_student_DKDL(student_model, teacher_model, optimizer, ep, train_dataloader_final, val_dataloader_final, evaluation=False):
    """Train ``student_model`` by knowledge distillation from a fixed ``teacher_model``.

    The per-batch loss is
    ``(1 - config.alpha) * hard_loss + config.alpha * soft_loss`` where
    ``hard_loss`` is the summed cross-entropy of the *student* outputs against the
    labels and ``soft_loss`` is ``dkd_loss(student, teacher, labels, config)``
    scaled by the linear warm-up factor ``min(epoch / config.warmup, 1.0)``.

    Relies on names defined elsewhere in the notebook: ``device``, ``config``,
    ``dkd_loss``, ``evaluate``, ``calculate_confusion_matrix`` and
    ``classification_metrics``.

    Args:
        student_model: Network being trained.
        teacher_model: Trained network providing the soft targets; it is put in
            eval mode and only used for forward passes.
        optimizer: Optimizer already bound to the student's parameters.
        ep: Number of epochs.
        train_dataloader_final: Iterable of ``(x, y)`` training batches.
        val_dataloader_final: Validation batches, passed to ``evaluate``.
        evaluation: When truthy, validate after every epoch, print the metric
            table, and save the student weights with the best validation F1 to
            ``/kaggle/working/best_student_model_DKDL.pt``.

    Returns:
        ``(best_val_recall, best_val_f1)``: the best validation F1 seen and the
        validation recall of that same epoch (both 0 if no epoch improved on 0
        or ``evaluation`` is off).

    Side effects:
        Saves and shows five training-curve figures under ``/kaggle/working/``.
        NOTE: the file names (``head_*.png``) are the same ones ``train_student``
        writes, so running both overwrites the earlier figures.
    """
    teacher_model.eval()

    print("Start training...\n")
    tr_ep_loss, tr_ep_acc, tr_ep_f1, tr_ep_precision, tr_ep_recall = [], [], [], [], []
    val_ep_loss, val_ep_acc, val_ep_f1, val_ep_precision, val_ep_recall = [], [], [], [], []

    best_val_recall = 0
    best_val_f1 = 0

    # NOTE(review): reduction='sum' makes the hard loss scale with the batch size,
    # unlike the mean-reduced loss in train_student -- confirm this matches the
    # scaling dkd_loss uses.
    loss_fn = nn.CrossEntropyLoss(reduction='sum')

    for e in range(1, ep + 1):
        # =======================================
        #               Training
        # =======================================
        t0_epoch = time.time()

        # Re-enter training mode every epoch (same as train_student), in case the
        # validation pass between epochs left the student in eval mode.
        student_model.train()

        batch_losses = []
        y_actual = np.array([])
        y_pred = np.array([])

        for x, y in train_dataloader_final:
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()

            y_s = student_model(x.float())
            # The teacher is fixed: skip building its autograd graph.
            with torch.no_grad():
                y_t = teacher_model(x.float())

            # Hard-label loss on the STUDENT outputs; computing it on the teacher
            # outputs would give the student no gradient from the true labels.
            # reshape(-1) rather than squeeze() keeps a one-sample batch 1-D.
            hard_loss = loss_fn(y_s, y.reshape(-1))
            soft_loss = dkd_loss(y_s, y_t, y, config)
            soft_loss = min(e / config.warmup, 1.0) * soft_loss # DKD
            loss = (1-config.alpha)*hard_loss + config.alpha*soft_loss

            loss.backward()

            # Clip the gradient norm to 1.0 to prevent "exploding gradients".
            # This has to happen after backward() and before step(); clipping
            # after the step (or after zeroing the gradients) does nothing.
            torch.nn.utils.clip_grad_norm_(student_model.parameters(), 1.0)
            optimizer.step()

            # Predicted class = argmax of the raw scores (softmax would not change it).
            batch_pred = y_s.detach().argmax(dim=1).cpu().numpy().astype('int')
            y_actual = np.append(y_actual, y.view(-1).cpu().numpy().astype('int'))
            y_pred = np.append(y_pred, batch_pred)
            batch_losses.append(loss.item())

        conf_matrix = calculate_confusion_matrix(y_actual, y_pred,1,0)
        acc, f1, precision, recall = classification_metrics(conf_matrix)

        # Mean of the per-batch losses over the epoch
        avg_train_loss = np.mean(batch_losses)
        tr_ep_loss.append(avg_train_loss)
        tr_ep_acc.append(acc)
        tr_ep_f1.append(f1)
        tr_ep_precision.append(precision)
        tr_ep_recall.append(recall)

        # =======================================
        #               Evaluation
        # =======================================
        if evaluation:
            val_loss, val_accuracy, val_f1, val_precision, val_recall = evaluate(student_model, val_dataloader_final)

            if val_f1 > best_val_f1:
                # Checkpoint the weights of the best-F1 epoch and remember its recall.
                torch.save(student_model.state_dict(), "/kaggle/working/best_student_model_DKDL.pt")
                best_val_f1 = val_f1
                best_val_recall = val_recall
                print("This is the best f1 score!")

            val_ep_loss.append(val_loss)
            val_ep_acc.append(val_accuracy)
            val_ep_f1.append(val_f1)
            val_ep_precision.append(val_precision)
            val_ep_recall.append(val_recall)

            time_elapsed = time.time() - t0_epoch

            # Print the header of the result table
            print(f"{'Epoch':^7} | {'Train Loss':^12} | {'Train Accuracy':^12} | {'Train F1-score':^12} |{'Train Precision':^12} |{'Train Recall':^12} | {'Val Loss':^12} | {'Val Accuuracy':^12} | {'Val F1-score':^12}|{'Val Precision':^12}|{'Val Recall':^12}| {'Elapsed':^9}")
            print("-"*81)
            print(f"{e:^7} | {avg_train_loss:^12.6f} | {acc:^14.6} | {f1:^14.6} | {precision:^14.6} |{recall:^14.6} |{val_loss:^12.6f} | {val_accuracy:^14.6f} | {val_f1:^14.6f}|{val_precision:^14.6f}|{val_recall:^14.6f}| {time_elapsed:^9.2f}")
            print("-"*81)
        print("\n")

    # One figure per metric: (legend name, title/axis name, file stub, validation history, training history)
    curves = [
        ("Loss", "Loss", "loss", val_ep_loss, tr_ep_loss),
        ("Accuracy", "Accuracy", "acc", val_ep_acc, tr_ep_acc),
        ("Precision", "Precision", "precision", val_ep_precision, tr_ep_precision),
        ("F1 Score", "F1-Score", "f1", val_ep_f1, tr_ep_f1),
        ("Recall", "Recall", "recall", val_ep_recall, tr_ep_recall),
    ]
    for legend_name, axis_name, file_stub, val_hist, tr_hist in curves:
        plt.plot(list(range(len(val_hist))), val_hist, label=f"Validation {legend_name}")
        plt.plot(list(range(len(tr_hist))), tr_hist, label=f"Training {legend_name}")
        plt.title(f"{axis_name} of Classification")
        plt.xlabel('Number of epochs')
        plt.ylabel(axis_name)
        plt.legend()
        plt.savefig(f'/kaggle/working/head_{file_stub}.png')
        plt.show()

    print("Student Training complete!")
    print(f"Best f1-score: {best_val_f1}")
    return best_val_recall, best_val_f1

In [None]:
class configTrainDKD:
    """Hyper-parameters for the distillation run, read as plain class attributes."""

    # Weight of the distillation (soft) loss; the hard-label loss gets 1 - alpha.
    alpha: float = 0.2
    # beta, gammar and temperature are not read by the training loop itself;
    # presumably they are consumed inside dkd_loss -- TODO confirm.
    beta: int = 5
    gammar: int = 1
    # Number of epochs over which the soft loss is linearly ramped up to full weight.
    warmup: int = 5
    temperature: float = 2.5


# Shared settings object looked up (as a global) by the DKD training code.
config = configTrainDKD()

In [None]:
# Load the best teacher checkpoint that supervises the distillation run.
best_model_teacher_model = "/kaggle/working/best_teacher_model.pt"

# Rebuild the teacher architecture, restore its weights directly onto the training
# device (map_location also lets a GPU-written checkpoint load on a CPU-only kernel),
# and make sure it runs in float32.
best_teacher_model = KAN_model()
best_teacher_model.load_state_dict(torch.load(best_model_teacher_model, map_location=device))
best_teacher_model.to(device)
best_teacher_model = best_teacher_model.float()

# Newly constructed student for the DKD run; rebinding `student_model` replaces the
# baseline student trained earlier in the notebook.
student_model = MLP_model()
student_model.to(device)
student_model= student_model.float()

In [None]:
# Optimizer for the DKD student: AdamW at lr 4e-4 with weight decay disabled.
optimizer = torch.optim.AdamW(
    student_model.parameters(),
    lr=4e-4,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

In [None]:
# Distil for 400 epochs from the best teacher checkpoint loaded above.
# `best_teacher_model` is passed explicitly: the previous cell restores it from disk,
# whereas `teacher_model` would be whatever object is left in the kernel.
epoch=400
train_student_DKDL(student_model, best_teacher_model, optimizer, epoch, train_dataloader, val_dataloader, evaluation=True)

In [None]:
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

# Restore the best DKD-student checkpoint (kept on CPU for this visualisation).
model = MLP_model()
model.load_state_dict(torch.load('/kaggle/working/best_student_model_DKDL.pt', map_location="cpu"))
# A freshly constructed module starts in training mode; switch to eval mode so that
# any dropout / batch-norm layers behave deterministically during inference.
model.eval()
all_extracted_features = []
all_true_labels = []

global_index = 0  # position of the current batch's first sample in the whole validation set

with torch.no_grad():
    for x, y in val_dataloader:
        x = x.to("cpu").float()
        extracted_features = model(x)  # model outputs used as the t-SNE input

        # Keep only every 10th sample, counted across batches: the first kept row of
        # this batch is the one whose global position is a multiple of 10.
        first_kept = (-global_index) % 10
        all_extracted_features.extend(extracted_features[first_kept::10].cpu().numpy())
        all_true_labels.extend(y.reshape(-1)[first_kept::10].tolist())
        global_index += x.size(0)

X = np.array(all_extracted_features)
y = np.array(all_true_labels)
y = y.reshape(-1)

# Initialize and fit t-SNE
tsne = TSNE(n_components=2, random_state=SEED)
X_tsne = tsne.fit_transform(X)

# Plot the results, one scatter series per class so each gets a legend entry.
# No `cmap` is passed: without a `c=` array matplotlib ignores it and emits a warning.
plt.figure(figsize=(8, 6))
labels = np.unique(y)
for label in labels:
    plt.scatter(X_tsne[y == label, 0], X_tsne[y == label, 1], label=f'Class {label}', s=50, alpha=0.7)

plt.legend()
plt.title('WADI Dataset t-SNE Visualization, DKD-Student Model')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.show()

In [None]:
# Bundle the DKD student's 2-D t-SNE embedding with the matching labels and
# pickle the pair so the figure can be re-drawn later without re-running t-SNE.
dict_tSNE = {"tSNE": X_tsne, "labels": all_true_labels}
with open("wadi_student_DKD.p", "wb") as file:
    p.dump(dict_tSNE, file)


In [None]:
# Confusion matrix of the best DKD-student checkpoint (highest validation F1)
# on the validation set.
best_model_path = "/kaggle/working/best_student_model_DKDL.pt"

# Rebuild the student architecture and restore the saved weights.
# map_location="cpu" lets a checkpoint written from a GPU run load on a CPU-only kernel;
# the model is evaluated on CPU here.
best_model = MLP_model()
best_model.load_state_dict(torch.load(best_model_path, map_location="cpu"))

calculate_confusion_matrix_vis(best_model, val_dataloader)

# t-SNE Plots


## Student model

In [None]:
# Reload the baseline student's stored t-SNE embedding and labels.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open("/kaggle/input/dkd-cyber-tsne-dataset/wadi_student.p", "rb") as file:
    con = p.load(file)

labels_gp = con["labels"]
tsne_gp = con["tSNE"]
y_gp = np.array(labels_gp)
y_gp = y_gp.reshape(-1)
labels_un = np.unique(y_gp)

# One scatter series per class; the legend text comes from `label_dict`.
# No `cmap` is passed: without a `c=` array matplotlib ignores it and emits a warning.
plt.figure(figsize=(10,10))
for label in labels_un:
    plt.scatter(tsne_gp[y_gp == label, 0], tsne_gp[y_gp == label, 1], label=label_dict.get(label), s=50, alpha=0.9)

plt.legend(framealpha=0.2, fontsize=13)
plt.title('WADI Dataset t-SNE Visualization, Student Model',fontsize=20)
plt.xlabel('t-SNE Component 1',fontsize=15)
plt.ylabel('t-SNE Component 2',fontsize=15)
# NOTE(review): the PostScript/EPS backend does not support transparency, so the alpha
# settings above are rendered opaque in the saved file -- consider PDF/PNG if that matters.
plt.savefig("wadi_student_tsne.eps", dpi=300)
plt.show()


## Teacher model

In [None]:
# Reload the teacher's stored t-SNE embedding and labels.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open("/kaggle/input/dkd-cyber-tsne-dataset/wadi_teacher.p", "rb") as file:
    con = p.load(file)

labels_gp = con["labels"]
tsne_gp = con["tSNE"]
y_gp = np.array(labels_gp)
y_gp = y_gp.reshape(-1)
labels_un = np.unique(y_gp)

# One scatter series per class; the legend text comes from `label_dict`.
# No `cmap` is passed: without a `c=` array matplotlib ignores it and emits a warning.
plt.figure(figsize=(10,10))
for label in labels_un:
    plt.scatter(tsne_gp[y_gp == label, 0], tsne_gp[y_gp == label, 1], label=label_dict.get(label), s=50, alpha=0.9)

plt.legend(framealpha=0.2, fontsize=13)
plt.title('WADI Dataset t-SNE Visualization, Teacher Model',fontsize=20)
plt.xlabel('t-SNE Component 1',fontsize=15)
plt.ylabel('t-SNE Component 2',fontsize=15)
# NOTE(review): the PostScript/EPS backend does not support transparency, so the alpha
# settings above are rendered opaque in the saved file -- consider PDF/PNG if that matters.
plt.savefig("wadi_teacher_tsne.eps", dpi=300)
plt.show()


## DKD-student model

In [None]:
# Reload the DKD student's stored t-SNE embedding and labels.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open("/kaggle/input/dkd-cyber-tsne-dataset/wadi_student_DKD.p", "rb") as file:
    con = p.load(file)

labels_gp = con["labels"]
tsne_gp = con["tSNE"]
y_gp = np.array(labels_gp)
y_gp = y_gp.reshape(-1)
labels_un = np.unique(y_gp)

# One scatter series per class; the legend text comes from `label_dict`.
# No `cmap` is passed: without a `c=` array matplotlib ignores it and emits a warning.
plt.figure(figsize=(10,10))
for label in labels_un:
    plt.scatter(tsne_gp[y_gp == label, 0], tsne_gp[y_gp == label, 1], label=label_dict.get(label), s=50, alpha=0.9)

plt.legend(framealpha=0.2, fontsize=13)
plt.title('WADI Dataset t-SNE Visualization, DKD-Student Model',fontsize=20)
plt.xlabel('t-SNE Component 1',fontsize=15)
plt.ylabel('t-SNE Component 2',fontsize=15)
# NOTE(review): the PostScript/EPS backend does not support transparency, so the alpha
# settings above are rendered opaque in the saved file -- consider PDF/PNG if that matters.
plt.savefig("wadi_dkd_student_tsne.eps", dpi=300)
plt.show()
