# Imports:

In [None]:
from connect import *

In [None]:
import pandas as pd
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from tqdm.notebook import tqdm
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [None]:
import NAR.utils as utils
import NAR.data_management as dm
import NAR.plotting_utils as pu
import NAR.masking as masking
import NAR.models as models

# Reload modules (DEV):

In [None]:
# DEV convenience: hand the project modules imported above to
# `reload_modules`, presumably so that on-disk edits are picked up without
# restarting the kernel.
# NOTE(review): `reload_modules` is not defined in this notebook; it
# presumably arrives through the `from connect import *` star import - confirm.
reload_modules([utils, dm, pu, masking, models])

# Checkpoint:

### Edge-IIoTset dataset:

In [None]:
# Load the preprocessed Edge-IIoTset CSV into a single DataFrame.
# low_memory=False lets pandas read each column in one pass instead of in
# chunks, which avoids mixed-dtype inference on large files.
df = pd.read_csv(
    filepath_or_buffer='../../../../nfs/jcevallos/datasets/edge-iiot/preprocessed/preprocessed-edge-iiotset.csv',
    low_memory=False,
)

In [None]:
# Split the frame into features and target: the three label columns are
# removed from the features and 'Attack_type' is kept as the target.
X = df.drop(['Attack_label', 'attack_macro_cat', 'Attack_type'], axis=1)
y = df['Attack_type']

# Boolean Series, True for every object-dtype column; those columns are the
# ones label-encoded below.
categorical_indicator = X.dtypes == 'O'

# Positional indices of the categorical / continuous columns.  Both lists
# follow the original column order: the previous set-difference construction
# returned the continuous columns in an arbitrary, run-dependent order.
_is_categorical = categorical_indicator.to_numpy()
cat_idxs = list(np.flatnonzero(_is_categorical))
con_idxs = np.flatnonzero(~_is_categorical).tolist()

categorical_columns = X.columns[cat_idxs].tolist()
cont_columns = [X.columns[idx] for idx in con_idxs]

# Categories and target classes to natural numbers:
# each categorical column gets its own encoder; cat_dims records how many
# distinct categories each of those columns has.
cat_dims = []
for col in categorical_columns:
    l_enc = LabelEncoder()
    X[col] = l_enc.fit_transform(X[col].values)
    cat_dims.append(len(l_enc.classes_))

X = X.values
y = y.values

# From here on `l_enc` is the *target* encoder; later cells use it to map the
# integer labels back to the original class names.
l_enc = LabelEncoder()
y = l_enc.fit_transform(y)

# num of classes (one encoder class per distinct target value):
y_dim = len(l_enc.classes_)

In [None]:
# Rebuild a DataFrame from the encoded feature matrix and attach the original
# class names (decoded from the integer targets) as the 'Label' column.
data = pd.DataFrame(X)
data['Label'] = l_enc.inverse_transform(y)

# Classes treated as zero-day attacks (ZdAs), split between the two phases.
train_zdas = ['SQL_injection', 'Uploading']
test_zdas = ['Vulnerability_scanner', 'XSS']
zdas = train_zdas + test_zdas

# Known classes handed to the splitter as `test_only_knowns`
# (presumably kept out of the training split - TODO confirm in NAR.masking).
test_only_knowns = ['Backdoor', 'DDoS_HTTP']

# Masking: later cells read a 'ZdA' column from the result, presumably added
# here from the class list.
data = masking.mask_generic_data(data=data, zdas=zdas)

# Train / test split driven by the ZdA and test-only class lists.
train_data, test_data = masking.split_real_data_gen_zda(
    data=data,
    train_zdas=train_zdas,
    test_zdas=test_zdas,
    test_only_knowns=test_only_knowns,
)

# Every distinct class name, in order of first appearance in `data`.
classes = data['Label'].unique()

# Training:

## helper code:

In [None]:
def save_stuff(prefix):
    """Write the current model weights to disk as ``state_dict`` files.

    The modules are read from the notebook's global scope and stored next to
    each other under a common prefix:

    * ``<prefix>_enc.pt``   - ``encoder``
    * ``<prefix>_proc.pt``  - ``processor_1``
    * ``<prefix>_dec_b.pt`` - ``decoder_1_b``

    The encoder file previously lacked the ``_`` separator
    (``<prefix>enc.pt``) and the processor was not stored at all, so a
    checkpoint of a from-scratch run could not be restored completely.

    Args:
        prefix: Path prefix (run name, optionally with an epoch tag) shared by
            all files of one checkpoint.
    """
    checkpoint_parts = (
        ('_enc.pt', encoder),
        ('_proc.pt', processor_1),
        ('_dec_b.pt', decoder_1_b),
    )
    for suffix, module in checkpoint_parts:
        torch.save(module.state_dict(), prefix + suffix)

In [None]:
def first_phase_simple(
        sample_batch):
    """Run one episode through encoder, processor and both decoders.

    Besides computing the losses, this updates the global confusion matrices
    ``cs_cm_1`` / ``os_cm_1`` in place and appends the episode's metrics to
    the global ``metrics_dict``.

    Args:
        sample_batch: Pair ``(features, labels)`` already on ``device``.
            Column 0 of ``labels`` is used as the class id.

    Returns:
        Tuple ``(proc_1_loss, dec_1_loss_b, hiddens_1, decoded_1)`` where
        ``proc_1_loss`` is the closed-set loss plus the kernel regulariser,
        ``dec_1_loss_b`` is the open-set loss, and the last two are the raw
        processor outputs.
    """
    global cs_cm_1
    global os_cm_1
    global metrics_dict

    features, labels = sample_batch[0], sample_batch[1]
    class_ids = labels[:, 0]

    # Masks over the batch - note that these are NOT complementary.
    zda_mask, _known_mask, unknown_mask, query_mask = utils.get_gennaro_masks(
        labels,
        N_QUERY,
        device=device)

    # One-hot labels, then the same labels with the unknown samples masked.
    one_hot = utils.get_oh_labels(
        decimal_labels=class_ids.long(),
        total_classes=max_prototype_buffer,
        device=device)
    masked_one_hot = utils.get_one_hot_masked_labels(
        one_hot,
        unknown_mask,
        device=device)

    # Encode the raw inputs and let the processor reason over them.
    embeddings = encoder(features.float())
    decoded_1, hiddens_1, predicted_kernel = processor_1(
        embeddings,
        masked_one_hot)

    # Regularise the processor: its predicted kernel is compared with the
    # label co-membership ("semantic") kernel.
    proc_1_reg_loss = utils.get_kernel_kernel_loss(
        one_hot @ one_hot.T,
        predicted_kernel,
        a_w=attr_w,
        r_w=rep_w)

    # Re-index the query labels to 0..k-1 for few-shot closed-set
    # classification; this matches the design of models.get_centroids, which
    # is called by the GAT processors.
    unique_labels, transformed_labels = class_ids[query_mask].unique(
        return_inverse=True)

    # Closed-set classification loss on the query samples.
    dec_1_loss_a = decoder_1a_criterion(
        decoded_1[query_mask],
        transformed_labels)

    # The open-set decoder works on a detached copy of the scores, so its
    # loss does not send gradients back into the encoder/processor.
    os_inputs = decoded_1.detach()
    os_inputs.requires_grad = True

    # Unknown-cluster prediction and its binary (ZdA vs. not) loss.
    predicted_unknown_1s = decoder_1_b(scores=os_inputs[unknown_mask])
    unknown_zda = zda_mask[unknown_mask]
    os_targets = unknown_zda.float().unsqueeze(-1)
    dec_1_loss_b = decoder_1b_criterion(predicted_unknown_1s, os_targets)

    # Map the closed-set predictions back to the real label space.
    it_preds = utils.inverse_transform_preds(
        transormed_preds=decoded_1[query_mask],
        real_labels=unique_labels,
        real_class_num=max_prototype_buffer)

    #
    # REPORTING:
    #

    # Accumulate the closed-set, then the open-set confusion matrix.
    cs_cm_1 += utils.efficient_cm(
        preds=it_preds.detach(),
        targets=class_ids[query_mask].long())
    os_cm_1 += utils.efficient_os_cm(
        preds=(predicted_unknown_1s.detach() > 0.5).long(),
        targets=unknown_zda.long())

    # Accuracies; the balanced one is computed from the accumulated matrix.
    CS_acc = utils.get_acc(
        logits_preds=it_preds,
        oh_labels=one_hot[query_mask])
    OS_acc = utils.get_binary_acc(
        logits=predicted_unknown_1s.detach(),
        labels=os_targets)
    OS_b_acc = utils.get_balanced_accuracy(
        os_cm=os_cm_1,
        n_w=balanced_acc_n_w)

    # Record the scalar value of every metric for later reporting.
    episode_metrics = (
        ('losses_1a', dec_1_loss_a),
        ('proc_reg_loss1', proc_1_reg_loss),
        ('CS_accuracies', CS_acc),
        ('losses_1b', dec_1_loss_b),
        ('OS_accuracies', OS_acc),
        ('OS_B_accuracies', OS_b_acc),
    )
    for metric_name, metric_value in episode_metrics:
        metrics_dict[metric_name].append(metric_value.item())

    # Processor loss = closed-set loss + kernel regulariser.
    proc_1_loss = dec_1_loss_a + proc_1_reg_loss

    return proc_1_loss, dec_1_loss_b, hiddens_1, decoded_1

## init data:

In [45]:
# Run-level switches for the cells below.
# `save`: presumably meant to toggle checkpoint writing
# (TODO confirm where it is consumed).
save = True
# `wb`: when True a Weights & Biases run is initialised and the flag is
# forwarded to the reporting/plotting helpers; when False the run name is
# printed instead.
wb = True

In [46]:
# Seed the random generators.
torch_seed = 1234
torch.manual_seed(torch_seed)
np.random.seed(42)  # Few Shot sampling uses np.random to choose classes...

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


def _build_dataset(frame):
    """Wrap a split: every column except 'Label' and 'ZdA' is a feature."""
    feature_matrix = frame.drop(columns=['Label', 'ZdA']).values
    return dm.Simple_Dataset(features=feature_matrix, df=frame)


def _build_episode_loader(dataset, n_tasks):
    """Create a DataLoader drawing `n_tasks` few-shot episodes from `dataset`."""
    episode_sampler = dm.FewShotSampler_Simple(
        dataset=dataset,
        n_tasks=n_tasks,
        classes_per_it=N_WAY,
        k_shot=N_SHOT,
        q_shot=N_QUERY)
    return DataLoader(
        dataset=dataset,
        sampler=episode_sampler,
        num_workers=10,
        drop_last=True,
        collate_fn=dm.convenient_cf)


# Datasets for the two splits.
train_dataset = _build_dataset(train_data)
test_dataset = _build_dataset(test_data)

# Episode layout.
# Number of classes per task:
# two of them are ZdAs, one is a type B and the other a type A
N_WAY = 5
N_SHOT = 5    # Number of samples per class in the support set
N_QUERY = 20  # Number of samples per class in the query set

# Number of episodes per epoch (reduce these for speedy tests).
n_train_tasks = 500
n_eval_tasks = 50

num_of_test_classes = len(test_dataset.classes)
num_of_train_classes = len(train_dataset.classes)

# Episodic loaders; the train loader is built first, as before.
train_loader = _build_episode_loader(train_dataset, n_train_tasks)
test_loader = _build_episode_loader(test_dataset, n_eval_tasks)

## init architectures:

In [47]:
# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
max_prototype_buffer = 15   # one-hot width / confusion-matrix side
lr = 0.001

# Training schedule.
n_epochs = 120
norm = "batch"
dropout = 0.1
patience = 40               # epochs without improvement before early stop
lambda_os = 1

# Model / loss settings.
processor_attention_heads = 8
h_dim = 1024
report_step_frequency = 100
pos_weight = 5
architectures = 'GATV5 Confidence Dec'
balanced_acc_n_w = 0.5
attr_w = 1
rep_w = 1
natural_inputs_dim = 46
run_name = 'EdgeIIoTset from scratch'

# ---------------------------------------------------------------------------
# Experiment tracking
# ---------------------------------------------------------------------------
if not wb:
    print(run_name)
else:
    # One batch is pulled from the train loader only to log its size.
    first_batch = next(iter(train_loader))
    run_config = {
        "N_SHOT": N_SHOT,
        "N_QUERY": N_QUERY,
        "N_WAY": N_WAY,
        "num_of_test_classes": num_of_test_classes,
        "num_of_train_classes": num_of_train_classes,
        "train_batch_size": first_batch[0].shape[0],
        "len(train_loader)": len(train_loader),
        "len(test_dataset)": len(test_dataset),
        "max_prototype_buffer": max_prototype_buffer,
        "device": device,
        "natural_inputs_dim": natural_inputs_dim,
        "h_dim": h_dim,
        "lr": lr,
        "n_epochs": n_epochs,
        "norm": norm,
        "dropout": dropout,
        "patience": patience,
        'zdas': data[data.ZdA == True]['Label'].unique(),
        "lambda_os": lambda_os,
        "positive_weight": pos_weight,
        "architectures": architectures,
        "balanced_acc_n_w": balanced_acc_n_w,
        "attr_w": attr_w,
        "rep_w": rep_w,
    }
    wandb.init(
        project='Nero_1.1',
        name=run_name,
        config=run_config)

# ---------------------------------------------------------------------------
# Modules and losses
# ---------------------------------------------------------------------------
# Encoder: raw features -> h_dim embedding.
encoder = models.Encoder(
    in_features=natural_inputs_dim,
    out_features=h_dim,
    norm=norm,
    dropout=dropout,
).to(device)

# First phase processor.
processor_1 = models.GAT_V5_Processor(
    h_dim=h_dim,
    processor_attention_heads=processor_attention_heads,
    dropout=dropout,
    device=device,
).to(device)

# Closed-set criterion.
decoder_1a_criterion = nn.CrossEntropyLoss()

# Open-set decoder and its weighted binary criterion.
decoder_1_b = models.Confidence_Decoder(
    in_dim=N_WAY - 1,  # Subtract 1 ZdA
    dropout=dropout,
    device=device,
).to(device)

decoder_1b_criterion = nn.BCEWithLogitsLoss(
    pos_weight=torch.Tensor([pos_weight]),
).to(device)

# ---------------------------------------------------------------------------
# Optimizers
# ---------------------------------------------------------------------------
# Encoder and processor share one optimizer; the open-set decoder has its own.
params_for_processor_optimizer = [
    *encoder.parameters(),
    *processor_1.parameters(),
]
processor_optimizer = optim.Adam(params_for_processor_optimizer, lr=lr)
os_optimizer = optim.Adam(decoder_1_b.parameters(), lr=lr)

# ---------------------------------------------------------------------------
# Early-stopping state used by the training loops
# ---------------------------------------------------------------------------
max_eval_TNR = torch.zeros(1, device=device)
epochs_without_improvement = 0

In [48]:
# Attach Weights & Biases hooks to the three trainable modules.
# Guarded by `wb`, like the other wandb calls in this notebook: with
# wb = False no run has been initialised, so there is nothing to watch.
if wb:
    wandb.watch(processor_1)
    wandb.watch(encoder)
    wandb.watch(decoder_1_b)

[]

## Train (from scratch):

In [49]:
# From-scratch training: each epoch runs every training episode (updating
# encoder + processor and, separately, the open-set decoder), then evaluates
# on the test episodes and applies early stopping on the mean open-set
# balanced accuracy.
for epoch in tqdm(range(n_epochs)):

    # TRAIN
    encoder.train()
    processor_1.train()
    decoder_1_b.train()

    # Reset the confusion matrices; first_phase_simple accumulates into these
    # globals.
    cs_cm_1 = torch.zeros(
        [max_prototype_buffer, max_prototype_buffer],
        device=device)
    os_cm_1 = torch.zeros([2, 2], device=device)

    # Reset the metrics dict (also filled by first_phase_simple).
    metrics_dict = utils.reset_metrics_dict()

    # go!
    for batch_idx, sample_batch in enumerate(train_loader):
        # Move features and labels to the compute device.
        sample_batch = sample_batch[0].to(device), sample_batch[1].to(device)

        # PHASE 1
        proc_loss, \
            os_loss, \
            hiddens_1, \
            decoded_1 = first_phase_simple(
                                sample_batch)

        # Learning: encoder + processor step on the closed-set/regulariser
        # loss...
        processor_optimizer.zero_grad()
        proc_loss.backward()
        processor_optimizer.step()

        # ...then the open-set decoder on its own loss.  That loss is built
        # from detached scores, so the two backward passes are independent.
        os_optimizer.zero_grad()
        os_loss.backward()
        os_optimizer.step()

        # Reporting
        step = batch_idx + (epoch * len(train_loader))

        if step % report_step_frequency == 0:
            utils.reporting_gennaro(
                'train',
                epoch,
                metrics_dict,
                step,
                wb,
                wandb)

    # Plot the last training episode together with the epoch's matrices.
    pu.super_plotting_function_gennaro(
                phase='Training',
                labels=sample_batch[1].cpu(),
                hiddens_1=hiddens_1.detach().cpu(),
                scores_1=decoded_1.detach().cpu(),
                cs_cm_1=cs_cm_1.cpu(),
                os_cm_1=os_cm_1.cpu(),
                wb=wb,
                wandb=wandb,
                complete_classes=classes,
                )

    with torch.inference_mode():

        # Evaluation
        encoder.eval()
        processor_1.eval()
        decoder_1_b.eval()

        # reset conf Mats
        cs_cm_1 = torch.zeros(
            [max_prototype_buffer, max_prototype_buffer],
            device=device)
        os_cm_1 = torch.zeros([2, 2], device=device)

        # reset metrics dict
        metrics_dict = utils.reset_metrics_dict()

        # go!
        for batch_idx, sample_batch in enumerate(test_loader):
            # Move features and labels to the compute device.
            sample_batch = sample_batch[0].to(device), sample_batch[1].to(device)

            # PHASE 1 (losses are only recorded, no optimisation here)
            proc_1_loss, \
                os_1_loss, \
                hiddens_1, \
                decoded_1 = first_phase_simple(
                                    sample_batch)
            # Reporting
            step = batch_idx + (epoch * len(test_loader))

            if step % report_step_frequency == 0:
                utils.reporting_gennaro(
                    'eval',
                    epoch,
                    metrics_dict,
                    step,
                    wb,
                    wandb)

        pu.super_plotting_function_gennaro(
                phase='Evaluation',
                labels=sample_batch[1].cpu(),
                hiddens_1=hiddens_1.detach().cpu(),
                scores_1=decoded_1.detach().cpu(),
                cs_cm_1=cs_cm_1.cpu(),
                os_cm_1=os_cm_1.cpu(),
                wb=wb,
                wandb=wandb,
                complete_classes=classes,
            )

        # Checking for improvement: mean open-set balanced accuracy over the
        # evaluation episodes, kept as a plain float so that the running best
        # does not change type after the first improvement.
        curr_TNR = float(np.mean(metrics_dict['OS_B_accuracies']))

        if curr_TNR > float(max_eval_TNR):
            max_eval_TNR = curr_TNR
            epochs_without_improvement = 0
            # Honour the notebook-level `save` switch.
            if save:
                save_stuff(run_name+'_e'+str(epoch))
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            # NOTE: `step` is the last *evaluation* step index.
            print(f'Early stopping at episode {step}')
            if wb:
                wandb.log({'Early stopping at episode': step})
            break

if wb:
    wandb.finish()

  0%|          | 0/120 [00:00<?, ?it/s]

Early stopping at episode 39806203


VBox(children=(Label(value='30.189 MB of 30.189 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
CS1 accuracy_eval:,▁▆▅▆▆▇▆▅▇▅▆▆▆▆▅▄▅▆▆▇▅█▆▁▅█▅▆▅▅▄▅▆▆▆▆▆▆
CS1 accuracy_train:,▁▅▅▅▅▆▅▅▅▆▅▆▆█▆▅▅▅▅▅▅▅▅▅▅▅▆▅▅▅▆▅▅▆▅▅▆▅▅▆
Early stopping at episode,▁
OS1 Bal. accuracy_eval:,█▄▅▃▂▄▃▂▂▃▄▄▃▄▄▂▃▄▂▇▃▆▂▄▅█▄▆▄▄▃▁▄▅▄▄▄▅
OS1 Bal. accuracy_train:,▁▆▆▆▆▆▆▇▇█▇▅▇▅▇█▇▇▇▇▇▇▇▇█▇▇▇█▇█▇█▇██████
OS1 accuracy_eval:,█▃▃▅▁▄▃▄▃▂▅▃▃▁▃▄▃▄▃▇▃▄▁▆▅█▃▄▅▂▃▃▄▃▄▃▂▄
OS1 accuracy_train:,▁▇▅▆▇▇█████▇█▇██████████████████████████
epoch:,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
mean dec_1_loss_a_eval:,▃▂▂▂▂▂▄▂▂▃▂▂▂▃▅▅▂▅▄▁▂▁▂▅▃▁▃▃▃▃▃▅▂█▂▂▂▃
mean dec_1_loss_a_train:,█▇▆▆▄▃▂▂▂▂▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
CS1 accuracy_eval:,0.9234
CS1 accuracy_train:,0.9573
Early stopping at episode,39806203.0
OS1 Bal. accuracy_eval:,0.87775
OS1 Bal. accuracy_train:,0.87822
OS1 accuracy_eval:,0.89037
OS1 accuracy_train:,0.88423
epoch:,81.0
mean dec_1_loss_a_eval:,0.69592
mean dec_1_loss_a_train:,0.13226


In [None]:
# Explicitly close the Weights & Biases run, e.g. when the training cell was
# interrupted before reaching its own `wandb.finish()`.
wandb.finish()

## Train (fine tune a pre-trained Neural algorithmic processor):

In [None]:
# Load the pre-trained processor weights.  map_location=device remaps the
# stored tensors onto the device in use, so a checkpoint saved on a GPU can
# also be loaded on a CPU-only host.
processor_1.load_state_dict(
    torch.load('GENNARO-processor.pt', map_location=device))

# Inference behaviour for the processor; the fine-tuning loop never switches
# it back to train mode.
processor_1.eval()

# Freeze the pre-trained processor
for param in processor_1.parameters():
    param.requires_grad = False

In [None]:
# Fine-tuning: same schedule as the from-scratch loop, but only the encoder
# and the open-set decoder are switched between train/eval mode here; the
# processor's mode is not touched by this loop.
for epoch in tqdm(range(n_epochs)):

    # TRAIN
    encoder.train()
    decoder_1_b.train()

    # Reset the confusion matrices; first_phase_simple accumulates into these
    # globals.
    cs_cm_1 = torch.zeros(
        [max_prototype_buffer, max_prototype_buffer],
        device=device)
    os_cm_1 = torch.zeros([2, 2], device=device)

    # Reset the metrics dict (also filled by first_phase_simple).
    metrics_dict = utils.reset_metrics_dict()

    # go!
    for batch_idx, sample_batch in enumerate(train_loader):
        # Move features and labels to the compute device.
        sample_batch = sample_batch[0].to(device), sample_batch[1].to(device)

        # PHASE 1
        proc_loss, \
            os_loss, \
            hiddens_1, \
            decoded_1 = first_phase_simple(
                                sample_batch)

        # Learning: step on the closed-set/regulariser loss...
        processor_optimizer.zero_grad()
        proc_loss.backward()
        processor_optimizer.step()

        # ...then the open-set decoder on its own loss.  That loss is built
        # from detached scores, so the two backward passes are independent.
        os_optimizer.zero_grad()
        os_loss.backward()
        os_optimizer.step()

        # Reporting
        step = batch_idx + (epoch * len(train_loader))

        if step % report_step_frequency == 0:
            utils.reporting_gennaro(
                'train',
                epoch,
                metrics_dict,
                step,
                wb,
                wandb)

    # Plot the last training episode together with the epoch's matrices.
    pu.super_plotting_function_gennaro(
                phase='Training',
                labels=sample_batch[1].cpu(),
                hiddens_1=hiddens_1.detach().cpu(),
                scores_1=decoded_1.detach().cpu(),
                cs_cm_1=cs_cm_1.cpu(),
                os_cm_1=os_cm_1.cpu(),
                wb=wb,
                wandb=wandb,
                complete_classes=classes,
                )

    with torch.inference_mode():

        # Evaluation
        encoder.eval()
        decoder_1_b.eval()

        # reset conf Mats
        cs_cm_1 = torch.zeros(
            [max_prototype_buffer, max_prototype_buffer],
            device=device)
        os_cm_1 = torch.zeros([2, 2], device=device)

        # reset metrics dict
        metrics_dict = utils.reset_metrics_dict()

        # go!
        for batch_idx, sample_batch in enumerate(test_loader):
            # Move features and labels to the compute device.
            sample_batch = sample_batch[0].to(device), sample_batch[1].to(device)

            # PHASE 1 (losses are only recorded, no optimisation here)
            proc_1_loss, \
                os_1_loss, \
                hiddens_1, \
                decoded_1 = first_phase_simple(
                                    sample_batch)

            # Reporting
            step = batch_idx + (epoch * len(test_loader))

            if step % report_step_frequency == 0:
                utils.reporting_gennaro(
                    'eval',
                    epoch,
                    metrics_dict,
                    step,
                    wb,
                    wandb)

        pu.super_plotting_function_gennaro(
                phase='Evaluation',
                labels=sample_batch[1].cpu(),
                hiddens_1=hiddens_1.detach().cpu(),
                scores_1=decoded_1.detach().cpu(),
                cs_cm_1=cs_cm_1.cpu(),
                os_cm_1=os_cm_1.cpu(),
                wb=wb,
                wandb=wandb,
                complete_classes=classes,
            )

        # Checking for improvement: balanced accuracy of the confusion matrix
        # accumulated over the evaluation episodes.  The helper is called with
        # the same two arguments as in first_phase_simple; the former extra
        # `pos_labels` / `n_tasks` keywords are not used by that call.
        # NOTE(review): confirm the helper no longer needs them.
        curr_TNR = float(utils.get_balanced_accuracy(
                os_cm=os_cm_1,
                n_w=balanced_acc_n_w
                ))

        # Compare as plain floats so the running best keeps a single type.
        if curr_TNR > float(max_eval_TNR):
            max_eval_TNR = curr_TNR
            epochs_without_improvement = 0
            # Honour the notebook-level `save` switch.
            if save:
                save_stuff(run_name)
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            # NOTE: `step` is the last *evaluation* step index.
            print(f'Early stopping at episode {step}')
            if wb:
                wandb.log({'Early stopping at episode': step})
            break

if wb:
    wandb.finish()

In [None]:
# Explicitly close the Weights & Biases run, e.g. when the fine-tuning cell
# was interrupted before reaching its own `wandb.finish()`.
wandb.finish()