    This Source Code Form is subject to the terms of the Mozilla Public
    License, v. 2.0. If a copy of the MPL was not distributed with this
    file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Load and preprocess dataset

In [5]:
import pandas as pd

In [6]:
# Location of the NetFlow CSV that the next cell reads.
csv_file = 'datasets/netflow-v1/NF-BoT-IoT.csv'

# Columns excluded when the CSV is read (see the `usecols` filter).
ignore_cols = {
    'IPV4_SRC_ADDR',
    'IPV4_DST_ADDR',
    'Label',
}

# Columns read with the pandas 'category' dtype.
categorical_cols = [
    'Attack',
    'TCP_FLAGS',
    'PROTOCOL',
]

In [7]:
# Every categorical column maps to the same 'category' dtype.
category_dtypes = dict.fromkeys(categorical_cols, 'category')

# `usecols` receives each column name and keeps those not listed in
# `ignore_cols`, so the ignored columns are never loaded.
df = pd.read_csv(
    csv_file,
    usecols=lambda name: name not in ignore_cols,
    dtype=category_dtypes,
)
df.head()

Unnamed: 0,L4_SRC_PORT,L4_DST_PORT,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS,Attack
0,52670,53,17,5.212,71,126,1,1,0,4294966,Benign
1,49160,4444,6,0.0,217753000,199100,4521,4049,24,4176249,Theft
2,3456,80,17,0.0,8508021,8918372,9086,9086,0,4175916,Benign
3,80,8080,6,7.0,8442138,9013406,9086,9086,0,4175916,Benign
4,80,80,6,7.0,8374706,0,9086,0,0,4175916,Benign


# Train classifier model

In [8]:
from sklearn.model_selection import train_test_split
from xgboost import XGBRFClassifier

In [9]:
# Features are every column except the last; the target is the last column,
# encoded as the integer codes of its categorical dtype.
X = df.iloc[:, :-1]
y = df.iloc[:, -1].cat.codes

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [45]:
# NOTE(review): 'gpu_hist' is reported as deprecated by recent XGBoost
# releases in favour of a separate device setting -- confirm against the
# installed version before upgrading.
classifier = XGBRFClassifier(
    n_estimators=32,
    tree_method='gpu_hist',
    enable_categorical=True,
    random_state=42,
)

# The training split is also passed as the evaluation set, so the logged
# mlogloss is a training metric, not a held-out one.
# The trailing semicolon stops the notebook from echoing the fitted estimator.
classifier.fit(X_train, y_train, eval_set=[(X_train, y_train)]);

[0]	validation_0-mlogloss:0.44981


In [46]:
# Score on the held-out split, rendered as a whole-number percentage.
f"{classifier.score(X_test, y_test):.0%}"

'84%'

# Train explanation model

In [140]:
import torch
import torch.nn as nn

import pyro
import pyro.distributions as dist

from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from tqdm import tqdm

In [47]:
# Make float64 the default floating-point dtype for tensors and module
# parameters created after this point.
torch.set_default_dtype(torch.float64)
# Have Pyro distributions validate their arguments and sampled values.
pyro.distributions.enable_validation(True)
# Fix the random seed so sampling in the cells below is repeatable.
pyro.set_rng_seed(42)

In [26]:
def Decoder(z_dim, X_dim, hidden_dim):
    """Build the network that maps a latent vector back to feature space.

    Args:
        z_dim: Width of the latent input accepted by the first linear layer.
        X_dim: Width of the output produced by the second linear layer.
        hidden_dim: Width of the single hidden layer.

    Returns:
        An ``nn.Sequential`` of Linear -> Softplus -> Linear -> Sigmoid. The
        final sigmoid confines every output component to the unit interval.
    """
    layers = [
        nn.Linear(z_dim, hidden_dim),
        nn.Softplus(),
        nn.Linear(hidden_dim, X_dim),
        nn.Sigmoid(),
    ]
    return nn.Sequential(*layers)

In [50]:
class Encoder(nn.Module):
    """Network that maps an input row to the parameters of a Normal over z.

    One shared hidden layer (Linear + Softplus) feeds two linear heads:
    ``fc21`` yields the location and ``fc22`` yields the log of the scale.

    Args:
        z_dim: Width of both output heads.
        X_dim: Width of the input.
        hidden_dim: Width of the shared hidden layer.
    """

    def __init__(self, z_dim, X_dim, hidden_dim):
        super().__init__()
        # Attribute names double as parameter-name prefixes, so they are fixed.
        self.fc1 = nn.Linear(X_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, z_dim)
        self.fc22 = nn.Linear(hidden_dim, z_dim)
        self.softplus = nn.Softplus()

    def forward(self, X):
        """Return ``(z_loc, z_scale)`` for the batch ``X``."""
        features = self.softplus(self.fc1(X))
        loc = self.fc21(features)
        # Exponentiating the second head keeps the scale strictly positive.
        scale = self.fc22(features).exp()
        return loc, scale

In [154]:
class CausalVAE(nn.Module):
    """Variational autoencoder with a latent space of size ``K + L``.

    ``K`` and ``L`` are stored on the instance, but within this class only
    their sum (``z_dim``) is used. ``model`` and ``guide`` are the two
    callables handed to Pyro's SVI.

    Args:
        K: First latent block size.
        L: Second latent block size.
        X_dim: Number of input features.
        hidden_dim: Hidden-layer width shared by encoder and decoder.
        use_cuda: When true, move the module's parameters to the GPU.
    """

    def __init__(self, K, L, X_dim, hidden_dim=16, use_cuda=False):
        super().__init__()
        self.K = K
        self.L = L
        self.z_dim = K + L
        self.encoder = Encoder(self.z_dim, X_dim, hidden_dim)
        self.decoder = Decoder(self.z_dim, X_dim, hidden_dim)

        if use_cuda:
            self.cuda()

    def model(self, X, y=None):
        """Generative model: z ~ N(0, I), X ~ N(decoder(z), I).

        ``y`` is accepted but not used.
        """
        pyro.module('decoder', self.decoder)
        n_rows = X.shape[0]
        prior_shape = (n_rows, self.z_dim)

        with pyro.plate('data', n_rows):
            # Standard-normal prior, created on X's device with X's dtype.
            prior = dist.Normal(X.new_zeros(prior_shape), X.new_ones(prior_shape))
            z = pyro.sample('latent', prior.to_event(1))

            # Unit-scale Normal likelihood centred on the decoder output.
            decoded = self.decoder(z)
            likelihood = dist.Normal(decoded, X.new_ones(X.shape[1]))
            pyro.sample('obs', likelihood.to_event(1), obs=X)

    def guide(self, X, y=None):
        """Variational posterior: z ~ N(encoder(X)).

        ``y`` is accepted but not used.
        """
        pyro.module('encoder', self.encoder)

        with pyro.plate('data', X.shape[0]):
            posterior = dist.Normal(*self.encoder(X))
            pyro.sample('latent', posterior.to_event(1))

    def reconstruct(self, x):
        """Encode ``x``, draw one latent sample, and return its decoding.

        The latent is sampled, so repeated calls give different results.
        """
        posterior = dist.Normal(*self.encoder(x))
        return self.decoder(posterior.sample())

In [155]:
# K=0 and L=10 give a 10-dimensional latent space; the input width is the
# number of feature columns.
vae = CausalVAE(K=0, L=10, X_dim=X_train.shape[1], use_cuda=True)

# An empty options dict leaves every optimizer setting at its default.
optimizer = Adam({})
svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

In [142]:
# Serve the training rows in batches of 32; no shuffle option is passed.
# NOTE(review): the split cell above leaves X_train as a DataFrame slice;
# presumably it is converted to a numeric array/tensor before this cell
# runs -- confirm.
train_loader = torch.utils.data.DataLoader(X_train, batch_size=32)

In [146]:
def train(svi, train_loader, *, use_cuda=False):
    """Run one pass over ``train_loader`` and return the mean loss per example.

    Args:
        svi: Object whose ``step(batch)`` performs one optimisation step and
            returns that batch's loss.
        train_loader: Iterable of batches exposing a ``dataset`` with a length.
        use_cuda: When true, each batch is moved to the GPU before the step.

    Returns:
        The summed step losses divided by ``len(train_loader.dataset)``.
    """
    running_loss = 0

    # tqdm only wraps the loader to draw a progress bar.
    for batch in tqdm(train_loader):
        running_loss += svi.step(batch.cuda() if use_cuda else batch)

    return running_loss / len(train_loader.dataset)

In [156]:
# Empty Pyro's global parameter store so values from earlier runs are not
# reused, then run a single pass over the training loader on the GPU.
# The cell echoes the value returned by train(): the summed loss divided by
# the dataset size.
pyro.clear_param_store()
train(svi, train_loader, use_cuda=True)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15003/15003 [03:09<00:00, 79.34it/s]


3.284507436183707e+21

In [169]:
# Take entry 30 of X_test, move it to the GPU, and report the root-mean-square
# error between it and one reconstruction (reconstruct() samples the latent,
# so the figure varies between runs).
# NOTE(review): torch.from_numpy needs a NumPy array, so X_test is presumably
# an array here rather than the DataFrame produced by train_test_split -- confirm.
x_test = torch.from_numpy(X_test[30]).cuda()
nn.functional.mse_loss(x_test, vae.reconstruct(x_test)).sqrt()

tensor(0.9093, device='cuda:0', grad_fn=<SqrtBackward0>)

In [163]:
# Echo the input next to a freshly sampled reconstruction for comparison.
x_test, vae.reconstruct(x_test)

(tensor([ 0.0000,  2.0000, -1.1104,  0.6397, -0.2561, -0.0194, -0.0085, -0.0465,
         -0.0298, -2.0826], device='cuda:0'),
 tensor([1.0000e+00, 9.9999e-01, 7.4742e-02, 5.6891e-02, 4.6018e-05, 5.1842e-04,
         2.0472e-05, 1.7344e-05, 2.5053e-05, 3.6389e-02], device='cuda:0',
        grad_fn=<SigmoidBackward0>))