In [76]:
import os
import torch
import csv
import numpy as np
import pandas as pd
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.utils.data as data_utils


In [77]:
#constant declaration
NUM_EPOCHS = 50
LEARNING_RATE = 1e-3
BATCH_SIZE = 100

In [78]:
# importing benign and mixed flow datasets from tran_feature_selection

# Re-enable these three lines when using real data instead of sample_flows.csv
# %store -r benign_flows  
# %store -r mixed_flows
# %store -r features

# This is a subset of mixed data intended only for making sure the model works on our data format
# actual analysis should use benign_flows and mixed_flows that were stored by tran_feature_selection
benign_flows = pd.read_csv("sample_flows.csv", index_col=None, delimiter=',') 

features_ori = ['flowInd', 'duration', 'numHdrDesc',
            'l4Proto', 'macPairs', 'dstPortClassN', 'numPktsSnt', 'numPktsRcvd',
            'numBytesSnt', 'numBytesRcvd', 'minPktSz', 'maxPktSz', 'avePktSize', 'stdPktSize', 'pktps', 'bytps',
            'pktAsm', 'bytAsm', 'ipMindIPID', 'ipMaxdIPID', 'ipMinTTL', 'ipMaxTTL', 'ipTTLChg', 'ipOptCnt',
            'tcpPSeqCnt', 'tcpSeqSntBytes', 'tcpSeqFaultCnt', 'tcpPAckCnt', 'tcpFlwLssAckRcvdBytes', 'tcpAckFaultCnt',
            'tcpInitWinSz', 'tcpAveWinSz', 'tcpMinWinSz', 'tcpMaxWinSz', 'tcpWinSzDwnCnt', 'tcpWinSzUpCnt',
            'tcpWinSzChgDirCnt', 'tcpOptPktCnt', 'tcpOptCnt', 'tcpMSS', 'tcpWS', 'tcpTmS', 'tcpTmER', 'tcpEcI',
            'tcpBtm', 'tcpSSASAATrip', 'tcpRTTAckTripMin', 'tcpRTTAckTripMax', 'tcpRTTAckTripAve',
            'tcpRTTAckTripJitAve', 'tcpRTTSseqAA', 'tcpRTTAckJitAve', 'icmpTCcnt', 'icmpEchoSuccRatio', 'icmpPFindex',
            'connSip', 'connDip', 'connSipDip', 'connSipDprt', 'connF',  'aveIAT', 'maxIAT', 
                'stdIAT', 'tcpISeqN', 'tcpUtm', 'tcpWinSzThRt']
benign_flows1 = pd.DataFrame(benign_flows[features_ori])
#features = benign_flows.columns
print(benign_flows1.head(10))
features = benign_flows1.columns

   flowInd   duration  numHdrDesc  l4Proto  macPairs  dstPortClassN  \
0        1   0.190275           1        6         1            443   
1        1   0.000187           1        6         1            443   
2        2  10.196861           1        6         1            443   
3        2  10.007584           1        6         1            443   
4        7  10.196851           1        6         1            443   
5        7  10.009806           1        6         1            443   
6       19   0.000677           1        6         1            443   
7       18   0.830672           1        6         1            443   
8       18   0.650051           1        6         1            443   
9       22  10.451712           1        6         1            443   

   numPktsSnt  numPktsRcvd  numBytesSnt  numBytesRcvd  ...  connDip  \
0           3            3           31            31  ...        1   
1           3            3           31            31  ...        1   
2    

In [79]:
#dataset loading
train_tensor = torch.tensor(benign_flows[features].values.astype(np.float32))
train_loader = torch.utils.data.DataLoader(train_tensor, batch_size = BATCH_SIZE, shuffle = False)


In [80]:
#encoder decoder 

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # encoder
        self.enc1 = nn.Linear(in_features=len(features), out_features=len(features))
        self.enc2 = nn.Linear(in_features=len(features), out_features= 33)
        self.enc3 = nn.Linear(in_features=33, out_features=16)
#         self.enc4 = nn.Linear(in_features=32, out_features=16)
        
        # decoder 
        self.dec1 = nn.Linear(in_features=16, out_features=33)
        self.dec2 = nn.Linear(in_features=33, out_features=len(features))
        self.dec3 = nn.Linear(in_features=len(features), out_features=len(features))
#         self.dec3 = nn.Linear(in_features=70, out_features=len(features))
        
    def forward(self, x):
        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        x = F.relu(self.enc3(x))
#         x = F.relu(self.enc4(x))

        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
#         x = F.relu(self.dec4(x))
        return x
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Autoencoder()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

#print(net)

In [81]:
#Trainning model

loss_function = nn.BCELoss()  #MSELoss too
get_loss = list()
def training(net, trainloader, epochs):
    train_loss = []
    for epoch in range(epochs):
        running_loss = 0.0
        for data in train_loader:
            input_data = data.to(device=device)
            optimizer.zero_grad()
            output = net(input_data).to(device=device)          # output is the reconstruced x 
            loss = loss_function(output,input_data).to(device=device) # input_data should be the target variable
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        loss = running_loss / len(trainloader)
        train_loss.append(loss)
        
        if epoch % 5 == 0:
            print('Epoch {} of {}, Train Loss: {:.3f}'.format(
            epoch+1, NUM_EPOCHS, loss))
    return train_loss

get_loss = training(net, train_loader, NUM_EPOCHS)
get_loss
#plotting of get_loss 


            

RuntimeError: Assertion `x >= 0. && x <= 1.' failed. input value should be between 0~1, but got 56807.558594 at C:\Users\builder\AppData\Local\Temp\pip-req-build-e5c8dddg\aten\src\THNN/generic/BCECriterion.c:62