In [31]:
import os
import torch
import csv
import pandas as pd
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.utils.data as data_utils


In [7]:
#constant declaration
NUM_EPOCHS = 50
LEARNING_RATE = 1e-3
BATCH_SIZE = 100

In [57]:
# importing benign and mixed flow datasets from tran_feature_selection

# Re-enable these three lines when using real data instead of sample_flows.csv
# %store -r benign_flows  
# %store -r mixed_flows
# %store -r features

# This is a subset of mixed data intended only for making sure the model works on our data format
# actual analysis should use benign_flows and mixed_flows that were stored by tran_feature_selection
benign_flows = pd.read_csv("sample_flows.csv", index_col=None, delimiter=',') 

features = ['flowInd', 'duration', 'numHdrDesc',
            'l4Proto', 'macPairs', 'dstPortClassN', 'numPktsSnt', 'numPktsRcvd',
            'numBytesSnt', 'numBytesRcvd', 'minPktSz', 'maxPktSz', 'avePktSize', 'stdPktSize', 'pktps', 'bytps',
            'pktAsm', 'bytAsm', 'ipMindIPID', 'ipMaxdIPID', 'ipMinTTL', 'ipMaxTTL', 'ipTTLChg', 'ipOptCnt',
            'tcpPSeqCnt', 'tcpSeqSntBytes', 'tcpSeqFaultCnt', 'tcpPAckCnt', 'tcpFlwLssAckRcvdBytes', 'tcpAckFaultCnt',
            'tcpInitWinSz', 'tcpAveWinSz', 'tcpMinWinSz', 'tcpMaxWinSz', 'tcpWinSzDwnCnt', 'tcpWinSzUpCnt',
            'tcpWinSzChgDirCnt', 'tcpOptPktCnt', 'tcpOptCnt', 'tcpMSS', 'tcpWS', 'tcpTmS', 'tcpTmER', 'tcpEcI',
            'tcpBtm', 'tcpSSASAATrip', 'tcpRTTAckTripMin', 'tcpRTTAckTripMax', 'tcpRTTAckTripAve',
            'tcpRTTAckTripJitAve', 'tcpRTTSseqAA', 'tcpRTTAckJitAve', 'icmpTCcnt', 'icmpEchoSuccRatio', 'icmpPFindex',
            'connSip', 'connDip', 'connSipDip', 'connSipDprt', 'connF',  'aveIAT', 'maxIAT', 
                'stdIAT', 'tcpISeqN', 'tcpUtm', 'tcpWinSzThRt']
benign_flows

Unnamed: 0,%dir,flowInd,flowStat,timeFirst,timeLast,duration,numHdrDesc,numHdrs,hdrDesc,srcMac,...,icmpTCcnt,icmpBFTypH_TypL_Code,icmpTmGtw,icmpEchoSuccRatio,icmpPFindex,connSip,connDip,connSipDip,connSipDprt,connF
0,A,1,0x0400000000004000,1.527823e+09,1.527823e+09,0.190275,1,3,eth:ipv4:tcp,e0:76:d0:3f:00:ae,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,4,4,4.000
1,B,1,0x0400000000004001,1.527823e+09,1.527823e+09,0.000187,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,3,3,3.000
2,A,2,0x0400000000004000,1.527823e+09,1.527823e+09,10.196861,1,3,eth:ipv4:tcp,e0:76:d0:3f:00:ae,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,3,1,4,6,2.000
3,B,2,0x0400000000004001,1.527823e+09,1.527823e+09,10.007584,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,3,3,5,5.000
4,A,7,0x0400000000004000,1.527823e+09,1.527823e+09,10.196851,1,3,eth:ipv4:tcp,e0:76:d0:3f:00:ae,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,5,1,2,8,1.600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,B,702,0x0400000000004001,1.527824e+09,1.527824e+09,1.924726,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,9,1,1,1.000
996,A,545,0x0400000000004000,1.527823e+09,1.527823e+09,0.000000,1,3,eth:ipv4:udp,70:ee:50:18:34:43,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,8,1,9,35,4.375
997,A,546,0x0400000000004000,1.527823e+09,1.527823e+09,0.000000,1,3,eth:ipv4:udp,70:ee:50:18:34:43,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,8,1,10,34,4.250
998,B,546,0x0400000000004001,1.527823e+09,1.527823e+09,0.000000,1,3,eth:ipv4:udp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,8,9,33,33.000


In [43]:
#dataset loading
train_tensor = torch.tensor(benign_flows[features].values)
train_loader = torch.utils.data.DataLoader(train_tensor, batch_size = BATCH_SIZE, shuffle = False)


In [50]:
#encoder decoder 

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # encoder
        self.enc1 = nn.Linear(in_features=len(features), out_features=70)
        self.enc2 = nn.Linear(in_features=70, out_features=64)
        self.enc3 = nn.Linear(in_features=64, out_features=32)
        self.enc4 = nn.Linear(in_features=32, out_features=16)
        
        # decoder 
        self.dec1 = nn.Linear(in_features=16, out_features=32)
        self.dec2 = nn.Linear(in_features=32, out_features=64)
        self.dec3 = nn.Linear(in_features=64, out_features=70)
        self.dec3 = nn.Linear(in_features=70, out_features=len(features))
        
    def forward(self, x):
        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        x = F.relu(self.enc3(x))
        x = F.relu(self.enc4(x))

        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        return x
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Autoencoder()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

#print(net)

In [51]:
#Trainning model

loss_function = nn.BCELoss()  #MSELoss too
get_loss = list()
def training(net, trainloader, epochs):
    train_loss = []
    for epoch in range(epochs):
        running_loss = 0.0
        for data in train_loader:
            input_data = data.to(device=device)
            optimizer.zero_grad()
            output = net(input_data).to(device=device)          # output is the reconstruced x 
            loss = loss_function(output, labels).to(device=device) 
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        loss = running_loss / len(trainloader)
        train_loss.append(loss)
        
        if epoch % 5 == 0:
            print('Epoch {} of {}, Train Loss: {:.3f}'.format(
            epoch+1, NUM_EPOCHS, loss))
    return train_loss

get_loss = training(net, trainloader, NUM_EPOCHS)

#plotting of get_loss 


            

RuntimeError: ignored