<a href="https://colab.research.google.com/github/MackPreston/iot-traffic-analysis/blob/main/IoT_Traffic_Analysis_Tranalyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project Structure
To run this notebook, the following project structure is expected:


```
mlproject/    # project root (where jupyter is running)
    benign/
        pcap/
          18-05-28.pcap
          18-05-29.pcap
          ...
        tran/
          18-05-28_flows.txt
          18-05-28_headers.txt
          ...
        argus/
          ...
    attack/
        pcap/
          ...
        tran/
          ...
        argus/
          ...
```



# Extracting flow dataset from pcap

```
# Run these bash commands to generate tranalyzer flow files from pcap files
# WARNING! this may take a while
for f in benign/pcap/*.pcap; do t2 -r "$f" -w benign/tran/. & done
for f in attack/pcap/*.pcap; do t2 -r "$f" -w attack/tran/. & done
```



# Imports

In [None]:
import sys
import os
import glob
print(sys.version_info)
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
import collections

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix,classification_report

sys.version_info(major=3, minor=8, micro=5, releaselevel='final', serial=0)


## Loading benign dataset

The dataset is then loaded from Tranalyzer's output into a DataFrame.

In [None]:
# Load the benign flow file(s) under benign/tran into a single DataFrame.
print("Current Directory:" + os.getcwd())
# file_pattern = '*_flows.txt'  # uncomment to load every flow file
file_pattern = '18-05-29_flows.txt'
glob_path = os.path.join(os.getcwd(), 'benign/tran', file_pattern)
# glob.glob() returns matches in arbitrary order; sorting keeps the row order
# of the concatenated frame reproducible when the wildcard pattern is used.
file_list = sorted(glob.glob(glob_path))

# pd.concat() fails with an opaque "No objects to concatenate" ValueError on
# an empty list, so stop early and report the pattern that matched nothing.
if not file_list:
    raise FileNotFoundError("No flow files match: " + glob_path)

# NOTE(review): the recorded output of this cell contains a truncated warning;
# if it is pandas' DtypeWarning (mixed column types), consider passing
# explicit dtypes or low_memory=False to read_csv -- confirm before changing.
li = []
for filepath in file_list:
    # Parsed as tab-separated text with the first row used as the header.
    df = pd.read_csv(filepath, index_col=None, header=0, delimiter='\t')
    li.append(df)

# ignore_index=True renumbers rows 0..n-1 across all loaded files.
benign_flows = pd.concat(li, axis=0, ignore_index=True)
benign_flows

Current Directory:/mnt/e/MLProject


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,%dir,flowInd,flowStat,timeFirst,timeLast,duration,numHdrDesc,numHdrs,hdrDesc,srcMac,...,icmpTCcnt,icmpBFTypH_TypL_Code,icmpTmGtw,icmpEchoSuccRatio,icmpPFindex,connSip,connDip,connSipDip,connSipDprt,connF
0,A,2,0x0400000000004000,1.527552e+09,1.527552e+09,0.012182,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,2,2,2.0
1,B,2,0x0400000000004001,1.527552e+09,1.527552e+09,0.001862,1,3,eth:ipv4:tcp,ec:1a:59:83:28:11,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,1,1,1.0
2,A,3,0x0400000000004000,1.527552e+09,1.527552e+09,2.146254,1,3,eth:ipv4:tcp,ec:1a:59:83:28:11,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,2,2,2.0
3,B,3,0x0400000000004001,1.527552e+09,1.527552e+09,1.922308,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,1,1,1.0
4,A,24,0x0400000000004000,1.527552e+09,1.527552e+09,0.012025,1,3,eth:ipv4:tcp,ec:1a:59:83:28:11,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,5,1,4,4,0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90926,B,47443,0x0400000000004001,1.527635e+09,1.527638e+09,3514.705441,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,1,1,1.0
90927,A,49217,0x0400000000004000,1.527638e+09,1.527638e+09,45.225318,1,3,eth:ipv4:tcp,f4:f5:d8:d4:eb:12,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,2,2,2.0
90928,B,49217,0x0400000000004001,1.527638e+09,1.527638e+09,45.224967,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,1,1,1.0
90929,A,1,0x0400000000004000,1.527552e+09,1.527638e+09,86397.810988,1,3,eth:ipv4:tcp,30:8c:fb:2f:e4:b2,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,2,2,2.0


# Loading Mixed Dataset

In [None]:
# Load the flow file(s) under attack/tran into a single DataFrame.
import os
import glob
print("Current Directory:" + os.getcwd())
# file_pattern = '*_flows.txt'  # uncomment to load every flow file
file_pattern = '18-06-01_flows.txt'
glob_path = os.path.join(os.getcwd(), 'attack/tran', file_pattern)
# glob.glob() returns matches in arbitrary order; sorting keeps the row order
# of the concatenated frame reproducible when the wildcard pattern is used.
file_list = sorted(glob.glob(glob_path))

# pd.concat() fails with an opaque "No objects to concatenate" ValueError on
# an empty list, so stop early and report the pattern that matched nothing.
if not file_list:
    raise FileNotFoundError("No flow files match: " + glob_path)

# NOTE(review): the recorded output of this cell contains a truncated warning;
# if it is pandas' DtypeWarning (mixed column types), consider passing
# explicit dtypes or low_memory=False to read_csv -- confirm before changing.
li = []
for filepath in file_list:
    # Parsed as tab-separated text with the first row used as the header.
    df = pd.read_csv(filepath, index_col=None, header=0, delimiter='\t')
    li.append(df)

# ignore_index=True renumbers rows 0..n-1 across all loaded files.
attack_flows = pd.concat(li, axis=0, ignore_index=True)
attack_flows

Current Directory:/mnt/e/MLProject


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,%dir,flowInd,flowStat,timeFirst,timeLast,duration,numHdrDesc,numHdrs,hdrDesc,srcMac,...,icmpTCcnt,icmpBFTypH_TypL_Code,icmpTmGtw,icmpEchoSuccRatio,icmpPFindex,connSip,connDip,connSipDip,connSipDprt,connF
0,A,1,0x0400000000004000,1.527823e+09,1.527823e+09,0.190275,1,3,eth:ipv4:tcp,e0:76:d0:3f:00:ae,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,4,4,4.0
1,B,1,0x0400000000004001,1.527823e+09,1.527823e+09,0.000187,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,3,3,3.0
2,A,2,0x0400000000004000,1.527823e+09,1.527823e+09,10.196861,1,3,eth:ipv4:tcp,e0:76:d0:3f:00:ae,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,3,1,4,6,2.0
3,B,2,0x0400000000004001,1.527823e+09,1.527823e+09,10.007584,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,3,3,5,5.0
4,A,7,0x0400000000004000,1.527823e+09,1.527823e+09,10.196851,1,3,eth:ipv4:tcp,e0:76:d0:3f:00:ae,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,5,1,2,8,1.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226479,B,117860,0x0400000000004001,1.527897e+09,1.527897e+09,0.219089,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,1,1,1.0
226480,A,120,0x0400000000004000,1.527823e+09,1.527897e+09,74067.670764,1,3,eth:ipv4:tcp,30:8c:fb:2f:e4:b2,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,2,2,2.0
226481,B,120,0x0400000000004001,1.527823e+09,1.527897e+09,74066.088314,1,3,eth:ipv4:tcp,14:cc:20:51:33:ea,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,1,1,1.0
226482,A,117861,0x0800000000008000,1.527897e+09,1.527897e+09,0.000000,1,3,eth:ipv6:udp,e0:76:d0:3f:00:ae,...,0,0x00000000_0x00000000_0x0000,0x00000000,0.0,0,1,1,1,1,1.0


# Feature Selection