In [80]:
import warnings
warnings.simplefilter('ignore')

In [92]:
import pandas as pd
import numpy as np
import click
import logging as LOGGER
import seaborn as sns
import matplotlib.pyplot as plt

from glob import glob
from matplotlib.backends.backend_pdf import PdfPages

from src.data.data import create_directory

plt.rc('text', usetex=True)
plt.rc('font', family='serif')

LOGGER.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=LOGGER.INFO)

def get_files(glob_):
    res = glob(glob_)
    if res:
        return res
    raise ValueError(f"No matches for {glob_}")

def read_multiple_csvs(files):
    dfs = []
    for i in files:
        dfs.append(pd.read_csv(i))
    LOGGER.info("Concatenating metrics ...")
    return pd.concat(dfs)

def remove_predict(df, type_='netflow'):
    return df[df.model.str.contains('_0steps_') & df.model.str.contains(type_)]

# Netflow - Detection

In [116]:
ddos1 = remove_predict(read_multiple_csvs(get_files('../no_background_output/ddos_1s/*fast*/metrics.csv'))).reset_index(drop=True)
ddos01 = remove_predict(read_multiple_csvs(get_files('../no_background_output/ddos_10s/*fast*/metrics.csv'))).reset_index(drop=True)
ddos10 = remove_predict(read_multiple_csvs(get_files('../no_background_output/ddos_0.1s/*fast*/metrics.csv'))).reset_index(drop=True)

spam1 = remove_predict(read_multiple_csvs(get_files('../no_background_output/spam_1s/*fast*/metrics.csv'))).reset_index(drop=True)
spam01 = remove_predict(read_multiple_csvs(get_files('../no_background_output/spam_10s/*fast*/metrics.csv'))).reset_index(drop=True)
spam10 = remove_predict(read_multiple_csvs(get_files('../no_background_output/spam_0.1s/*fast*/metrics.csv'))).reset_index(drop=True)

irc1 = remove_predict(read_multiple_csvs(get_files('../no_background_output/irc_1s/*fast*/metrics.csv'))).reset_index(drop=True)
irc01 = remove_predict(read_multiple_csvs(get_files('../no_background_output/irc_10s/*fast*/metrics.csv'))).reset_index(drop=True)
irc10 = remove_predict(read_multiple_csvs(get_files('../no_background_output/irc_0.1s/*fast*/metrics.csv'))).reset_index(drop=True)

2019-11-03 01:21:15,152 INFO Concatenating metrics ...
2019-11-03 01:21:15,163 INFO Concatenating metrics ...
2019-11-03 01:21:15,173 INFO Concatenating metrics ...
2019-11-03 01:21:15,184 INFO Concatenating metrics ...
2019-11-03 01:21:15,194 INFO Concatenating metrics ...
2019-11-03 01:21:15,200 INFO Concatenating metrics ...
2019-11-03 01:21:15,210 INFO Concatenating metrics ...
2019-11-03 01:21:15,221 INFO Concatenating metrics ...
2019-11-03 01:21:15,231 INFO Concatenating metrics ...


In [117]:
DDOS = pd.DataFrame({"DDoS 0.1 - Second" : ddos01.f1, "DDoS - 1 Second": ddos1.f1, "DDoS - 10 Second": ddos10.f1, })
DDOS.index = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']
DDOS

Unnamed: 0,DDoS 0.1 - Second,DDoS - 1 Second,DDoS - 10 Second
Logistic Regression,0.400641,0.38824,0.636746
Random Forest,0.91818,0.951444,0.957782
Gradient Boosting,0.913055,0.989507,0.998664


In [118]:
SPAM = pd.DataFrame({"Spam 0.1 - Second" : spam01.f1, "Spam - 1 Second": spam1.f1, "Spam - 10 Second": spam10.f1, })
SPAM.index = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']
SPAM

Unnamed: 0,Spam 0.1 - Second,Spam - 1 Second,Spam - 10 Second
Logistic Regression,0.469453,0.465331,0.697225
Random Forest,0.93844,0.926827,0.921818
Gradient Boosting,0.935124,0.9887,0.998648


In [119]:
IRC = pd.DataFrame({"IRC 0.1 - Second" : irc01.f1, "IRC - 1 Second": irc1.f1, "IRC - 10 Second": irc10.f1, })
IRC.index = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']
IRC

Unnamed: 0,IRC 0.1 - Second,IRC - 1 Second,IRC - 10 Second
Logistic Regression,0.486168,0.518874,0.67736
Random Forest,0.937615,0.92661,0.941098
Gradient Boosting,0.953216,0.992189,0.999376


In [120]:
print(DDOS.to_latex(float_format='%1.3f'))

\begin{tabular}{lrrr}
\toprule
{} &  DDoS 0.1 - Second &  DDoS - 1 Second &  DDoS - 10 Second \\
\midrule
Logistic Regression &              0.401 &            0.388 &             0.637 \\
Random Forest       &              0.918 &            0.951 &             0.958 \\
Gradient Boosting   &              0.913 &            0.990 &             0.999 \\
\bottomrule
\end{tabular}



In [121]:
print(SPAM.to_latex(float_format='%1.3f'))

\begin{tabular}{lrrr}
\toprule
{} &  Spam 0.1 - Second &  Spam - 1 Second &  Spam - 10 Second \\
\midrule
Logistic Regression &              0.469 &            0.465 &             0.697 \\
Random Forest       &              0.938 &            0.927 &             0.922 \\
Gradient Boosting   &              0.935 &            0.989 &             0.999 \\
\bottomrule
\end{tabular}



In [122]:
print(IRC.to_latex(float_format='%1.3f'))

\begin{tabular}{lrrr}
\toprule
{} &  IRC 0.1 - Second &  IRC - 1 Second &  IRC - 10 Second \\
\midrule
Logistic Regression &             0.486 &           0.519 &            0.677 \\
Random Forest       &             0.938 &           0.927 &            0.941 \\
Gradient Boosting   &             0.953 &           0.992 &            0.999 \\
\bottomrule
\end{tabular}



# Pcap - Detection

In [110]:
ddos1 = remove_predict(read_multiple_csvs(get_files('../no_background_output/ddos_1s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)
ddos10 = remove_predict(read_multiple_csvs(get_files('../no_background_output/ddos_10s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)
ddos01 = remove_predict(read_multiple_csvs(get_files('../no_background_output/ddos_0.1s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)

spam1 = remove_predict(read_multiple_csvs(get_files('../no_background_output/spam_1s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)
spam10 = remove_predict(read_multiple_csvs(get_files('../no_background_output/spam_10s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)
# spam01 = remove_predict(read_multiple_csvs(get_files('../no_background_output/spam_0.1s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)

irc1 = remove_predict(read_multiple_csvs(get_files('../no_background_output/irc_1s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)
irc10 = remove_predict(read_multiple_csvs(get_files('../no_background_output/irc_10s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)
irc01 = remove_predict(read_multiple_csvs(get_files('../no_background_output/irc_0.1s/pcap*/metrics.csv')), 'pcap').reset_index(drop=True)

2019-11-03 01:15:40,830 INFO Concatenating metrics ...
2019-11-03 01:15:40,989 INFO Concatenating metrics ...
2019-11-03 01:15:41,147 INFO Concatenating metrics ...
2019-11-03 01:15:41,305 INFO Concatenating metrics ...
2019-11-03 01:15:41,463 INFO Concatenating metrics ...
2019-11-03 01:15:41,620 INFO Concatenating metrics ...
2019-11-03 01:15:41,777 INFO Concatenating metrics ...
2019-11-03 01:15:41,936 INFO Concatenating metrics ...


In [111]:
DDOS = pd.DataFrame({"DDoS - 0.1 Second" : ddos01.f1, "DDoS - 1 Second": ddos1.f1, "DDoS - 10 Second": ddos10.f1, })
DDOS.index = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']
DDOS

Unnamed: 0,DDoS - 0.1 Second,DDoS - 1 Second,DDoS - 10 Second
Logistic Regression,0.989312,0.929004,0.873234
Random Forest,0.994692,0.925323,0.843595
Gradient Boosting,0.994763,0.970657,0.996625


In [105]:
SPAM = pd.DataFrame({"Spam - 0.1 Second" : spam01.f1, "Spam - 1 Second": spam1.f1, "Spam - 10 Second": spam10.f1, })
SPAM.index = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']
SPAM

Unnamed: 0,Spam - 0.1 Second,Spam - 1 Second,Spam - 10 Second
Logistic Regression,0.848238,0.843769,0.848238
Random Forest,0.865508,0.867312,0.865508
Gradient Boosting,0.992926,0.995193,0.992926


In [106]:
IRC = pd.DataFrame({"IRC - 0.1 Second" : irc01.f1, "IRC - 1 Second": irc1.f1, "IRC - 10 Second": irc10.f1, })
IRC.index = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']
IRC

Unnamed: 0,IRC - 0.1 Second,IRC - 1 Second,IRC - 10 Second
Logistic Regression,0.897895,0.847097,0.900524
Random Forest,0.938205,0.886144,0.910534
Gradient Boosting,0.995193,1.0,0.945013


In [113]:
print(DDOS.to_latex(float_format='%1.3f'))

\begin{tabular}{lrrr}
\toprule
{} &  DDoS - 0.1 Second &  DDoS - 1 Second &  DDoS - 10 Second \\
\midrule
Logistic Regression &              0.989 &            0.929 &             0.873 \\
Random Forest       &              0.995 &            0.925 &             0.844 \\
Gradient Boosting   &              0.995 &            0.971 &             0.997 \\
\bottomrule
\end{tabular}



In [114]:
print(SPAM.to_latex(float_format='%1.3f'))

\begin{tabular}{lrrr}
\toprule
{} &  Spam - 0.1 Second &  Spam - 1 Second &  Spam - 10 Second \\
\midrule
Logistic Regression &              0.848 &            0.844 &             0.848 \\
Random Forest       &              0.866 &            0.867 &             0.866 \\
Gradient Boosting   &              0.993 &            0.995 &             0.993 \\
\bottomrule
\end{tabular}



In [115]:
print(IRC.to_latex(float_format='%1.3f'))

\begin{tabular}{lrrr}
\toprule
{} &  IRC - 0.1 Second &  IRC - 1 Second &  IRC - 10 Second \\
\midrule
Logistic Regression &             0.898 &           0.847 &            0.901 \\
Random Forest       &             0.938 &           0.886 &            0.911 \\
Gradient Boosting   &             0.995 &           1.000 &            0.945 \\
\bottomrule
\end{tabular}

