In [1]:
import pandas as pd
import numpy as np
from EIMTC.preprocessing import OneHotEncoderEIMTC, M1CNNPreprocessing
from EIMTC.selection import train_test_split
from EIMTC.metrics import classification_report
from EIMTC.models import M1CNN

In [17]:
# CSV to load, and the subset of its columns this notebook works with.
filepath = './data/iscx2016_merged.csv'
wanted_columns = ['udps.n_bytes_counted', 'udps.n_bytes', 'encapsulation', 'traffic_type']
df = pd.read_csv(filepath, usecols=wanted_columns)
df

Unnamed: 0,udps.n_bytes,udps.n_bytes_counted,encapsulation,traffic_type
0,"[80, 79, 83, 84, 32, 47, 111, 99, 115, 112, 32...",784,nonvpn,video
1,"[23, 3, 3, 0, 58, 0, 0, 0, 0, 0, 0, 0, 7, 210,...",63,nonvpn,video
2,"[230, 107, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 11...",114,nonvpn,video
3,"[198, 218, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 11...",119,nonvpn,video
4,"[166, 113, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 11...",114,nonvpn,video
...,...,...,...,...
313862,"[43, 100, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",249,nonvpn,video
313863,"[43, 100, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",249,nonvpn,video
313864,"[68, 223, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",409,nonvpn,video
313865,"[68, 223, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",249,nonvpn,video


In [3]:
def preprocessing(df):
    """Prepare the loaded flow table for M1CNN training.

    Rows containing missing values are dropped, the M1CNN feature
    preprocessing is applied, and a one-hot encoding of ``traffic_type`` is
    stored in a new ``traffic_type_ohc`` column (one vector per row).

    Args:
        df: DataFrame with at least a ``traffic_type`` column, plus whatever
            columns ``M1CNNPreprocessing.preprocess_features`` needs.

    Returns:
        A new DataFrame holding the processed rows and the added
        ``traffic_type_ohc`` column. All steps below operate on an
        independent copy rather than on the frame passed in.
    """
    # The result of dropna() may still be tracked by pandas as derived from
    # the caller's frame; writing a column to it then raises
    # SettingWithCopyWarning. An explicit copy makes the frame independent
    # before the in-place steps below run.
    df = df.dropna().copy()
    # The return value is discarded, so this helper presumably edits the
    # frame in place -- TODO confirm against the EIMTC implementation.
    M1CNNPreprocessing.preprocess_features(dataframe=df)
    enc = OneHotEncoderEIMTC()
    # .toarray() densifies the encoder output; list() splits it into one row
    # vector per sample so each cell of the new column holds a single vector.
    df['traffic_type_ohc'] = list(enc.fit_transform(df['traffic_type']).toarray())
    return df

In [4]:
# Run the preprocessing step and show the resulting frame.
# NOTE(review): `df` is rebound to the processed frame here, so re-running
# this cell feeds already-processed data back into preprocessing().
df = preprocessing(df=df)
df

Unnamed: 0,udps.n_bytes,encapsulation,traffic_type,traffic_type_ohc
0,"[80, 79, 83, 84, 32, 47, 111, 99, 115, 112, 32...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
1,"[23, 3, 3, 0, 58, 0, 0, 0, 0, 0, 0, 0, 7, 210,...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
2,"[230, 107, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 11...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
3,"[198, 218, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 11...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
4,"[166, 113, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 11...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
...,...,...,...,...
313862,"[43, 100, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
313863,"[43, 100, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
313864,"[68, 223, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
313865,"[68, 223, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 115...",nonvpn,video,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"


In [5]:
# Model dimensions come from the data: input length from the first row's
# payload, output width from the number of distinct traffic types.
first_payload = df['udps.n_bytes'].iloc[0]
n_bytes = len(first_payload)
n_classes = len(df['traffic_type'].unique())
model = M1CNN(payload_size=n_bytes, n_classes=n_classes)
print(n_bytes, n_classes)

784 6


In [6]:
# 75/25 split, stratified on the traffic type label and seeded for
# repeatability.
payloads = df['udps.n_bytes'].values
labels_ohc = df['traffic_type_ohc'].values
x_train, x_test, y_train, y_test = train_test_split(
    payloads,
    labels_ohc,
    test_size=0.25,
    stratify=df['traffic_type'].values,
    random_state=42,
)

In [None]:
# Training hyperparameters.
epochs = 10
batch_size = 128

model.compile(optimizer='adam', loss='categorical_crossentropy')

# np.stack joins the per-sample sequences along a new leading axis, giving
# one array for the inputs and one for the targets.
x_train_arr = np.stack(x_train)
y_train_arr = np.stack(y_train)

# NOTE(review): use_multiprocessing / workers are passed straight to the
# wrapper's fit(). If that forwards to Keras Model.fit, they only apply to
# generator/Sequence inputs and are not accepted by Keras 3 -- confirm.
model.fit(
    x_train_arr,
    y_train_arr,
    epochs=epochs,
    batch_size=batch_size,
    use_multiprocessing=True,
    workers=4,
    verbose=1,
)

In [None]:
# Score the held-out samples, then reduce both the scores and the one-hot
# targets to class indices by taking the argmax along axis 1.
class_scores = model.model.predict(np.stack(x_test))
predictions = np.argmax(class_scores, axis=1)

y_test_stacked = np.stack(y_test)
y_test_true = np.argmax(y_test_stacked, axis=1)

report = classification_report(y_test_true, predictions)
report

'              precision    recall  f1-score   support\n\n           0       0.72      0.99      0.83     53317\n           1       0.79      0.45      0.57      3769\n           2       0.75      0.10      0.18      1947\n           3       0.77      0.04      0.08     17640\n           4       0.97      0.92      0.94       122\n           5       0.65      0.50      0.56      1209\n\n    accuracy                           0.72     78004\n   macro avg       0.77      0.50      0.53     78004\nweighted avg       0.73      0.72      0.63     78004\n'

In [9]:
# Persist the text report next to the notebook.
report_path = 'm1cnn_iscx2016_traffictype_report.txt'
with open(report_path, "w+") as report_file:
    report_file.write(report)
