In [4]:
import pandas as pd
import numpy as np
import csv
import os

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [5]:
folder_path = "../../Datasets/QUIC"
all_sessions = []
labels = []
## Iterating through all the folders and files in the given path.
## Each folder represents a label.
## Each file represents a session.
## Each session is converted to a Pandas DataFrame.
for label in os.listdir(folder_path):
    label_folder_path = os.path.join(folder_path, label)
    if os.path.isdir(label_folder_path):
        for filename in os.listdir(label_folder_path):
            file_path = os.path.join(label_folder_path, filename)
            # Converting each session to Panda's DataFrame and restricting the number of rows. 
            session_df = pd.read_csv(file_path, sep="\t", header=None, skiprows=4, nrows=150)
            # Giving names to the columns (features).
            session_df.columns = ['Timestamp', 'Time Difference', 'Packet Size', 'Direction']
            # Adding to the collection.
            if not session_df.empty:
                all_sessions.append(session_df)
                labels.append(label)
# Merge all DataFrames to one DataFrame.
all_data = pd.concat(all_sessions, ignore_index=True)

print(session_df.head())

      Timestamp  Time Difference  Packet Size  Direction
0  1.522650e+09         0.080841         1294          1
1  1.522650e+09         0.081183         1294          1
2  1.522650e+09         0.081186          198          1
3  1.522650e+09         0.081203           74          0
4  1.522650e+09         0.348584          316          0


In [6]:
#2d array of traffic
trafficArr = []
for session in all_sessions:
    # if direction is 1, multiply packet size by -1
    session.loc[session['Direction'] == 1, 'Packet Size'] *= -1
    # remove Timestamp and Direction columns
    session.drop(['Timestamp', 'Direction'], axis=1, inplace=True)
    traffic = []
    # create list of values between 0 and 5 with step 0.1
    values = np.arange(0, 5, 0.1)
    for timing in values:
        sum = 0
        for index, row in session.iterrows():   
            if row['Time Difference'] < timing+0.5 and row['Time Difference'] > timing:
                sum += row['Packet Size']
            # else:
            #     break
        traffic.append(sum)
    # convert traffic to numpy array
    traffic = np.array(traffic)
    # add traffic to trafficArr
    trafficArr.append(traffic)
type(trafficArr)
# convert trafficArr to 2d-numpy array
trafficArr = np.array(trafficArr)

In [7]:
# convert labels to numpy array
labels = np.array(labels)

# train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(trafficArr, labels, test_size=0.2, random_state=42,stratify=labels)

from sklearn.metrics import classification_report

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rand_forest = RandomForestClassifier(n_estimators=100)
rand_forest.fit(X_train, y_train)
y_pred = rand_forest.predict(X_test)

print("Accuracy:",accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9549689440993789
               precision    recall  f1-score   support

   Google Doc       0.98      0.96      0.97       244
 Google Drive       0.95      0.95      0.95       327
 Google Music       0.92      0.90      0.91       118
Google Search       0.99      0.99      0.99       383
      Youtube       0.89      0.93      0.91       216

     accuracy                           0.95      1288
    macro avg       0.95      0.94      0.95      1288
 weighted avg       0.96      0.95      0.96      1288



In [9]:
from aeon.classification.convolution_based import RocketClassifier

rocket = RocketClassifier(num_kernels=2000)
rocket.fit(X_train, y_train)
y_pred = rocket.predict(X_test)

print("Accuracy:",accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Accuracy: 0.906055900621118
               precision    recall  f1-score   support

   Google Doc       0.99      0.98      0.98       244
 Google Drive       0.96      0.87      0.91       327
 Google Music       0.78      0.74      0.76       118
Google Search       0.98      0.96      0.97       383
      Youtube       0.72      0.88      0.79       216

     accuracy                           0.91      1288
    macro avg       0.89      0.88      0.88      1288
 weighted avg       0.91      0.91      0.91      1288



In [10]:
from aeon.classification.hybrid import HIVECOTEV2

hc2 = HIVECOTEV2(time_limit_in_minutes=0.2)
hc2.fit(X_train, y_train)
y_pred = hc2.predict(X_test)

print("Accuracy:",accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9448757763975155
               precision    recall  f1-score   support

   Google Doc       0.99      0.95      0.97       244
 Google Drive       0.94      0.94      0.94       327
 Google Music       0.87      0.87      0.87       118
Google Search       0.99      0.98      0.99       383
      Youtube       0.86      0.91      0.89       216

     accuracy                           0.94      1288
    macro avg       0.93      0.93      0.93      1288
 weighted avg       0.95      0.94      0.95      1288

