In [1]:
import sklearn as sk
import pandas as pd
import os 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import StackingClassifier


# Mount Google Drive at /content/drive; the dataset and model paths used below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

def read_CAN_trace(file):
    """Load a single comma-separated CAN trace.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        pandas.DataFrame holding the parsed content of the trace.
    """
    return pd.read_csv(file, sep=',')

def load_dataset(folder):
    """Read every trace file in ``folder`` and stack them into one DataFrame.

    Args:
        folder: Directory whose entries are all CAN trace files readable by
            ``read_CAN_trace``.

    Returns:
        pandas.DataFrame with the rows of all traces, re-indexed from 0.

    Raises:
        ValueError: If ``folder`` contains no entries.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any seeded split downstream) reproducible.
    trace_paths = [os.path.join(folder, filename)
                   for filename in sorted(os.listdir(folder))]
    if not trace_paths:
        raise ValueError(f"No trace files found in folder: {folder!r}")

    # One concat over the full list instead of growing a frame inside a loop:
    # linear instead of quadratic copying, and no empty seed frame involved
    # in dtype resolution.
    datasets = [read_CAN_trace(path) for path in trace_paths]
    return pd.concat(datasets, ignore_index=True)

def from_hex_to_float(val):
  """Parse a hexadecimal string with ``float.fromhex`` and return the float."""
  return float.fromhex(val)

def normalize_dataset(dataset):
  """Min-max scale the ``CAN_ID`` column to the range [0, 1].

  Only ``CAN_ID`` is rescaled; every other column is left untouched. The
  column is overwritten on the frame that is passed in, and that same frame
  is returned.

  Args:
    dataset: pandas.DataFrame with a numeric ``CAN_ID`` column.

  Returns:
    The same DataFrame object, with ``CAN_ID`` replaced by
    ``(CAN_ID - min) / (max - min)``. If every ``CAN_ID`` is identical the
    column is set to 0.0 instead of dividing by zero.
  """
  can_ids = dataset['CAN_ID']
  lowest = can_ids.min()
  span = can_ids.max() - lowest

  if span == 0:
    # A single distinct ID carries no scale information; avoid 0/0.
    dataset['CAN_ID'] = 0.0
  else:
    # Vectorised form of the per-element (val - min) / (max - min).
    dataset['CAN_ID'] = (can_ids - lowest) / span

  return dataset

def dataset_preprocessing(dataset):
  """Turn a raw CAN trace frame into the numeric feature frame used for training.

  Steps: drop rows containing nulls, convert the hexadecimal ``CAN_ID`` and
  ``PAYLOAD_HEX`` strings to floats, cast ``ANOMALY`` to bool, drop the
  ``PAYLOAD_BIN``, ``DLC`` and ``timestamp`` columns, then min-max scale
  ``CAN_ID`` through ``normalize_dataset``.

  Args:
    dataset: pandas.DataFrame with at least the columns ``CAN_ID``,
      ``PAYLOAD_HEX``, ``PAYLOAD_BIN``, ``DLC``, ``timestamp`` and ``ANOMALY``.
      The frame passed in is not modified.

  Returns:
    A new pandas.DataFrame with the remaining columns, ready for
    ``train_test_split``.
  """
  # Explicit copy: the column assignments below must not write into a view
  # of the caller's frame (this also silences SettingWithCopyWarning).
  dataset = dataset.dropna().copy()

  dataset['CAN_ID'] = dataset['CAN_ID'].apply(from_hex_to_float)
  # NOTE(review): float32 keeps a 24-bit significand, so distinct payloads
  # can collapse to the same feature value - confirm this is acceptable.
  dataset['PAYLOAD_HEX'] = dataset['PAYLOAD_HEX'].apply(from_hex_to_float).astype('float32')
  # NOTE(review): assumes ANOMALY was parsed as bool/0-1; string labels
  # would all become True under astype(bool) - confirm against the CSVs.
  dataset['ANOMALY'] = dataset['ANOMALY'].astype(bool)

  # PAYLOAD_BIN is dropped without being converted first: casting a column
  # that is discarded on the next line was wasted work.
  # DLC is removed because it always holds the same value.
  dataset = dataset.drop(columns=['PAYLOAD_BIN', 'DLC', 'timestamp'])

  return normalize_dataset(dataset)

def print_results(y_true, predicted):
  """Print accuracy, confusion matrix and F1 score of a prediction.

  Args:
    y_true: Ground-truth labels.
    predicted: Labels predicted by the classifier, aligned with ``y_true``.
  """
  acc = accuracy_score(y_true, predicted)
  print(f'accuracy  --> {acc}')

  matrix = confusion_matrix(y_true, predicted)
  print(f'Confusion Matrix --> \n {matrix}')

  f1 = f1_score(y_true, predicted)
  print(f'F-1 Score --> {f1}')


def make_dataset_id(paths):
  """Build one DataFrame from the trace files listed in ``paths``.

  Args:
    paths: Iterable of file paths, each readable by ``read_CAN_trace``.

  Returns:
    pandas.DataFrame with the rows of all traces in the given order,
    re-indexed from 0.

  Raises:
    ValueError: If ``paths`` is empty.
  """
  datasets = [read_CAN_trace(p) for p in paths]
  if not datasets:
    raise ValueError("make_dataset_id needs at least one trace path")

  # Single concat over the whole list: avoids re-copying the accumulated
  # frame on every iteration and keeps an empty seed frame out of dtype
  # resolution.
  return pd.concat(datasets, ignore_index=True)


def make_train_and_test(paths=None, dataset=None):
  """Train a stacking classifier on CAN traces and print its hold-out scores.

  The data is preprocessed with ``dataset_preprocessing``, split 75/25,
  the training part is oversampled with SMOTE (the classes are imbalanced),
  and a ``StackingClassifier`` over a decision tree, a random forest,
  extra trees and XGBoost is fitted. Results on the untouched test part
  are printed through ``print_results``.

  Args:
    paths: Iterable of trace file paths to load with ``make_dataset_id``.
    dataset: Alternatively, an already loaded raw DataFrame with the same
      columns the trace files provide. Exactly one of ``paths`` and
      ``dataset`` must be given.

  Returns:
    The fitted ``StackingClassifier``.

  Raises:
    ValueError: If neither or both of ``paths`` and ``dataset`` are given.
  """
  if (paths is None) == (dataset is None):
    raise ValueError("Provide exactly one of 'paths' or 'dataset'")
  if dataset is None:
    dataset = make_dataset_id(paths)
  dataset = dataset_preprocessing(dataset)

  x_train, x_test, y_train, y_test = train_test_split(
      dataset.drop(columns=['ANOMALY']), dataset['ANOMALY'],
      random_state=0, train_size=(3/4), shuffle=True)

  # Oversample the training split only; the test split keeps the real
  # class distribution.
  x_train_res, y_train_res = make_SMOTE(x_train, y_train)

  # Fixed seed so that repeated runs build the same base models.
  seed = 0
  estimators = [
      ('dt', DecisionTreeClassifier(max_depth=8, min_samples_split=8, min_samples_leaf=3,
                                    random_state=seed)),
      ('rf', RandomForestClassifier(verbose=2, n_estimators=200, max_depth=8, min_samples_split=8,
                                    min_samples_leaf=3, n_jobs=-1, random_state=seed)),
      ('ext', ExtraTreesClassifier(verbose=2, n_estimators=200, max_depth=8, min_samples_split=8,
                                   min_samples_leaf=3, n_jobs=-1, random_state=seed)),
      ('xgbc', XGBClassifier(n_estimators=200, tree_method='exact', random_state=seed)),
  ]

  stackClassifier = StackingClassifier(estimators=estimators, verbose=3)
  stackClassifier.fit(x_train_res, y_train_res)

  pred = stackClassifier.predict(x_test)
  print_results(y_test, pred)

  return stackClassifier

def make_SMOTE(x_train, y_train):
  """Oversample a training set with SMOTE (fixed ``random_state=2``).

  Args:
    x_train: Training features.
    y_train: Training labels aligned with ``x_train``.

  Returns:
    Tuple ``(x_resampled, y_resampled)`` produced by ``SMOTE.fit_resample``.
  """
  oversampler = SMOTE(random_state = 2)
  x_balanced, y_balanced = oversampler.fit_resample(x_train, y_train)
  return x_balanced, y_balanced

Mounted at /content/drive


In [None]:
"""
Serie di file n_8_v40


path_file_1 = "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_8_V40_01.can.txt"
path_file_2 = "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_8_V40_02.can.txt"
path_file_3 = "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_8_V40_03.can.txt"
path_file_4 = "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_8_V40_04.can.txt"
path_file_5 = "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_8_V40_05.can.txt"
path_file_6 = "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_8_V40_06.can.txt"
path_file_7 = "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_8_V40_07.can.txt"



paths = [path_file_1, path_file_2, path_file_3, path_file_4, path_file_5]
"""

In [None]:
# Load the pickled Car-Hacking RPM frame.
# NOTE(review): the previous helper `load_pickle_file` is not defined in this
# notebook; pd.read_pickle is assumed to be an equivalent loader - confirm.
# Unpickling executes arbitrary code: only load files you created yourself.
dataset = pd.read_pickle("/content/drive/MyDrive/Tesi_Magistrale/notebook/CAR_HACk_RPM_dataframe.pkl")
dataset

Unnamed: 0,timestamp,CAN_ID,DLC,PAYLOAD_HEX,PAYLOAD_BIN,ANOMALY
0,1.478191e+09,018F,8,fe3b0000003c0000,1111111000111011000000000000000000000000001111...,False
1,1.478191e+09,0260,8,19222230ff8f6e3f,1100100100010001000100011000011111111100011110...,False
2,1.478191e+09,02A0,8,6000831d9602bd00,1100000000000001000001100011101100101100000001...,False
3,1.478191e+09,0329,8,dcb87e1411200014,1101110010111000011111100001010000010001001000...,False
4,1.478191e+09,0545,8,d800008300000000,1101100000000000000000001000001100000000000000...,False
...,...,...,...,...,...,...
4621696,1.478201e+09,018F,8,fe59000000410000,1111111001011001000000000000000000000000010000...,False
4621697,1.478201e+09,0260,8,18212130088f6d19,1100000100001001000010011000000001000100011110...,False
4621698,1.478201e+09,02A0,8,24009a1d9702bd00,1001000000000010011010000111011001011100000010...,False
4621699,1.478201e+09,0329,8,dcb77f1411200014,1101110010110111011111110001010000010001001000...,False


In [None]:
# Train and evaluate the stacking classifier on the frame loaded in the previous cell.
CAR_HACK_classifier = make_train_and_test(dataset=dataset)



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   27.6s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.8s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   14.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    8.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   20.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   20.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.6s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   22.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.5s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   23.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.6s finished
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   47.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   18.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.6s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.7s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   20.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.8s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   32.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.6s finished
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.1min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    9.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   11.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.2s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   11.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   24.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    9.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   10.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s


building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    9.6s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   58.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   16.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   34.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.4min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.6s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Paral

accuracy  --> 1.0
Confusion Matrix --> 
 [[981640      0]
 [     0 163417]]
F-1 Score --> 1.0


In [None]:
"""
Salvo questo modello su file
"""
# pickle must be imported here: no earlier cell imports it, so on a fresh
# kernel the dump below would otherwise fail with NameError.
import pickle

file_name = "/content/drive/MyDrive/Tesi_Magistrale/notebook/primo_paper/Classificatori/classificatori_CAR_HACK/CAR_HACK_RPM_classifier.pkl"
with open(file_name, "wb") as open_file:
  pickle.dump(CAR_HACK_classifier, open_file)

In [None]:
"""
n_7 OSR
"""

# The seven OrderedSequenceReplay traces of the n_7 series (files 01..07).
(path_file_1, path_file_2, path_file_3, path_file_4,
 path_file_5, path_file_6, path_file_7) = [
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/OrderedSequenceReplay/n_7_V40_{:02d}.can.txt".format(trace_no)
    for trace_no in range(1, 8)
]

# Only the first three traces are handed to the classifier.
paths = [path_file_1, path_file_2, path_file_3]

In [None]:
import pickle

# Train and evaluate on the traces selected in the previous cell.
classifier = make_train_and_test(paths)

# Persist the fitted model to disk.
file_name = "/content/drive/MyDrive/Tesi_Magistrale/notebook/primo_paper/Classificatori/classificatori_DAGA/OSR_n_7_StackClassifier.pkl"
with open(file_name, "wb") as model_file:
  pickle.dump(classifier, model_file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  7.3min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   27.5s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  2.4min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    5.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   14.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent

accuracy  --> 0.7877927101474921
Confusion Matrix --> 
 [[498365 135191]
 [  1521   9161]]
F-1 Score --> 0.11818052814221396


In [None]:
"""
n_3 OSR
"""

# The seven OrderedSequenceReplay traces of the n_3 series (files 01..07).
(path_file_1, path_file_2, path_file_3, path_file_4,
 path_file_5, path_file_6, path_file_7) = [
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/OrderedSequenceReplay/n_3_V40_{:02d}.can.txt".format(trace_no)
    for trace_no in range(1, 8)
]

# Only the first three traces are handed to the classifier.
paths = [path_file_1, path_file_2, path_file_3]

In [None]:
import pickle

# Train and evaluate on the traces selected in the previous cell.
classifier = make_train_and_test(paths)

# Persist the fitted model to disk.
# NOTE(review): the file name ends in "..pkl" (double dot), unlike the other
# saved classifiers - confirm whether that is intended before renaming,
# since loaders may already reference this exact name.
file_name = "/content/drive/MyDrive/Tesi_Magistrale/notebook/primo_paper/Classificatori/classificatori_DAGA/OSR_n_3_StackClassifier..pkl"
with open(file_name, "wb") as model_file:
  pickle.dump(classifier, model_file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  5.5min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  2.1min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    4.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   11.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent

accuracy  --> 0.9153590760322255
Confusion Matrix --> 
 [[579784  53791]
 [     0   1945]]
F-1 Score --> 0.06743988488410395


In [2]:
"""
n_5 USR
"""

# The seven UnorderedSequenceReplay traces of the n_5 series (files 01..07).
(path_file_1, path_file_2, path_file_3, path_file_4,
 path_file_5, path_file_6, path_file_7) = [
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_5_V40_{:02d}.can.txt".format(trace_no)
    for trace_no in range(1, 8)
]

# Only the first three traces are handed to the classifier.
paths = [path_file_1, path_file_2, path_file_3]

In [3]:
import pickle

# Train and evaluate on the traces selected in the previous cell.
classifier = make_train_and_test(paths)

# Persist the fitted model to disk.
file_name = "/content/drive/MyDrive/Tesi_Magistrale/notebook/primo_paper/Classificatori/classificatori_DAGA/USR_n_5_StackClassifier.pkl"
with open(file_name, "wb") as model_file:
  pickle.dump(classifier, model_file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  6.8min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   25.0s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  2.2min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    6.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   15.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent

accuracy  --> 0.8482945201514035
Confusion Matrix --> 
 [[537127  96478]
 [   475   5007]]
F-1 Score --> 0.09361765778230668


In [2]:
"""
n_4 USR
"""

# The seven UnorderedSequenceReplay traces of the n_4 series (files 01..07).
(path_file_1, path_file_2, path_file_3, path_file_4,
 path_file_5, path_file_6, path_file_7) = [
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/UnorderedSequenceReplay/n_4_V40_{:02d}.can.txt".format(trace_no)
    for trace_no in range(1, 8)
]

# Only the first three traces are handed to the classifier.
paths = [path_file_1, path_file_2, path_file_3]

In [3]:
import pickle

# Train and evaluate on the traces selected in the previous cell.
classifier = make_train_and_test(paths)

# Persist the fitted model to disk.
file_name = "/content/drive/MyDrive/Tesi_Magistrale/notebook/primo_paper/Classificatori/classificatori_DAGA/USR_n_4_StackClassifier.pkl"
with open(file_name, "wb") as model_file:
  pickle.dump(classifier, model_file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  7.5min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  9.3min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   37.9s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  3.2min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    9.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   22.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent

accuracy  --> 0.8882601768939186
Confusion Matrix --> 
 [[562557  70973]
 [   217   3358]]
F-1 Score --> 0.08620645393166122
