In [None]:
from math import sqrt
import sklearn as sk
import pandas as pd
import os 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
import pickle


# Google Colab only: mount Google Drive at /content/drive so that the
# /content/drive/MyDrive/... dataset and model paths used by the cells below resolve.
from google.colab import drive
drive.mount('/content/drive')

def read_CAN_trace(file):
    """Load one comma-separated CAN trace into a DataFrame.

    Args:
        file: Path (or file-like object) handed straight to ``pd.read_csv``.

    Returns:
        pandas.DataFrame: the parsed trace, one row per CSV record.
    """
    return pd.read_csv(file, sep=',')

def load_dataset(folder):
    """Read every trace file in ``folder`` and stack them into one DataFrame.

    Files are visited in sorted name order so the row order of the result is
    reproducible (``os.listdir`` returns entries in arbitrary order).
    Sub-directories are skipped instead of being passed to the CSV reader.

    Args:
        folder: Directory containing the CSV trace files.

    Returns:
        pandas.DataFrame: all traces concatenated, with a fresh 0..n-1 index.

    Raises:
        ValueError: if ``folder`` contains no files to load.
    """
    datasets = []
    for filename in sorted(os.listdir(folder)):
        f = os.path.join(folder, filename)
        if os.path.isfile(f):
            datasets.append(read_CAN_trace(f))

    if not datasets:
        raise ValueError(f"load_dataset: no trace files found in {folder!r}")

    # One concat over the whole list: avoids the quadratic copy of growing a
    # frame inside a loop and the deprecated concatenation with an empty seed
    # frame.
    return pd.concat(datasets, ignore_index=True)

def from_hex_to_float(val):
  """Interpret ``val`` as hexadecimal text and return its value as a float.

  ``float.fromhex`` only accepts ``str``. A CSV column of hexadecimal values
  whose digits all happen to be 0-9 can be parsed as integers by the reader,
  so non-string inputs are converted back to their textual form first
  (e.g. the integer ``10`` is read as hex ``"10"`` and yields ``16.0``).

  Args:
      val: Hexadecimal value, normally a ``str`` such as ``"1A"``.

  Returns:
      float: the numeric value of the hexadecimal digits.

  Raises:
      ValueError: if the text is not a valid hexadecimal number.
  """
  if not isinstance(val, str):
    val = str(val)
  return float.fromhex(val)


def dataset_preprocessing(dataset):
  """Clean a raw CAN trace frame and convert it to numeric model features.

  Steps, in order:
    1. drop every row that contains a null value (in any column);
    2. convert ``CAN_ID`` and ``PAYLOAD_HEX`` from hexadecimal text to floats
       (``PAYLOAD_HEX`` is then stored as float32);
    3. cast ``ANOMALY`` to bool;
    4. drop the ``PAYLOAD_BIN``, ``DLC`` and ``timestamp`` columns.

  The input frame is not modified.

  Args:
      dataset: DataFrame with at least the columns ``CAN_ID``, ``PAYLOAD_HEX``,
          ``ANOMALY``, ``PAYLOAD_BIN``, ``DLC`` and ``timestamp``.

  Returns:
      pandas.DataFrame: a new frame holding the remaining columns.
  """
  # Drop any rows with null values. The explicit copy guarantees the column
  # assignments below act on an independent frame (no SettingWithCopyWarning).
  cleaned = dataset.dropna().copy()

  cleaned['CAN_ID'] = cleaned['CAN_ID'].apply(from_hex_to_float)
  cleaned['PAYLOAD_HEX'] = cleaned['PAYLOAD_HEX'].apply(from_hex_to_float).astype('float32')
  cleaned['ANOMALY'] = cleaned['ANOMALY'].astype(bool)

  # PAYLOAD_BIN is discarded here, so it is no longer cast to float32 first.
  return cleaned.drop(columns=['PAYLOAD_BIN', 'DLC', 'timestamp'])



def print_results(y_true, predicted):
  """Print accuracy, confusion matrix and F1 score for a set of predictions.

  Args:
      y_true: Ground-truth labels.
      predicted: Labels predicted by a classifier, aligned with ``y_true``.
  """
  # Each metric is computed right before it is printed, so a failure in a
  # later metric still leaves the earlier lines on screen.
  acc = accuracy_score(y_true,predicted)
  print(f'accuracy  --> {acc}')

  conf_mat = confusion_matrix(y_true,predicted)
  print(f'Confusion Matrix --> \n {conf_mat}')

  f1 = f1_score(y_true, predicted)
  print(f'F-1 Score --> {f1}')


def make_dataset_id(paths):
  """Build a single DataFrame from the trace files listed in ``paths``.

  Args:
      paths: Iterable of file paths, each readable by ``read_CAN_trace``.

  Returns:
      pandas.DataFrame: all traces concatenated in the order given, with a
      fresh 0..n-1 index.

  Raises:
      ValueError: if ``paths`` is empty.
  """
  datasets = [read_CAN_trace(p) for p in paths]
  if not datasets:
    raise ValueError("make_dataset_id: 'paths' is empty, nothing to load")

  # One concat over the whole list: avoids the quadratic copy of growing a
  # frame inside a loop and the deprecated concatenation with an empty seed
  # frame.
  return pd.concat(datasets, ignore_index=True)


def make_train_and_test(paths, k_neigh=5, random_state=0):
  """Train KNN, Random Forest and SVM classifiers and print hold-out metrics.

  The traces in ``paths`` are loaded and preprocessed, split 75% / 25% into
  train and test sets (shuffled), and the three models are fitted on the
  training part. Accuracy, confusion matrix and F1 score on the test part are
  printed for each model.

  Args:
      paths: Iterable of trace file paths used to build the dataset.
      k_neigh: Number of neighbours for the KNN classifier.
      random_state: Seed used for the train/test split and for the Random
          Forest, so that repeated runs give the same models and metrics.

  Returns:
      tuple: the fitted ``(knn, rf, svm)`` estimators.
  """
  # Imported explicitly: `import sklearn as sk` alone does not guarantee that
  # the `sklearn.svm` submodule has been loaded, so `sk.svm.SVC` may raise
  # AttributeError depending on which other sklearn modules were imported.
  from sklearn.svm import SVC

  dataset = dataset_preprocessing(make_dataset_id(paths))

  x_train, x_test, y_train, y_test = train_test_split(
      dataset.drop(columns=['ANOMALY']), dataset['ANOMALY'],
      random_state=random_state, train_size=(3/4), shuffle=True)

  knn = KNeighborsClassifier(n_jobs = -1, weights = 'distance', n_neighbors=k_neigh)
  # Seeded so the forest (bootstrap samples / feature sub-sampling) is reproducible.
  rf = RandomForestClassifier(verbose = 2, n_estimators = 100, n_jobs = -1,
                              random_state=random_state)
  svm = SVC(gamma = 'auto', verbose = True)

  print("Inizia il FIT ........")

  # Fit every model first, then evaluate, as separate phases.
  for model, done_msg in (
      (knn, "Fine fit KNN ........."),
      (rf, "Fine fit RF .........."),
      (svm, "Fine fit SVM ........."),
  ):
    model.fit(x_train, y_train)
    print(done_msg)

  for model, header in (
      (knn, "\nKNN results:"),
      (rf, "\nRF results:"),
      (svm, "\nSVM results:"),
  ):
    print(header)
    print_results(y_test, model.predict(x_test))

  return knn, rf, svm


In [None]:
"""
DOS
"""

# The seven Denial-of-Service traces, in file-number order.
dos_trace_files = [
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/DenialOfService/lowestID_V40_01.can.txt",
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/DenialOfService/lowestID_V40_02.can.txt",
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/DenialOfService/lowestID_V40_03.can.txt",
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/DenialOfService/lowestID_V40_04.can.txt",
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/DenialOfService/lowestID_V40_05.can.txt",
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/DenialOfService/lowestID_V40_06.can.txt",
    "/content/drive/MyDrive/Tesi_Magistrale/Dataset_DAGA/infected/DenialOfService/lowestID_V40_07.can.txt",
]

(path_file_1, path_file_2, path_file_3, path_file_4,
 path_file_5, path_file_6, path_file_7) = dos_trace_files

# Files 1-4 form the training/validation set; files 5-7 are used individually
# by the evaluation cells further down.
paths = dos_trace_files[:4]

In [None]:
# Load the training traces, preprocess them, and show the resulting frame.
dataset = dataset_preprocessing(make_dataset_id(paths))
dataset

In [None]:
# Fit the KNN, Random Forest and SVM classifiers on the traces listed in
# `paths`; the call also prints hold-out metrics for each model.
knn, rf, svm = make_train_and_test(paths, k_neigh=5)

In [None]:
"""
Saving models
"""
# Pickle each fitted estimator to its own file, in the order KNN, RF, SVM.
for file_name, fitted_model in (
    ("/content/drive/MyDrive/Tesi_Magistrale/notebook/secondo_paper/classificatori/DAGA_classificatori/DOS_KNN.pkl", knn),
    ("/content/drive/MyDrive/Tesi_Magistrale/notebook/secondo_paper/classificatori/DAGA_classificatori/DOS_RF.pkl", rf),
    ("/content/drive/MyDrive/Tesi_Magistrale/notebook/secondo_paper/classificatori/DAGA_classificatori/DOS_SVM.pkl", svm),
):
  with open(file_name, "wb") as open_file:
    pickle.dump(fitted_model, open_file)

**TEST DEL MODELLO DEL DOS SUGLI ALTRI FILE**

In [None]:
# Score the trained DoS models on trace 5, which is not part of `paths`.
dataset = dataset_preprocessing(read_CAN_trace(path_file_5))

Y = dataset['ANOMALY']
x = dataset.drop(columns=['ANOMALY'])

for banner, model in (
    ("KNN results: .......", knn),
    ("RF results: .......", rf),
    ("SVM results: .......", svm),
):
  pred = model.predict(x)
  print(banner)
  print_results(Y, pred)

In [None]:
# Score the trained DoS models on trace 6, which is not part of `paths`.
dataset = dataset_preprocessing(read_CAN_trace(path_file_6))

Y = dataset['ANOMALY']
x = dataset.drop(columns=['ANOMALY'])

for banner, model in (
    ("KNN results: .......", knn),
    ("RF results: .......", rf),
    ("SVM results: .......", svm),
):
  pred = model.predict(x)
  print(banner)
  print_results(Y, pred)

In [None]:
# Score the trained DoS models on trace 7, which is not part of `paths`.
dataset = dataset_preprocessing(read_CAN_trace(path_file_7))

Y = dataset['ANOMALY']
x = dataset.drop(columns=['ANOMALY'])

for banner, model in (
    ("KNN results: .......", knn),
    ("RF results: .......", rf),
    ("SVM results: .......", svm),
):
  pred = model.predict(x)
  print(banner)
  print_results(Y, pred)