# Digital Twin model

## Initialization

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ML.DTPredictor import DTPredictor
from ML.Model import Predicter
from PreProcessing.DataManipulation import DataManipulation
import pickle

In [2]:
# Tag interpolated into the names passed to saveModel() in the model sections
# (e.g. f"MLPClassifier_{MODEL_PREFIX}"); despite the name, it ends up as a suffix.
MODEL_PREFIX = "deep"

In [3]:
# Load the scenario CSV through the project's DataManipulation wrapper; the
# tabular data is then reachable as dm.data.
dm = DataManipulation("../data/deep-scenario.csv")
# Preview the first three rows to sanity-check the loaded columns.
dm.data.head(3)

Unnamed: 0,Execution,ScenarioID,Configuration_API_Description,Attribute[TTC],Attribute[DTO],Attribute[Jerk],Attribute[COL],Attribute[COLT],Attribute[SAC],reward,road,strategy,scenario
0,0,0_scenario_0,A red BoxTruck is overtaking (near) the ego ve...,100000.0,24.810964,3.48,False,,0.0,dto,road1,greedy,rain_day
1,0,0_scenario_1,A skyblue SUV is driving from the opposite dir...,100000.0,8.654072,1.04,False,,0.0,dto,road1,greedy,rain_day
2,0,0_scenario_2,A skyblue BoxTruck is driving ahead (near) of ...,2.103269,10.209357,4.98,False,,0.0,dto,road1,greedy,rain_day


## Data Manipulation

In [4]:
# Bring in the data stored in dataFromXML.csv. Presumably this is what adds the
# speed1..speed6 columns selected further down (they do not appear in the
# head() preview of the raw scenario CSV) -- confirm in DataManipulation.addFromXML.
dm.addFromXML("../data/dataFromXML.csv")

In [8]:
# Export DTO, the speed1..speed6 columns and SAC for every row flagged as a
# collision, so they can be inspected outside the notebook.
cols = ["Attribute[DTO]", "speed1", "speed2", "speed3", "speed4", "speed5", "speed6", "Attribute[SAC]"]
# Element-wise comparison yields the boolean row mask.
collision_mask = dm.data["Attribute[COL]"] == True  # noqa: E712
# One .loc call selects rows and columns together instead of chaining two indexers.
# NOTE(review): "speeddddds" has no extension and looks like a scratch name; it is
# kept unchanged so anything that reads this file keeps working -- consider renaming.
dm.data.loc[collision_mask, cols].to_csv("speeddddds")

In [10]:
# Hard-coded positional row indices (used with .iloc) of the scenarios to inspect.
ids = [268, 273, 477, 1406, 1662, 1961, 2000, 2001, 2002, 2091]
# Columns of interest: DTO, the speed1..speed6 columns and SAC.
cols = ["Attribute[DTO]", "speed1", "speed2", "speed3", "speed4", "speed5", "speed6", "Attribute[SAC]"]
# Narrow to the columns first, then pick the rows by position; the resulting
# frame is the same as selecting rows first and columns second.
dm.data[cols].iloc[ids]

Unnamed: 0,Attribute[DTO],speed1,speed2,speed3,speed4,speed5,speed6,Attribute[SAC]
268,1.295431,6.304,5.137,3.637,2.589,3.443,4.764,4.442793
273,1.331153,6.479,5.309,2.991,1.945,3.981,5.591,2.346909
477,1.411721,0.001,1.514,1.804,2.7,3.077,4.397,2.378859
1406,4.095299,10.067,10.158,7.843,3.543,3.705,4.302,3.715036
1662,1.378684,5.026,3.875,2.626,3.734,5.172,5.616,3.522738
1961,1.4738,8.08,6.753,5.336,5.005,6.832,6.982,5.225631
2000,50.289233,0.082,1.422,2.504,0.007,0.003,0.001,1.340647
2001,25.204523,0.135,1.481,2.582,0.003,0.002,0.005,0.217742
2002,27.160148,0.135,1.557,2.778,0.003,0.005,0.004,0.34492
2091,1.511598,6.909,5.71,3.271,4.587,6.061,7.418,3.923682


## Model

In [11]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [12]:
# Under-sample the data held by dm. The recorded output below shows 3000
# non-collision rows remaining next to 1050 collision rows, so 3000 presumably
# caps the majority (False) class -- confirm in DataManipulation.underSample.
# NOTE(review): this appears to mutate dm in place, so re-running cells above
# that display dm.data will show different data afterwards.
dm.underSample(3000)
# Class balance of the collision flag after under-sampling.
dm.data.value_counts("Attribute[COL]")

Attribute[COL]
False    3000
True     1050
dtype: int64

In [13]:
# Partition the data held by dm into train/test feature and target sets.
trainX, trainY, testX, testY = dm.splitTrainTest()
# Report the shape of each split on one line, in the order train -> test.
split_shapes = {
    "trainX": trainX.shape,
    "trainY": trainY.shape,
    "testX": testX.shape,
    "testY": testY.shape,
}
print(", ".join(f"{label}:{shape}" for label, shape in split_shapes.items()))

splitting at 3240.
trainX:(3240, 30), trainY:(3240, 4), testX:(810, 30), testY:(810, 4)


### MLP Classifier

In [14]:
# Model for the "MLP Classifier" section, built with the project's Predicter wrapper.
mlp_clf = Predicter()
# Pre-process the training split before the test split, each on a copy so the
# shared trainX/trainY/testX/testY frames are not handed to preProcess directly.
# The recorded output shows "Scaler is fitted" only once for the two calls, so
# presumably the scaler is fitted on the first (training) call and reused for
# the test split -- confirm in Predicter.preProcess; if so, the order matters.
mlp_trainX, mlp_trainY = mlp_clf.preProcess(trainX.copy(), trainY.copy())
mlp_testX, mlp_testY = mlp_clf.preProcess(testX.copy(), testY.copy())

# Train on the pre-processed training split.
mlp_clf.fit(mlp_trainX, mlp_trainY)

Scaler is fitted




In [15]:
# Predict on the pre-processed test split.
mlp_pred = mlp_clf.predict(mlp_testX)
# Score the predictions; getScore prints the TN/FP/FN/TP counts and
# accuracy/precision/recall/F1 (see the cell output).
# NOTE(review): the arguments are passed as (predictions, targets) here, while
# the DTPredictor-based cells call getScore(targets, predictions). The recorded
# output (FN + TP = 217 = number of collisions) is consistent with this order
# being the one Predicter.getScore expects -- confirm against ML.Model.
mlp_score = mlp_clf.getScore(mlp_pred, mlp_testY)

Total: 810, number of collisions: 217
	TN: 572 	| FP: 21 
	FN: 27 	| TP: 190
Accuracy: 0.94
Precision: 0.9
Recall: 0.88
F1: 0.89


In [16]:
# Join the entries [0][0], [0][1], [1][0], [1][1] of the score with "-"
# (presumably the confusion-matrix counts -- confirm against Predicter.getScore).
model_score = "-".join(f"{mlp_score[row][col]}" for row in (0, 1) for col in (0, 1))
# Persist the model; the joined string is what gets passed as `accuracy`.
mlp_clf.saveModel(f"MLPClassifier_{MODEL_PREFIX}", accuracy=model_score)

Model saved!


### Random Forest

In [17]:
# Random forest (100 trees, depth capped at 10, fixed seed) wrapped in the
# project's DTPredictor.
rf_clf = DTPredictor(RandomForestClassifier(n_estimators=100, max_depth=10, random_state=1))
# Pre-process train before test, each on a copy. The recorded output shows
# "Scaler is fitted" once for the two calls, so presumably the first call fits
# the scaler and the second reuses it -- confirm in DTPredictor.preProcess.
rf_trainX, rf_trainY = rf_clf.preProcess(trainX.copy(), trainY.copy())
rf_testX, rf_testY = rf_clf.preProcess(testX.copy(), testY.copy())

# Train on the pre-processed training split.
rf_clf.fit(rf_trainX, rf_trainY)

Scaler is fitted


In [18]:
# Predict on the pre-processed test split.
rf_pred = rf_clf.predict(rf_testX)
# Score as getScore(targets, predictions); it prints accuracy, precision,
# recall, F1 and the confusion matrix (see the cell output). The returned value
# is indexed as a 2x2 structure when the model is saved.
rf_score = rf_clf.getScore(rf_testY, rf_pred)

Total: 810, Collisions: 217
Accuracy: 0.9506172839506173, Precision: 0.9359605911330049, Recall: 0.8755760368663594, F1: 0.9047619047619048
Confusion matrix:
[[580  13]
 [ 27 190]]


In [19]:
# Join the entries [0][0], [0][1], [1][0], [1][1] of the score with "-"
# (presumably the confusion-matrix counts -- confirm against DTPredictor.getScore).
model_score = "-".join(f"{rf_score[row][col]}" for row in (0, 1) for col in (0, 1))
# Persist the model; the joined string is what gets passed as `accuracy`.
rf_clf.saveModel(f"RandomForestClassifier_{MODEL_PREFIX}", accuracy=model_score)

Model saved!


### Support Vector Machine - Classifier

In [20]:
# Support vector classifier (only random_state is set explicitly) wrapped in
# the project's DTPredictor.
svm_clf = DTPredictor(SVC(random_state=1))
# Pre-process train before test, each on a copy. The recorded output shows
# "Scaler is fitted" once for the two calls, so presumably the first call fits
# the scaler and the second reuses it -- confirm in DTPredictor.preProcess.
svm_trainX, svm_trainY = svm_clf.preProcess(trainX.copy(), trainY.copy())
svm_testX, svm_testY = svm_clf.preProcess(testX.copy(), testY.copy())

Scaler is fitted


In [21]:
# Train the wrapped SVC on the pre-processed training split.
svm_clf.fit(svm_trainX, svm_trainY)

In [23]:
# Predict on the pre-processed test split.
svm_pred = svm_clf.predict(svm_testX)
# Score as getScore(targets, predictions); it prints accuracy, precision,
# recall, F1 and the confusion matrix (see the cell output). The returned value
# is indexed as a 2x2 structure when the model is saved.
svm_score = svm_clf.getScore(svm_testY, svm_pred)

Total: 810, Collisions: 217
Accuracy: 0.8962962962962963, Precision: 0.9235668789808917, Recall: 0.6682027649769585, F1: 0.7754010695187166
Confusion matrix:
[[581  12]
 [ 72 145]]


In [24]:
# Join the entries [0][0], [0][1], [1][0], [1][1] of the score with "-"
# (presumably the confusion-matrix counts -- confirm against DTPredictor.getScore).
model_score = "-".join(f"{svm_score[row][col]}" for row in (0, 1) for col in (0, 1))
# Persist the model; the joined string is what gets passed as `accuracy`.
svm_clf.saveModel(f"SVC_{MODEL_PREFIX}", accuracy=model_score)

Model saved!


### XGBoost - Classifier

In [25]:
# XGBoost classifier (100 estimators, fixed seed) wrapped in the project's
# DTPredictor.
xgb_clf = DTPredictor(XGBClassifier(n_estimators=100, random_state=1))
# Pre-process train before test, each on a copy. The recorded output shows
# "Scaler is fitted" once for the two calls, so presumably the first call fits
# the scaler and the second reuses it -- confirm in DTPredictor.preProcess.
xgb_trainX, xgb_trainY = xgb_clf.preProcess(trainX.copy(), trainY.copy())
xgb_testX, xgb_testY = xgb_clf.preProcess(testX.copy(), testY.copy())

Scaler is fitted


In [26]:
# Train the wrapped XGBClassifier on the pre-processed training split.
xgb_clf.fit(xgb_trainX, xgb_trainY)

In [27]:
# Predict on the pre-processed test split.
xgb_pred = xgb_clf.predict(xgb_testX)
# Score as getScore(targets, predictions); it prints accuracy, precision,
# recall, F1 and the confusion matrix (see the cell output). The returned value
# is indexed as a 2x2 structure when the model is saved.
xgb_score = xgb_clf.getScore(xgb_testY, xgb_pred)

Total: 810, Collisions: 217
Accuracy: 0.9555555555555556, Precision: 0.9289099526066351, Recall: 0.9032258064516129, F1: 0.9158878504672897
Confusion matrix:
[[578  15]
 [ 21 196]]


In [28]:
# Join the entries [0][0], [0][1], [1][0], [1][1] of the score with "-"
# (presumably the confusion-matrix counts -- confirm against DTPredictor.getScore).
model_score = "-".join(f"{xgb_score[row][col]}" for row in (0, 1) for col in (0, 1))
# Persist the model; the joined string is what gets passed as `accuracy`.
xgb_clf.saveModel(f"XGBClassifier_{MODEL_PREFIX}", accuracy=model_score)

Model saved!
