In [1]:
from pandas import DataFrame, read_csv, concat
import random
import math
from useGeneratedData import splitTrainTest, NewPredicter, makeDataUsable

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# Testing data
# Three consecutive runs (Time restarts at 0 for each run) with hand-placed collisions,
# filled with random sensor values.
SEED = 42  # fixed seed so the frame below is identical on every Restart & Run All
random.seed(SEED)

size = 20  # must match the length of the hard-coded "Time" and "COL" lists
paramsToStore = {"Time": [0,1,2,3,0,1,2,3,4,5,0,1,2,3,4,5,6,7,8,9],
                "TTC": [random.uniform(0, 10) for _ in range(size)],
                "DTO": [random.uniform(0, 20) for _ in range(size)],
                "JERK": [abs(random.gauss(0, 20)) for _ in range(size)],
                "Speed": [random.uniform(0, 15) for _ in range(size)],
                "asX": [random.gauss(0, 0.15) for _ in range(size)],
                "asY": [random.gauss(0, 0.15) for _ in range(size)],
                "asZ": [random.gauss(0, 0.1) for _ in range(size)],
                "COL": [0,0,0,1,0,0,0,1,0,0, 0,0,0,1,0,0,0,1,0,0]}
df = DataFrame(paramsToStore)
# df.set_index("Time", inplace=True)
# df.to_csv(PATH+"/data/testGenerateData.csv", index=False)
df.head(20)

Unnamed: 0,Time,TTC,DTO,JERK,Speed,asX,asY,asZ,COL
0,0,7.925208,15.933437,25.64581,12.590772,0.025443,-0.18077,0.075587,0
1,1,7.002332,16.233319,40.208376,12.789863,0.032708,-0.025316,0.105092,0
2,2,3.767259,10.013044,42.024013,0.027384,-0.511466,0.105235,-0.043563,0
3,3,6.836008,9.931474,11.594839,5.322831,0.087182,0.147977,0.101152,1
4,0,0.219982,4.359584,53.014012,6.995083,-0.005002,0.027986,0.140186,0
5,1,2.555299,1.940769,27.63937,11.763641,-0.100998,0.316521,-0.088831,0
6,2,3.358577,17.158828,16.067957,0.169314,0.086034,-0.084619,-0.012201,0
7,3,5.633592,2.865899,26.744139,14.388324,0.009543,-0.077476,-0.115699,1
8,4,9.913071,5.499353,24.469136,10.913183,-0.216512,0.356399,-0.011747,0
9,5,2.333337,6.422581,6.450475,4.520565,0.293791,-0.234486,-0.042731,0


In [3]:
def makeDataUsable(df: DataFrame, pastImportance: int=6, rowsBeforeCol: int=3, removeCol: bool=True) -> DataFrame:
    """
    Turn generated data into a Dataframe of sliding windows.

    Every output row holds the values of the current input row and the
    pastImportance-1 rows before it: for each feature c there are columns
    c1 (oldest) to c<pastImportance> (current row).
    Rows with "Time" < pastImportance-1 are skipped, and so are rows that do
    not have pastImportance-1 rows in front of them in the file.
    Negative "TTC" and "DTO" values are stored as 0.

    When a row has "COL" == 1, the "COL" value of the last rows already
    collected is set to 1 as well. The number of rows marked (the collision row
    itself included) is the smallest of rowsBeforeCol,
    Time - pastImportance + 2 and the number of rows collected so far.

    ### Params:
        * df: Dataframe, need to have the columns named "Time", "TTC", "DTO", "JERK", "Speed", "asX", "asY", "asZ" and "COL"
        * pastImportance: int, needs to be > 0
        * rowsBeforeCol: int, needs to be > 0
        * removeCol: bool, True if the original collision row should be removed
    ### Returns:
        * Dataframe with the columns "Time", the windowed features and "COL"
    """
    colsToUse = ["TTC", "DTO", "JERK", "Speed", "asX", "asY", "asZ"]
    nonNegativeCols = ("TTC", "DTO")
    columns = ["Time"]
    columns += [f"{c}{i}" for c in colsToUse for i in range(1, pastImportance+1)]
    columns += ["COL", "toRemove"]

    dataDict = {col: [] for col in columns}

    # Work with positions (not index labels) because the window is read with iloc
    for pos, (_, row) in enumerate(df.iterrows()):
        if row["Time"] < pastImportance-1:
            continue

        start = pos-pastImportance+1
        if start < 0:
            # A negative iloc position would wrap around to the end of the frame
            continue

        dataDict["Time"].append(row["Time"])
        for j, k in enumerate(range(start, pos+1), start=1):
            past = df.iloc[k]
            for c in colsToUse:
                value = past[c]
                # Should be able to remove this if check with new data
                if c in nonNegativeCols and value < 0:
                    value = 0
                dataDict[f"{c}{j}"].append(value)

        dataDict["COL"].append(row["COL"])
        dataDict["toRemove"].append(row["COL"])
        if row["COL"] == 1:
            available = int(row["Time"]-pastImportance+2)
            # Never mark more rows than were collected; the slice assignment would grow the list
            amount = min(int(rowsBeforeCol), available, len(dataDict["COL"]))
            if amount > 0:  # [-0:] would address the whole list
                dataDict["COL"][-amount:] = [1]*amount

    result = DataFrame(dataDict)
    if removeCol:
        # "toRemove" holds the original "COL" value, so this drops the collision rows only
        result = result[result["toRemove"] == 0]
    return result.drop(columns="toRemove")

# makeDataUsable(read_csv("../../data/testGenerateData.csv"), pastImportance=4, rowsBeforeCol=4)

In [4]:
# Window the synthetic test frame: 2 rows per window, mark up to 3 rows per collision
makeDataUsable(df, pastImportance=2, rowsBeforeCol=3, removeCol=True)

Unnamed: 0,Time,TTC1,TTC2,DTO1,DTO2,JERK1,JERK2,Speed1,Speed2,asX1,asX2,asY1,asY2,asZ1,asZ2,COL
0,1.0,7.925208,7.002332,15.933437,16.233319,25.64581,40.208376,12.590772,12.789863,0.025443,0.032708,-0.18077,-0.025316,0.075587,0.105092,1.0
1,2.0,7.002332,3.767259,16.233319,10.013044,40.208376,42.024013,12.789863,0.027384,0.032708,-0.511466,-0.025316,0.105235,0.105092,-0.043563,1.0
3,1.0,0.219982,2.555299,4.359584,1.940769,53.014012,27.63937,6.995083,11.763641,-0.005002,-0.100998,0.027986,0.316521,0.140186,-0.088831,1.0
4,2.0,2.555299,3.358577,1.940769,17.158828,27.63937,16.067957,11.763641,0.169314,-0.100998,0.086034,0.316521,-0.084619,-0.088831,-0.012201,1.0
6,4.0,5.633592,9.913071,2.865899,5.499353,26.744139,24.469136,14.388324,10.913183,0.009543,-0.216512,-0.077476,0.356399,-0.115699,-0.011747,0.0
7,5.0,9.913071,2.333337,5.499353,6.422581,24.469136,6.450475,10.913183,4.520565,-0.216512,0.293791,0.356399,-0.234486,-0.011747,-0.042731,0.0
8,1.0,4.089684,0.781457,10.681662,10.684111,1.260866,10.981635,0.8907,4.512204,-0.135675,0.416428,-0.112562,-0.004654,-0.088185,-0.095902,1.0
9,2.0,0.781457,3.899687,10.684111,7.941733,10.981635,41.394334,4.512204,9.584366,0.416428,-0.20207,-0.004654,-0.023975,-0.095902,0.116757,1.0
11,4.0,0.562588,7.689322,5.542141,1.613677,10.474201,11.753614,6.046215,7.945042,-0.041899,-0.21673,0.263737,-0.061423,-0.020339,-0.14613,0.0
12,5.0,7.689322,6.538496,1.613677,7.640565,11.753614,17.43018,7.945042,6.023951,-0.21673,0.005194,-0.061423,0.019083,-0.14613,-0.196812,1.0


In [18]:
# Load the generated data and show the rows at positions 18 to 24
rawData = read_csv("../../data/generatedData.csv")
rawData.iloc[18:25, :]

Unnamed: 0,Time,TTC,DTO,JERK,Speed,asX,asY,asZ,COL
18,19.0,14.615,19.1452,1.447,13.282,0.0,0.0,0.0,0
19,20.0,17.307,18.2413,0.009,12.981,0.0,0.0,0.0,0
20,21.0,5.168,15.4067,0.009,12.689,0.0,0.0,0.0,0
21,22.0,0.247,3.0764,0.007,12.404,0.0,0.0,-0.0,0
22,23.0,0.03,0.2646,11.97,0.149,-0.017,-0.001,0.006,1
23,24.0,50.0,0.8598,12.106,0.0,0.0,0.0,0.0,0
24,25.0,50.0,1.4957,0.149,0.0,-0.0,0.0,0.0,0


In [22]:
# Half the number of rows, divided by 60
# NOTE(review): presumably two rows are recorded per second, making this minutes of data - confirm
(rawData.shape[0] // 2) / 60

34.81666666666667

In [5]:
# Windowed version of the generated data, used by the cells below
data = makeDataUsable(
    read_csv("../../data/generatedData.csv"),
    pastImportance=4,
    rowsBeforeCol=5,
    removeCol=True,
)
data.head()


Unnamed: 0,Time,TTC1,TTC2,TTC3,TTC4,DTO1,DTO2,DTO3,DTO4,JERK1,...,asX4,asY1,asY2,asY3,asY4,asZ1,asZ2,asZ3,asZ4,COL
0,3.0,50.0,50.0,50.0,50.0,16.6264,100.0,100.0,100.0,0.004,...,0.0,0.0,-0.0,-0.001,-0.001,0.0,0.006,0.001,0.0,0.0
1,4.0,50.0,50.0,50.0,50.0,100.0,100.0,100.0,100.0,0.661,...,-0.001,-0.0,-0.001,-0.001,0.0,0.006,0.001,0.0,-0.001,0.0
2,5.0,50.0,50.0,50.0,50.0,100.0,100.0,100.0,100.0,4.117,...,-0.0,-0.001,-0.001,0.0,0.0,0.001,0.0,-0.001,0.0,0.0
3,6.0,50.0,50.0,50.0,50.0,100.0,100.0,100.0,100.0,0.865,...,0.0,-0.001,0.0,0.0,0.0,0.0,-0.001,0.0,0.0,0.0
4,7.0,50.0,50.0,50.0,5.322,100.0,100.0,100.0,83.3296,3.704,...,0.004,0.0,0.0,0.0,-0.001,-0.001,0.0,0.0,0.005,0.0


In [10]:
# Compare the collisions in the raw file with the rows labelled as collision after windowing
cc = read_csv("../../data/generatedData.csv")
collisions = int((data["COL"] == 1).sum())

print(f"Amount of unique collisions: {int((cc['COL'] == 1).sum())}")
print(f"Amount of collisions: {collisions}")
print(f"Amount not colliding: {int((data['COL'] == 0).sum())}")

Amount of unique collisions: 215
Amount of collisions: 492
Amount not colliding: 3458


In [13]:
# Correlation of every column with the collision label
data.corr().loc[:, "COL"]

Time     -0.065969
TTC1     -0.101218
TTC2     -0.177124
TTC3     -0.251498
TTC4     -0.316259
DTO1     -0.062557
DTO2     -0.109763
DTO3     -0.157293
DTO4     -0.208628
JERK1    -0.051574
JERK2    -0.043444
JERK3    -0.045155
JERK4    -0.068430
Speed1    0.154619
Speed2    0.170875
Speed3    0.174013
Speed4    0.168772
asX1     -0.010917
asX2     -0.007154
asX3     -0.014407
asX4     -0.017245
asY1      0.017051
asY2      0.018624
asY3      0.000970
asY4     -0.001754
asZ1      0.014642
asZ2      0.021415
asZ3      0.024463
asZ4      0.038726
COL       1.000000
Name: COL, dtype: float64

In [97]:
def getResultsFromData(filename: str, pastImportance: int=4, rowsBeforeCol: int=4, removeCol: bool=True, undersampleRatio: float=1):
    """
    Fit and score a NewPredicter on the generated data stored in a csv file.

    The file is windowed with makeDataUsable, the first column is dropped,
    the non collision rows are optionally undersampled and the result is
    handed to splitTrainTest with a split ratio of 0.8.

    ### Params:
        * filename: str, path of the csv file to read
        * pastImportance: int, passed on to makeDataUsable
        * rowsBeforeCol: int, passed on to makeDataUsable
        * removeCol: bool, passed on to makeDataUsable
        * undersampleRatio: float, ratio of non collisions / collisions, no undersampling if it is <= 0
    ### Returns:
        * tuple of the predicter, the Dataframe that was split and the value returned by getScore
    """
    usable = makeDataUsable(read_csv(filename), pastImportance, rowsBeforeCol, removeCol)
    usable.drop(usable.columns[0], axis=1, inplace=True) # Dropping 'Time'

    if undersampleRatio > 0:
        hits = usable[usable["COL"] == 1]
        misses = usable[usable["COL"] == 0]
        keep = int(len(hits) * undersampleRatio)
        # Only sample when there are more non collisions than the ratio asks for
        if keep < len(misses):
            print("Undersampling")
            usable = concat([misses.sample(keep, random_state=1), hits])

    trainX, trainY, testX, testY = splitTrainTest(usable, splitRatio=0.8)
    print(f"trainX: {trainX.shape}, trainY: {trainY.shape}, testX: {testX.shape}, testY: {testY.shape}")

    predicter = NewPredicter()
    # preProcess sees the training features before the test features
    trainFeatures = predicter.preProcess(trainX)
    trainLabels = trainY.to_numpy()
    testFeatures = predicter.preProcess(testX)
    testLabels = testY.to_numpy()

    predicter.fit(trainFeatures, trainLabels)
    predicted = predicter.predict(testFeatures)
    score = predicter.getScore(predicted, testLabels)

    return predicter, usable, score

In [98]:
# Run with the original collision rows removed
p, dataRemoveCol, cmRemoveCol = getResultsFromData(
    "../../data/generatedData.csv",
    pastImportance=4,
    rowsBeforeCol=4,
    removeCol=True,
    undersampleRatio=1,
)
# dataRemoveCol.corr()["COL"]

Undersampling
Splitting at 598, total rows: 748
trainX: (598, 28), trainY: (598,), testX: (150, 28), testY: (150,)
Scaler is fitted
Total: 150, Collisions: 73
Accuracy: 0.6533333333333333, Precision: 0.6329113924050633, Recall: 0.684931506849315, F1: 0.6578947368421053
Confusion matrix:
[[48 29]
 [23 50]]




In [51]:
# One line per column: name, correlation with 'COL', minimum and maximum (rounded to 3 decimals)
print("Feature   Corr to 'COL'   Min value    Max value")
for c, *stats in zip(dataRemoveCol.columns, dataRemoveCol.corr()["COL"], dataRemoveCol.min(), dataRemoveCol.max()):
    cells = [c] + [str(round(value, 3)) for value in stats]
    print(" ".join(cell.ljust(12) for cell in cells))


Feature   Corr to 'COL'   Min value    Max value
TTC1         -0.218       0.0          50.0        
TTC2         -0.301       0.0          50.0        
TTC3         -0.378       0.0          50.0        
TTC4         -0.467       0.0          50.0        
DTO1         -0.108       0.0          100.0       
DTO2         -0.223       0.0          100.0       
DTO3         -0.308       0.0          100.0       
DTO4         -0.399       0.0          100.0       
JERK1        -0.07        0.0          65.892      
JERK2        -0.04        0.0          85.116      
JERK3        -0.058       0.0          86.352      
JERK4        -0.095       0.0          33.483      
Speed1       0.214        0.0          32.73       
Speed2       0.212        0.0          34.721      
Speed3       0.202        0.0          32.73       
Speed4       0.185        0.0          34.721      
asX1         0.038        -0.463       0.233       
asX2         -0.049       -0.315       0.254       
asX3         -0

In [87]:
# Correlation matrix of the TTC, DTO, JERK and Speed windows (steps 1 to 4) and the collision label
dataRemoveCol[[f"{feature}{step}" for feature in ("TTC", "DTO", "JERK", "Speed") for step in range(1, 5)] + ["COL"]].corr()
# dataRemoveCol[['TTC1', 'TTC2', 'TTC3', 'TTC4']]

Unnamed: 0,TTC1,TTC2,TTC3,TTC4,DTO1,DTO2,DTO3,DTO4,JERK1,JERK2,JERK3,JERK4,Speed1,Speed2,Speed3,Speed4,COL
TTC1,1.0,0.572853,0.396297,0.286086,0.338424,0.323809,0.330951,0.292208,-0.017977,-0.146863,-0.084863,0.064879,-0.231838,-0.114824,-0.045497,-0.0078,-0.217747
TTC2,0.572853,1.0,0.560639,0.428378,0.265085,0.387294,0.359175,0.347288,0.08589,-0.055062,-0.076778,0.124697,-0.320184,-0.209518,-0.146671,-0.099175,-0.30052
TTC3,0.396297,0.560639,1.0,0.57498,0.097355,0.285341,0.390649,0.339234,0.091405,0.071057,0.03461,0.086277,-0.393839,-0.370772,-0.274672,-0.212991,-0.378448
TTC4,0.286086,0.428378,0.57498,1.0,0.101534,0.194148,0.329302,0.439026,0.096947,0.069894,0.053752,0.141017,-0.34147,-0.349524,-0.346727,-0.285268,-0.467215
DTO1,0.338424,0.265085,0.097355,0.101534,1.0,0.769329,0.603658,0.511128,-0.144968,-0.12568,-0.058936,-0.0595,0.398637,0.455911,0.457441,0.458051,-0.107781
DTO2,0.323809,0.387294,0.285341,0.194148,0.769329,1.0,0.770722,0.625268,-0.116848,-0.114748,-0.06579,-0.036706,0.290372,0.364644,0.396546,0.413489,-0.222618
DTO3,0.330951,0.359175,0.390649,0.329302,0.603658,0.770722,1.0,0.732186,-0.094765,-0.09774,-0.079688,-0.052941,0.15176,0.236185,0.305589,0.340691,-0.308187
DTO4,0.292208,0.347288,0.339234,0.439026,0.511128,0.625268,0.732186,1.0,-0.049303,-0.043501,-0.017708,-0.01712,0.079263,0.13482,0.193323,0.242183,-0.399014
JERK1,-0.017977,0.08589,0.091405,0.096947,-0.144968,-0.116848,-0.094765,-0.049303,1.0,0.3894,0.020966,0.082649,-0.234327,-0.22805,-0.204099,-0.186089,-0.069724
JERK2,-0.146863,-0.055062,0.071057,0.069894,-0.12568,-0.114748,-0.09774,-0.043501,0.3894,1.0,0.662851,0.138306,-0.069479,-0.230282,-0.223485,-0.208238,-0.039798


In [35]:
# Same run as before, but keeping the original collision rows
p, dataNotRemoveCol, cmNotRemoveCol = getResultsFromData(
    "../../data/generatedData.csv",
    pastImportance=4,
    rowsBeforeCol=4,
    removeCol=False,
    undersampleRatio=1,
)
# dataNotRemoveCol.corr()["COL"]

Undersampling
Splitting at 942, total rows: 1178
trainX: (942, 28), trainY: (942,), testX: (236, 28), testY: (236,)
Scaler is fitted
Total: 236, Collisions: 119
Accuracy: 0.7372881355932204, Precision: 0.728, Recall: 0.7647058823529411, F1: 0.7459016393442623
Confusion matrix:
[[83 34]
 [28 91]]




In [37]:
# Correlation with 'COL' per feature, with and without the original collision rows
print("Feature, removed, not removed")
for c, x, y in zip(dataRemoveCol.columns, dataRemoveCol.corr()["COL"], dataNotRemoveCol.corr()["COL"]):
    print(" ".join(map(str, (c, round(x, 2), round(y, 2)))))

Feature, removed, not removed
TTC1 -0.22 -0.3
TTC2 -0.3 -0.38
TTC3 -0.38 -0.45
TTC4 -0.47 -0.44
DTO1 -0.11 -0.2
DTO2 -0.22 -0.28
DTO3 -0.31 -0.36
DTO4 -0.4 -0.42
JERK1 -0.07 -0.07
JERK2 -0.04 -0.02
JERK3 -0.06 0.19
JERK4 -0.09 0.32
Speed1 0.21 0.24
Speed2 0.21 0.22
Speed3 0.2 0.11
Speed4 0.18 -0.06
asX1 0.04 0.04
asX2 -0.05 -0.01
asX3 -0.0 0.04
asX4 -0.05 0.02
asY1 0.01 0.04
asY2 0.07 -0.01
asY3 -0.0 -0.01
asY4 -0.04 -0.05
asZ1 0.03 0.06
asZ2 0.07 0.07
asZ3 0.01 -0.0
asZ4 0.05 -0.01
COL 1.0 1.0


In [42]:
# NOTE DO NOT RUN
# Expected time to run: ~53 minutes

# Grid over every setting accepted by getResultsFromData
pastImportances = [2,3,4,5,6,7]
rowsBeforeCols = [2,3,4,5,6,7]
removeCols = [False, True]  # a list keeps the iteration order explicit
samplings = [1, 1.5, 2]

allCombos = [(pi, rbc, rc, sr) for pi in pastImportances for rbc in rowsBeforeCols for rc in removeCols for sr in samplings]

for (pi, rbc, rc, sr) in allCombos:
    print(f"\nPast importance: {pi}, sec before col: {rbc}, removeCol: {rc}, sample ratio: {sr}")
    # getResultsFromData returns three values; unpacking them into two names raised a ValueError.
    # Only the printed output is needed here, so the return value is discarded.
    getResultsFromData("../../data/generatedData.csv", pastImportance=pi, rowsBeforeCol=rbc, removeCol=rc, undersampleRatio=sr)


Past importance: 2, sec before col: 2, removeCol: False, sample ratio: 1
Undersampling
Splitting at 552, total rows: 690
trainX: (552, 14), trainY: (552,), testX: (138, 14), testY: (138,)
Scaler is fitted




Total: 138, Collisions: 73
Accuracy: 0.8623188405797102, Precision: 0.875, Recall: 0.863013698630137, F1: 0.8689655172413793
Confusion matrix:
[[56  9]
 [10 63]]

Past importance: 2, sec before col: 2, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 689, total rows: 862
trainX: (689, 14), trainY: (689,), testX: (173, 14), testY: (173,)
Scaler is fitted




Total: 173, Collisions: 67
Accuracy: 0.8439306358381503, Precision: 0.7857142857142857, Recall: 0.8208955223880597, F1: 0.8029197080291971
Confusion matrix:
[[91 15]
 [12 55]]

Past importance: 2, sec before col: 2, removeCol: False, sample ratio: 2
Undersampling
Splitting at 828, total rows: 1035
trainX: (828, 14), trainY: (828,), testX: (207, 14), testY: (207,)
Scaler is fitted
Total: 207, Collisions: 71
Accuracy: 0.9033816425120773, Precision: 0.84, Recall: 0.8873239436619719, F1: 0.863013698630137
Confusion matrix:
[[124  12]
 [  8  63]]

Past importance: 2, sec before col: 2, removeCol: True, sample ratio: 1
Undersampling
Splitting at 208, total rows: 260
trainX: (208, 14), trainY: (208,), testX: (52, 14), testY: (52,)
Scaler is fitted
Total: 52, Collisions: 31
Accuracy: 0.7115384615384616, Precision: 0.7857142857142857, Recall: 0.7096774193548387, F1: 0.7457627118644068
Confusion matrix:
[[15  6]
 [ 9 22]]

Past importance: 2, sec before col: 2, removeCol: True, sample ratio: 1.5



Total: 65, Collisions: 26
Accuracy: 0.7692307692307693, Precision: 0.72, Recall: 0.6923076923076923, F1: 0.7058823529411765
Confusion matrix:
[[32  7]
 [ 8 18]]

Past importance: 2, sec before col: 2, removeCol: True, sample ratio: 2
Undersampling
Splitting at 312, total rows: 390
trainX: (312, 14), trainY: (312,), testX: (78, 14), testY: (78,)
Scaler is fitted




Total: 78, Collisions: 24
Accuracy: 0.8461538461538461, Precision: 0.7307692307692307, Recall: 0.7916666666666666, F1: 0.76
Confusion matrix:
[[47  7]
 [ 5 19]]

Past importance: 2, sec before col: 3, removeCol: False, sample ratio: 1
Undersampling
Splitting at 750, total rows: 938
trainX: (750, 14), trainY: (750,), testX: (188, 14), testY: (188,)
Scaler is fitted




Total: 188, Collisions: 94
Accuracy: 0.8138297872340425, Precision: 0.7920792079207921, Recall: 0.851063829787234, F1: 0.8205128205128204
Confusion matrix:
[[73 21]
 [14 80]]

Past importance: 2, sec before col: 3, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 937, total rows: 1172
trainX: (937, 14), trainY: (937,), testX: (235, 14), testY: (235,)
Scaler is fitted




Total: 235, Collisions: 95
Accuracy: 0.8425531914893617, Precision: 0.8085106382978723, Recall: 0.8, F1: 0.8042328042328043
Confusion matrix:
[[122  18]
 [ 19  76]]

Past importance: 2, sec before col: 3, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1125, total rows: 1407
trainX: (1125, 14), trainY: (1125,), testX: (282, 14), testY: (282,)
Scaler is fitted




Total: 282, Collisions: 85
Accuracy: 0.8333333333333334, Precision: 0.69, Recall: 0.8117647058823529, F1: 0.7459459459459459
Confusion matrix:
[[166  31]
 [ 16  69]]

Past importance: 2, sec before col: 3, removeCol: True, sample ratio: 1
Undersampling
Splitting at 406, total rows: 508
trainX: (406, 14), trainY: (406,), testX: (102, 14), testY: (102,)
Scaler is fitted
Total: 102, Collisions: 53
Accuracy: 0.803921568627451, Precision: 0.8235294117647058, Recall: 0.7924528301886793, F1: 0.8076923076923077
Confusion matrix:
[[40  9]
 [11 42]]

Past importance: 2, sec before col: 3, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 508, total rows: 635
trainX: (508, 14), trainY: (508,), testX: (127, 14), testY: (127,)
Scaler is fitted




Total: 127, Collisions: 51
Accuracy: 0.7559055118110236, Precision: 0.6785714285714286, Recall: 0.7450980392156863, F1: 0.7102803738317757
Confusion matrix:
[[58 18]
 [13 38]]

Past importance: 2, sec before col: 3, removeCol: True, sample ratio: 2
Undersampling
Splitting at 609, total rows: 762
trainX: (609, 14), trainY: (609,), testX: (153, 14), testY: (153,)
Scaler is fitted
Total: 153, Collisions: 42
Accuracy: 0.803921568627451, Precision: 0.6111111111111112, Recall: 0.7857142857142857, F1: 0.6875000000000001
Confusion matrix:
[[90 21]
 [ 9 33]]

Past importance: 2, sec before col: 4, removeCol: False, sample ratio: 1
Undersampling
Splitting at 942, total rows: 1178
trainX: (942, 14), trainY: (942,), testX: (236, 14), testY: (236,)
Scaler is fitted




Total: 236, Collisions: 123
Accuracy: 0.7542372881355932, Precision: 0.76, Recall: 0.7723577235772358, F1: 0.7661290322580646
Confusion matrix:
[[83 30]
 [28 95]]

Past importance: 2, sec before col: 4, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1177, total rows: 1472
trainX: (1177, 14), trainY: (1177,), testX: (295, 14), testY: (295,)
Scaler is fitted




Total: 295, Collisions: 124
Accuracy: 0.7830508474576271, Precision: 0.7380952380952381, Recall: 0.75, F1: 0.744
Confusion matrix:
[[138  33]
 [ 31  93]]

Past importance: 2, sec before col: 4, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1413, total rows: 1767
trainX: (1413, 14), trainY: (1413,), testX: (354, 14), testY: (354,)
Scaler is fitted
Total: 354, Collisions: 117
Accuracy: 0.7909604519774012, Precision: 0.6616541353383458, Recall: 0.7521367521367521, F1: 0.704
Confusion matrix:
[[192  45]
 [ 29  88]]

Past importance: 2, sec before col: 4, removeCol: True, sample ratio: 1
Undersampling
Splitting at 598, total rows: 748
trainX: (598, 14), trainY: (598,), testX: (150, 14), testY: (150,)
Scaler is fitted




Total: 150, Collisions: 77
Accuracy: 0.68, Precision: 0.6835443037974683, Recall: 0.7012987012987013, F1: 0.6923076923076923
Confusion matrix:
[[48 25]
 [23 54]]

Past importance: 2, sec before col: 4, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 748, total rows: 935
trainX: (748, 14), trainY: (748,), testX: (187, 14), testY: (187,)
Scaler is fitted




Total: 187, Collisions: 83
Accuracy: 0.7486631016042781, Precision: 0.725, Recall: 0.6987951807228916, F1: 0.7116564417177915
Confusion matrix:
[[82 22]
 [25 58]]

Past importance: 2, sec before col: 4, removeCol: True, sample ratio: 2
Undersampling
Splitting at 897, total rows: 1122
trainX: (897, 14), trainY: (897,), testX: (225, 14), testY: (225,)
Scaler is fitted




Total: 225, Collisions: 79
Accuracy: 0.7644444444444445, Precision: 0.6710526315789473, Recall: 0.6455696202531646, F1: 0.6580645161290323
Confusion matrix:
[[121  25]
 [ 28  51]]

Past importance: 2, sec before col: 5, removeCol: False, sample ratio: 1
Undersampling
Splitting at 1131, total rows: 1414
trainX: (1131, 14), trainY: (1131,), testX: (283, 14), testY: (283,)
Scaler is fitted




Total: 283, Collisions: 135
Accuracy: 0.7279151943462897, Precision: 0.7013888888888888, Recall: 0.7481481481481481, F1: 0.7240143369175627
Confusion matrix:
[[105  43]
 [ 34 101]]

Past importance: 2, sec before col: 5, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1413, total rows: 1767
trainX: (1413, 14), trainY: (1413,), testX: (354, 14), testY: (354,)
Scaler is fitted
Total: 354, Collisions: 138
Accuracy: 0.7711864406779662, Precision: 0.6862745098039216, Recall: 0.7608695652173914, F1: 0.7216494845360826
Confusion matrix:
[[168  48]
 [ 33 105]]

Past importance: 2, sec before col: 5, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1696, total rows: 2121
trainX: (1696, 14), trainY: (1696,), testX: (425, 14), testY: (425,)
Scaler is fitted
Total: 425, Collisions: 139
Accuracy: 0.7576470588235295, Precision: 0.625, Recall: 0.6474820143884892, F1: 0.6360424028268551
Confusion matrix:
[[232  54]
 [ 49  90]]

Past importance: 2, sec before col: 5, removeCo



Total: 197, Collisions: 99
Accuracy: 0.700507614213198, Precision: 0.696078431372549, Recall: 0.7171717171717171, F1: 0.7064676616915423
Confusion matrix:
[[67 31]
 [28 71]]

Past importance: 2, sec before col: 5, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 984, total rows: 1230
trainX: (984, 14), trainY: (984,), testX: (246, 14), testY: (246,)
Scaler is fitted




Total: 246, Collisions: 88
Accuracy: 0.6951219512195121, Precision: 0.5656565656565656, Recall: 0.6363636363636364, F1: 0.5989304812834224
Confusion matrix:
[[115  43]
 [ 32  56]]

Past importance: 2, sec before col: 5, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1180, total rows: 1476
trainX: (1180, 14), trainY: (1180,), testX: (296, 14), testY: (296,)
Scaler is fitted




Total: 296, Collisions: 99
Accuracy: 0.7601351351351351, Precision: 0.6320754716981132, Recall: 0.6767676767676768, F1: 0.653658536585366
Confusion matrix:
[[158  39]
 [ 32  67]]

Past importance: 2, sec before col: 6, removeCol: False, sample ratio: 1
Undersampling
Splitting at 1318, total rows: 1648
trainX: (1318, 14), trainY: (1318,), testX: (330, 14), testY: (330,)
Scaler is fitted




Total: 330, Collisions: 151
Accuracy: 0.7242424242424242, Precision: 0.6704545454545454, Recall: 0.7814569536423841, F1: 0.7217125382262998
Confusion matrix:
[[121  58]
 [ 33 118]]

Past importance: 2, sec before col: 6, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1648, total rows: 2060
trainX: (1648, 14), trainY: (1648,), testX: (412, 14), testY: (412,)
Scaler is fitted




Total: 412, Collisions: 143
Accuracy: 0.7475728155339806, Precision: 0.6089385474860335, Recall: 0.7622377622377622, F1: 0.6770186335403726
Confusion matrix:
[[199  70]
 [ 34 109]]

Past importance: 2, sec before col: 6, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1977, total rows: 2472
trainX: (1977, 14), trainY: (1977,), testX: (495, 14), testY: (495,)
Scaler is fitted
Total: 495, Collisions: 144
Accuracy: 0.8181818181818182, Precision: 0.6730769230769231, Recall: 0.7291666666666666, F1: 0.7
Confusion matrix:
[[300  51]
 [ 39 105]]

Past importance: 2, sec before col: 6, removeCol: True, sample ratio: 1
Undersampling
Splitting at 974, total rows: 1218
trainX: (974, 14), trainY: (974,), testX: (244, 14), testY: (244,)
Scaler is fitted




Total: 244, Collisions: 118
Accuracy: 0.7295081967213115, Precision: 0.7096774193548387, Recall: 0.7457627118644068, F1: 0.7272727272727273
Confusion matrix:
[[90 36]
 [30 88]]

Past importance: 2, sec before col: 6, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 1217, total rows: 1522
trainX: (1217, 14), trainY: (1217,), testX: (305, 14), testY: (305,)
Scaler is fitted
Total: 305, Collisions: 102
Accuracy: 0.7245901639344262, Precision: 0.5692307692307692, Recall: 0.7254901960784313, F1: 0.6379310344827587
Confusion matrix:
[[147  56]
 [ 28  74]]

Past importance: 2, sec before col: 6, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1461, total rows: 1827
trainX: (1461, 14), trainY: (1461,), testX: (366, 14), testY: (366,)
Scaler is fitted




Total: 366, Collisions: 117
Accuracy: 0.7622950819672131, Precision: 0.6102941176470589, Recall: 0.7094017094017094, F1: 0.6561264822134387
Confusion matrix:
[[196  53]
 [ 34  83]]

Past importance: 2, sec before col: 7, removeCol: False, sample ratio: 1
Undersampling
Splitting at 1500, total rows: 1876
trainX: (1500, 14), trainY: (1500,), testX: (376, 14), testY: (376,)
Scaler is fitted




Total: 376, Collisions: 194
Accuracy: 0.7021276595744681, Precision: 0.7070707070707071, Recall: 0.7216494845360825, F1: 0.7142857142857143
Confusion matrix:
[[124  58]
 [ 54 140]]

Past importance: 2, sec before col: 7, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1876, total rows: 2345
trainX: (1876, 14), trainY: (1876,), testX: (469, 14), testY: (469,)
Scaler is fitted




Total: 469, Collisions: 178
Accuracy: 0.7398720682302772, Precision: 0.6443298969072165, Recall: 0.702247191011236, F1: 0.6720430107526882
Confusion matrix:
[[222  69]
 [ 53 125]]

Past importance: 2, sec before col: 7, removeCol: False, sample ratio: 2
Undersampling
Splitting at 2251, total rows: 2814
trainX: (2251, 14), trainY: (2251,), testX: (563, 14), testY: (563,)
Scaler is fitted
Total: 563, Collisions: 153
Accuracy: 0.7779751332149201, Precision: 0.5813953488372093, Recall: 0.6535947712418301, F1: 0.6153846153846154
Confusion matrix:
[[338  72]
 [ 53 100]]

Past importance: 2, sec before col: 7, removeCol: True, sample ratio: 1
Undersampling
Splitting at 1156, total rows: 1446
trainX: (1156, 14), trainY: (1156,), testX: (290, 14), testY: (290,)
Scaler is fitted




Total: 290, Collisions: 149
Accuracy: 0.6310344827586207, Precision: 0.6438356164383562, Recall: 0.6308724832214765, F1: 0.6372881355932203
Confusion matrix:
[[89 52]
 [55 94]]

Past importance: 2, sec before col: 7, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 1445, total rows: 1807
trainX: (1445, 14), trainY: (1445,), testX: (362, 14), testY: (362,)
Scaler is fitted




Total: 362, Collisions: 129
Accuracy: 0.6878453038674033, Precision: 0.5512820512820513, Recall: 0.6666666666666666, F1: 0.6035087719298246
Confusion matrix:
[[163  70]
 [ 43  86]]

Past importance: 2, sec before col: 7, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1735, total rows: 2169
trainX: (1735, 14), trainY: (1735,), testX: (434, 14), testY: (434,)
Scaler is fitted
Total: 434, Collisions: 143
Accuracy: 0.7304147465437788, Precision: 0.5902777777777778, Recall: 0.5944055944055944, F1: 0.5923344947735192
Confusion matrix:
[[232  59]
 [ 58  85]]

Past importance: 3, sec before col: 2, removeCol: False, sample ratio: 1
Undersampling
Splitting at 552, total rows: 690
trainX: (552, 21), trainY: (552,), testX: (138, 21), testY: (138,)
Scaler is fitted




Total: 138, Collisions: 75
Accuracy: 0.8478260869565217, Precision: 0.875, Recall: 0.84, F1: 0.8571428571428572
Confusion matrix:
[[54  9]
 [12 63]]

Past importance: 3, sec before col: 2, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 689, total rows: 862
trainX: (689, 21), trainY: (689,), testX: (173, 21), testY: (173,)
Scaler is fitted




Total: 173, Collisions: 70
Accuracy: 0.838150289017341, Precision: 0.8, Recall: 0.8, F1: 0.8000000000000002
Confusion matrix:
[[89 14]
 [14 56]]

Past importance: 3, sec before col: 2, removeCol: False, sample ratio: 2
Undersampling
Splitting at 828, total rows: 1035
trainX: (828, 21), trainY: (828,), testX: (207, 21), testY: (207,)
Scaler is fitted
Total: 207, Collisions: 70
Accuracy: 0.8695652173913043, Precision: 0.7866666666666666, Recall: 0.8428571428571429, F1: 0.8137931034482757
Confusion matrix:
[[121  16]
 [ 11  59]]

Past importance: 3, sec before col: 2, removeCol: True, sample ratio: 1
Undersampling
Splitting at 208, total rows: 260
trainX: (208, 21), trainY: (208,), testX: (52, 21), testY: (52,)
Scaler is fitted
Total: 52, Collisions: 27
Accuracy: 0.7115384615384616, Precision: 0.7142857142857143, Recall: 0.7407407407407407, F1: 0.7272727272727273
Confusion matrix:
[[17  8]
 [ 7 20]]

Past importance: 3, sec before col: 2, removeCol: True, sample ratio: 1.5
Undersampling
S



Total: 78, Collisions: 20
Accuracy: 0.7692307692307693, Precision: 0.5384615384615384, Recall: 0.7, F1: 0.608695652173913
Confusion matrix:
[[46 12]
 [ 6 14]]

Past importance: 3, sec before col: 3, removeCol: False, sample ratio: 1
Undersampling
Splitting at 750, total rows: 938
trainX: (750, 21), trainY: (750,), testX: (188, 21), testY: (188,)
Scaler is fitted




Total: 188, Collisions: 105
Accuracy: 0.8085106382978723, Precision: 0.8415841584158416, Recall: 0.8095238095238095, F1: 0.8252427184466018
Confusion matrix:
[[67 16]
 [20 85]]

Past importance: 3, sec before col: 3, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 937, total rows: 1172
trainX: (937, 21), trainY: (937,), testX: (235, 21), testY: (235,)
Scaler is fitted
Total: 235, Collisions: 110
Accuracy: 0.8042553191489362, Precision: 0.8404255319148937, Recall: 0.7181818181818181, F1: 0.7745098039215685
Confusion matrix:
[[110  15]
 [ 31  79]]

Past importance: 3, sec before col: 3, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1125, total rows: 1407
trainX: (1125, 21), trainY: (1125,), testX: (282, 21), testY: (282,)
Scaler is fitted




Total: 282, Collisions: 101
Accuracy: 0.7836879432624113, Precision: 0.7, Recall: 0.693069306930693, F1: 0.6965174129353233
Confusion matrix:
[[151  30]
 [ 31  70]]

Past importance: 3, sec before col: 3, removeCol: True, sample ratio: 1
Undersampling
Splitting at 406, total rows: 508
trainX: (406, 21), trainY: (406,), testX: (102, 21), testY: (102,)
Scaler is fitted
Total: 102, Collisions: 58
Accuracy: 0.7745098039215687, Precision: 0.8431372549019608, Recall: 0.7413793103448276, F1: 0.7889908256880734
Confusion matrix:
[[36  8]
 [15 43]]

Past importance: 3, sec before col: 3, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 508, total rows: 635
trainX: (508, 21), trainY: (508,), testX: (127, 21), testY: (127,)
Scaler is fitted




Total: 127, Collisions: 48
Accuracy: 0.7322834645669292, Precision: 0.625, Recall: 0.7291666666666666, F1: 0.6730769230769231
Confusion matrix:
[[58 21]
 [13 35]]

Past importance: 3, sec before col: 3, removeCol: True, sample ratio: 2
Undersampling
Splitting at 609, total rows: 762
trainX: (609, 21), trainY: (609,), testX: (153, 21), testY: (153,)
Scaler is fitted
Total: 153, Collisions: 53
Accuracy: 0.7581699346405228, Precision: 0.6481481481481481, Recall: 0.660377358490566, F1: 0.6542056074766356
Confusion matrix:
[[81 19]
 [18 35]]

Past importance: 3, sec before col: 4, removeCol: False, sample ratio: 1
Undersampling
Splitting at 942, total rows: 1178
trainX: (942, 21), trainY: (942,), testX: (236, 21), testY: (236,)
Scaler is fitted




Total: 236, Collisions: 130
Accuracy: 0.6991525423728814, Precision: 0.736, Recall: 0.7076923076923077, F1: 0.7215686274509805
Confusion matrix:
[[73 33]
 [38 92]]

Past importance: 3, sec before col: 4, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1177, total rows: 1472
trainX: (1177, 21), trainY: (1177,), testX: (295, 21), testY: (295,)
Scaler is fitted




Total: 295, Collisions: 116
Accuracy: 0.7491525423728813, Precision: 0.6666666666666666, Recall: 0.7241379310344828, F1: 0.6942148760330579
Confusion matrix:
[[137  42]
 [ 32  84]]

Past importance: 3, sec before col: 4, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1413, total rows: 1767
trainX: (1413, 21), trainY: (1413,), testX: (354, 21), testY: (354,)
Scaler is fitted
Total: 354, Collisions: 116
Accuracy: 0.7768361581920904, Precision: 0.6390977443609023, Recall: 0.7327586206896551, F1: 0.6827309236947791
Confusion matrix:
[[190  48]
 [ 31  85]]

Past importance: 3, sec before col: 4, removeCol: True, sample ratio: 1
Undersampling
Splitting at 598, total rows: 748
trainX: (598, 21), trainY: (598,), testX: (150, 21), testY: (150,)
Scaler is fitted




Total: 150, Collisions: 81
Accuracy: 0.6666666666666666, Precision: 0.6962025316455697, Recall: 0.6790123456790124, F1: 0.6875000000000001
Confusion matrix:
[[45 24]
 [26 55]]

Past importance: 3, sec before col: 4, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 748, total rows: 935
trainX: (748, 21), trainY: (748,), testX: (187, 21), testY: (187,)
Scaler is fitted




Total: 187, Collisions: 75
Accuracy: 0.7272727272727273, Precision: 0.65, Recall: 0.6933333333333334, F1: 0.670967741935484
Confusion matrix:
[[84 28]
 [23 52]]

Past importance: 3, sec before col: 4, removeCol: True, sample ratio: 2
Undersampling
Splitting at 897, total rows: 1122
trainX: (897, 21), trainY: (897,), testX: (225, 21), testY: (225,)
Scaler is fitted




Total: 225, Collisions: 75
Accuracy: 0.7555555555555555, Precision: 0.631578947368421, Recall: 0.64, F1: 0.6357615894039735
Confusion matrix:
[[122  28]
 [ 27  48]]

Past importance: 3, sec before col: 5, removeCol: False, sample ratio: 1
Undersampling
Splitting at 1131, total rows: 1414
trainX: (1131, 21), trainY: (1131,), testX: (283, 21), testY: (283,)
Scaler is fitted




Total: 283, Collisions: 120
Accuracy: 0.7314487632508834, Precision: 0.6527777777777778, Recall: 0.7833333333333333, F1: 0.7121212121212122
Confusion matrix:
[[113  50]
 [ 26  94]]

Past importance: 3, sec before col: 5, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1413, total rows: 1767
trainX: (1413, 21), trainY: (1413,), testX: (354, 21), testY: (354,)
Scaler is fitted
Total: 354, Collisions: 130
Accuracy: 0.8050847457627118, Precision: 0.6993464052287581, Recall: 0.823076923076923, F1: 0.7561837455830388
Confusion matrix:
[[178  46]
 [ 23 107]]

Past importance: 3, sec before col: 5, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1696, total rows: 2121
trainX: (1696, 21), trainY: (1696,), testX: (425, 21), testY: (425,)
Scaler is fitted




Total: 425, Collisions: 136
Accuracy: 0.7505882352941177, Precision: 0.6041666666666666, Recall: 0.6397058823529411, F1: 0.6214285714285714
Confusion matrix:
[[232  57]
 [ 49  87]]

Past importance: 3, sec before col: 5, removeCol: True, sample ratio: 1
Undersampling
Splitting at 787, total rows: 984
trainX: (787, 21), trainY: (787,), testX: (197, 21), testY: (197,)
Scaler is fitted




Total: 197, Collisions: 102
Accuracy: 0.7360406091370558, Precision: 0.7450980392156863, Recall: 0.7450980392156863, F1: 0.7450980392156863
Confusion matrix:
[[69 26]
 [26 76]]

Past importance: 3, sec before col: 5, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 984, total rows: 1230
trainX: (984, 21), trainY: (984,), testX: (246, 21), testY: (246,)
Scaler is fitted




Total: 246, Collisions: 90
Accuracy: 0.7276422764227642, Precision: 0.6161616161616161, Recall: 0.6777777777777778, F1: 0.6455026455026455
Confusion matrix:
[[118  38]
 [ 29  61]]

Past importance: 3, sec before col: 5, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1180, total rows: 1476
trainX: (1180, 21), trainY: (1180,), testX: (296, 21), testY: (296,)
Scaler is fitted




Total: 296, Collisions: 99
Accuracy: 0.7533783783783784, Precision: 0.6226415094339622, Recall: 0.6666666666666666, F1: 0.6439024390243903
Confusion matrix:
[[157  40]
 [ 33  66]]

Past importance: 3, sec before col: 6, removeCol: False, sample ratio: 1
Undersampling
Splitting at 1318, total rows: 1648
trainX: (1318, 21), trainY: (1318,), testX: (330, 21), testY: (330,)
Scaler is fitted




Total: 330, Collisions: 175
Accuracy: 0.6878787878787879, Precision: 0.7045454545454546, Recall: 0.7085714285714285, F1: 0.7065527065527065
Confusion matrix:
[[103  52]
 [ 51 124]]

Past importance: 3, sec before col: 6, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1648, total rows: 2060
trainX: (1648, 21), trainY: (1648,), testX: (412, 21), testY: (412,)
Scaler is fitted
Total: 412, Collisions: 144
Accuracy: 0.7305825242718447, Precision: 0.5921787709497207, Recall: 0.7361111111111112, F1: 0.6563467492260063
Confusion matrix:
[[195  73]
 [ 38 106]]

Past importance: 3, sec before col: 6, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1977, total rows: 2472
trainX: (1977, 21), trainY: (1977,), testX: (495, 21), testY: (495,)
Scaler is fitted
Total: 495, Collisions: 157
Accuracy: 0.7434343434343434, Precision: 0.5961538461538461, Recall: 0.5923566878980892, F1: 0.5942492012779553
Confusion matrix:
[[275  63]
 [ 64  93]]

Past importance: 3, sec before col



Total: 244, Collisions: 126
Accuracy: 0.6721311475409836, Precision: 0.6854838709677419, Recall: 0.6746031746031746, F1: 0.68
Confusion matrix:
[[79 39]
 [41 85]]

Past importance: 3, sec before col: 6, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 1217, total rows: 1522
trainX: (1217, 21), trainY: (1217,), testX: (305, 21), testY: (305,)
Scaler is fitted
Total: 305, Collisions: 125
Accuracy: 0.6885245901639344, Precision: 0.6153846153846154, Recall: 0.64, F1: 0.6274509803921569
Confusion matrix:
[[130  50]
 [ 45  80]]

Past importance: 3, sec before col: 6, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1461, total rows: 1827
trainX: (1461, 21), trainY: (1461,), testX: (366, 21), testY: (366,)
Scaler is fitted




Total: 366, Collisions: 102
Accuracy: 0.7103825136612022, Precision: 0.4852941176470588, Recall: 0.6470588235294118, F1: 0.5546218487394958
Confusion matrix:
[[194  70]
 [ 36  66]]

Past importance: 3, sec before col: 7, removeCol: False, sample ratio: 1
Undersampling
Splitting at 1500, total rows: 1876
trainX: (1500, 21), trainY: (1500,), testX: (376, 21), testY: (376,)
Scaler is fitted




Total: 376, Collisions: 175
Accuracy: 0.7313829787234043, Precision: 0.6868686868686869, Recall: 0.7771428571428571, F1: 0.7292225201072386
Confusion matrix:
[[139  62]
 [ 39 136]]

Past importance: 3, sec before col: 7, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1876, total rows: 2345
trainX: (1876, 21), trainY: (1876,), testX: (469, 21), testY: (469,)
Scaler is fitted
Total: 469, Collisions: 195
Accuracy: 0.6993603411513859, Precision: 0.6391752577319587, Recall: 0.6358974358974359, F1: 0.6375321336760925
Confusion matrix:
[[204  70]
 [ 71 124]]

Past importance: 3, sec before col: 7, removeCol: False, sample ratio: 2
Undersampling
Splitting at 2251, total rows: 2814
trainX: (2251, 21), trainY: (2251,), testX: (563, 21), testY: (563,)
Scaler is fitted
Total: 563, Collisions: 174
Accuracy: 0.7513321492007105, Precision: 0.5988372093023255, Recall: 0.5919540229885057, F1: 0.5953757225433525
Confusion matrix:
[[320  69]
 [ 71 103]]

Past importance: 3, sec before col



Total: 290, Collisions: 126
Accuracy: 0.6551724137931034, Precision: 0.589041095890411, Recall: 0.6825396825396826, F1: 0.6323529411764707
Confusion matrix:
[[104  60]
 [ 40  86]]

Past importance: 3, sec before col: 7, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 1445, total rows: 1807
trainX: (1445, 21), trainY: (1445,), testX: (362, 21), testY: (362,)
Scaler is fitted




Total: 362, Collisions: 127
Accuracy: 0.7375690607734806, Precision: 0.6025641025641025, Recall: 0.7401574803149606, F1: 0.6643109540636042
Confusion matrix:
[[173  62]
 [ 33  94]]

Past importance: 3, sec before col: 7, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1735, total rows: 2169
trainX: (1735, 21), trainY: (1735,), testX: (434, 21), testY: (434,)
Scaler is fitted
Total: 434, Collisions: 137
Accuracy: 0.7073732718894009, Precision: 0.5347222222222222, Recall: 0.5620437956204379, F1: 0.5480427046263345
Confusion matrix:
[[230  67]
 [ 60  77]]

Past importance: 4, sec before col: 2, removeCol: False, sample ratio: 1
Undersampling
Splitting at 552, total rows: 690
trainX: (552, 28), trainY: (552,), testX: (138, 28), testY: (138,)
Scaler is fitted




Total: 138, Collisions: 76
Accuracy: 0.8405797101449275, Precision: 0.875, Recall: 0.8289473684210527, F1: 0.8513513513513513
Confusion matrix:
[[53  9]
 [13 63]]

Past importance: 4, sec before col: 2, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 689, total rows: 862
trainX: (689, 28), trainY: (689,), testX: (173, 28), testY: (173,)
Scaler is fitted
Total: 173, Collisions: 65
Accuracy: 0.7976878612716763, Precision: 0.7142857142857143, Recall: 0.7692307692307693, F1: 0.7407407407407408
Confusion matrix:
[[88 20]
 [15 50]]

Past importance: 4, sec before col: 2, removeCol: False, sample ratio: 2
Undersampling
Splitting at 828, total rows: 1035
trainX: (828, 28), trainY: (828,), testX: (207, 28), testY: (207,)
Scaler is fitted
Total: 207, Collisions: 73
Accuracy: 0.8454106280193237, Precision: 0.7733333333333333, Recall: 0.7945205479452054, F1: 0.7837837837837838
Confusion matrix:
[[117  17]
 [ 15  58]]

Past importance: 4, sec before col: 2, removeCol: True, sample ra



Total: 235, Collisions: 95
Accuracy: 0.7829787234042553, Precision: 0.7340425531914894, Recall: 0.7263157894736842, F1: 0.7301587301587302
Confusion matrix:
[[115  25]
 [ 26  69]]

Past importance: 4, sec before col: 3, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1125, total rows: 1407
trainX: (1125, 28), trainY: (1125,), testX: (282, 28), testY: (282,)
Scaler is fitted




Total: 282, Collisions: 105
Accuracy: 0.7907801418439716, Precision: 0.73, Recall: 0.6952380952380952, F1: 0.7121951219512195
Confusion matrix:
[[150  27]
 [ 32  73]]

Past importance: 4, sec before col: 3, removeCol: True, sample ratio: 1
Undersampling
Splitting at 406, total rows: 508
trainX: (406, 28), trainY: (406,), testX: (102, 28), testY: (102,)
Scaler is fitted
Total: 102, Collisions: 62
Accuracy: 0.7745098039215687, Precision: 0.8823529411764706, Recall: 0.7258064516129032, F1: 0.7964601769911503
Confusion matrix:
[[34  6]
 [17 45]]

Past importance: 4, sec before col: 3, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 508, total rows: 635
trainX: (508, 28), trainY: (508,), testX: (127, 28), testY: (127,)
Scaler is fitted




Total: 127, Collisions: 58
Accuracy: 0.7480314960629921, Precision: 0.7321428571428571, Recall: 0.7068965517241379, F1: 0.719298245614035
Confusion matrix:
[[54 15]
 [17 41]]

Past importance: 4, sec before col: 3, removeCol: True, sample ratio: 2
Undersampling
Splitting at 609, total rows: 762
trainX: (609, 28), trainY: (609,), testX: (153, 28), testY: (153,)
Scaler is fitted
Total: 153, Collisions: 56
Accuracy: 0.738562091503268, Precision: 0.6481481481481481, Recall: 0.625, F1: 0.6363636363636364
Confusion matrix:
[[78 19]
 [21 35]]

Past importance: 4, sec before col: 4, removeCol: False, sample ratio: 1
Undersampling
Splitting at 942, total rows: 1178
trainX: (942, 28), trainY: (942,), testX: (236, 28), testY: (236,)
Scaler is fitted




Total: 236, Collisions: 126
Accuracy: 0.690677966101695, Precision: 0.712, Recall: 0.7063492063492064, F1: 0.7091633466135459
Confusion matrix:
[[74 36]
 [37 89]]

Past importance: 4, sec before col: 4, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1177, total rows: 1472
trainX: (1177, 28), trainY: (1177,), testX: (295, 28), testY: (295,)
Scaler is fitted




Total: 295, Collisions: 109
Accuracy: 0.7254237288135593, Precision: 0.6111111111111112, Recall: 0.7064220183486238, F1: 0.6553191489361703
Confusion matrix:
[[137  49]
 [ 32  77]]

Past importance: 4, sec before col: 4, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1413, total rows: 1767
trainX: (1413, 28), trainY: (1413,), testX: (354, 28), testY: (354,)
Scaler is fitted
Total: 354, Collisions: 110
Accuracy: 0.7768361581920904, Precision: 0.6165413533834586, Recall: 0.7454545454545455, F1: 0.6748971193415638
Confusion matrix:
[[193  51]
 [ 28  82]]

Past importance: 4, sec before col: 4, removeCol: True, sample ratio: 1
Undersampling
Splitting at 598, total rows: 748
trainX: (598, 28), trainY: (598,), testX: (150, 28), testY: (150,)
Scaler is fitted




Total: 150, Collisions: 70
Accuracy: 0.6466666666666666, Precision: 0.6075949367088608, Recall: 0.6857142857142857, F1: 0.6442953020134229
Confusion matrix:
[[49 31]
 [22 48]]

Past importance: 4, sec before col: 4, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 748, total rows: 935
trainX: (748, 28), trainY: (748,), testX: (187, 28), testY: (187,)
Scaler is fitted




Total: 187, Collisions: 77
Accuracy: 0.7165775401069518, Precision: 0.65, Recall: 0.6753246753246753, F1: 0.6624203821656051
Confusion matrix:
[[82 28]
 [25 52]]

Past importance: 4, sec before col: 4, removeCol: True, sample ratio: 2
Undersampling
Splitting at 897, total rows: 1122
trainX: (897, 28), trainY: (897,), testX: (225, 28), testY: (225,)
Scaler is fitted
Total: 225, Collisions: 88
Accuracy: 0.7333333333333333, Precision: 0.6842105263157895, Recall: 0.5909090909090909, F1: 0.6341463414634148
Confusion matrix:
[[113  24]
 [ 36  52]]

Past importance: 4, sec before col: 5, removeCol: False, sample ratio: 1
Undersampling
Splitting at 1131, total rows: 1414
trainX: (1131, 28), trainY: (1131,), testX: (283, 28), testY: (283,)
Scaler is fitted




Total: 283, Collisions: 143
Accuracy: 0.7137809187279152, Precision: 0.7152777777777778, Recall: 0.7202797202797203, F1: 0.7177700348432056
Confusion matrix:
[[ 99  41]
 [ 40 103]]

Past importance: 4, sec before col: 5, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1413, total rows: 1767
trainX: (1413, 28), trainY: (1413,), testX: (354, 28), testY: (354,)
Scaler is fitted
Total: 354, Collisions: 124
Accuracy: 0.7711864406779662, Precision: 0.6405228758169934, Recall: 0.7903225806451613, F1: 0.7075812274368231
Confusion matrix:
[[175  55]
 [ 26  98]]

Past importance: 4, sec before col: 5, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1696, total rows: 2121
trainX: (1696, 28), trainY: (1696,), testX: (425, 28), testY: (425,)
Scaler is fitted
Total: 425, Collisions: 115
Accuracy: 0.7858823529411765, Precision: 0.5833333333333334, Recall: 0.7304347826086957, F1: 0.6486486486486488
Confusion matrix:
[[250  60]
 [ 31  84]]

Past importance: 4, sec before col



Total: 197, Collisions: 88
Accuracy: 0.6751269035532995, Precision: 0.6176470588235294, Recall: 0.7159090909090909, F1: 0.6631578947368422
Confusion matrix:
[[70 39]
 [25 63]]

Past importance: 4, sec before col: 5, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 984, total rows: 1230
trainX: (984, 28), trainY: (984,), testX: (246, 28), testY: (246,)
Scaler is fitted
Total: 246, Collisions: 88
Accuracy: 0.6869918699186992, Precision: 0.5555555555555556, Recall: 0.625, F1: 0.5882352941176471
Confusion matrix:
[[114  44]
 [ 33  55]]

Past importance: 4, sec before col: 5, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1180, total rows: 1476
trainX: (1180, 28), trainY: (1180,), testX: (296, 28), testY: (296,)
Scaler is fitted
Total: 296, Collisions: 93
Accuracy: 0.7804054054054054, Precision: 0.6320754716981132, Recall: 0.7204301075268817, F1: 0.6733668341708542
Confusion matrix:
[[164  39]
 [ 26  67]]

Past importance: 4, sec before col: 6, removeCol: False, sa



Total: 330, Collisions: 164
Accuracy: 0.7333333333333333, Precision: 0.7159090909090909, Recall: 0.7682926829268293, F1: 0.7411764705882352
Confusion matrix:
[[116  50]
 [ 38 126]]

Past importance: 4, sec before col: 6, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1648, total rows: 2060
trainX: (1648, 28), trainY: (1648,), testX: (412, 28), testY: (412,)
Scaler is fitted




Total: 412, Collisions: 152
Accuracy: 0.7257281553398058, Precision: 0.6089385474860335, Recall: 0.7171052631578947, F1: 0.6586102719033231
Confusion matrix:
[[190  70]
 [ 43 109]]

Past importance: 4, sec before col: 6, removeCol: False, sample ratio: 2
Undersampling
Splitting at 1977, total rows: 2472
trainX: (1977, 28), trainY: (1977,), testX: (495, 28), testY: (495,)
Scaler is fitted
Total: 495, Collisions: 162
Accuracy: 0.7616161616161616, Precision: 0.6410256410256411, Recall: 0.6172839506172839, F1: 0.6289308176100629
Confusion matrix:
[[277  56]
 [ 62 100]]

Past importance: 4, sec before col: 6, removeCol: True, sample ratio: 1
Undersampling
Splitting at 974, total rows: 1218
trainX: (974, 28), trainY: (974,), testX: (244, 28), testY: (244,)
Scaler is fitted
Total: 244, Collisions: 123
Accuracy: 0.6844262295081968, Precision: 0.6854838709677419, Recall: 0.6910569105691057, F1: 0.6882591093117408
Confusion matrix:
[[82 39]
 [38 85]]

Past importance: 4, sec before col: 6, remov



Total: 376, Collisions: 169
Accuracy: 0.6888297872340425, Precision: 0.6313131313131313, Recall: 0.7396449704142012, F1: 0.6811989100817438
Confusion matrix:
[[134  73]
 [ 44 125]]

Past importance: 4, sec before col: 7, removeCol: False, sample ratio: 1.5
Undersampling
Splitting at 1876, total rows: 2345
trainX: (1876, 28), trainY: (1876,), testX: (469, 28), testY: (469,)
Scaler is fitted




Total: 469, Collisions: 174
Accuracy: 0.7057569296375267, Precision: 0.5927835051546392, Recall: 0.6609195402298851, F1: 0.625
Confusion matrix:
[[216  79]
 [ 59 115]]

Past importance: 4, sec before col: 7, removeCol: False, sample ratio: 2
Undersampling
Splitting at 2251, total rows: 2814
trainX: (2251, 28), trainY: (2251,), testX: (563, 28), testY: (563,)
Scaler is fitted
Total: 563, Collisions: 180
Accuracy: 0.7264653641207816, Precision: 0.5755813953488372, Recall: 0.55, F1: 0.5625000000000001
Confusion matrix:
[[310  73]
 [ 81  99]]

Past importance: 4, sec before col: 7, removeCol: True, sample ratio: 1
Undersampling
Splitting at 1156, total rows: 1446
trainX: (1156, 28), trainY: (1156,), testX: (290, 28), testY: (290,)
Scaler is fitted




Total: 290, Collisions: 133
Accuracy: 0.6517241379310345, Precision: 0.6095890410958904, Recall: 0.6691729323308271, F1: 0.6379928315412187
Confusion matrix:
[[100  57]
 [ 44  89]]

Past importance: 4, sec before col: 7, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 1445, total rows: 1807
trainX: (1445, 28), trainY: (1445,), testX: (362, 28), testY: (362,)
Scaler is fitted
Total: 362, Collisions: 123
Accuracy: 0.6988950276243094, Precision: 0.5448717948717948, Recall: 0.6910569105691057, F1: 0.6093189964157706
Confusion matrix:
[[168  71]
 [ 38  85]]

Past importance: 4, sec before col: 7, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1735, total rows: 2169
trainX: (1735, 28), trainY: (1735,), testX: (434, 28), testY: (434,)
Scaler is fitted
Total: 434, Collisions: 136
Accuracy: 0.7142857142857143, Precision: 0.5416666666666666, Recall: 0.5735294117647058, F1: 0.557142857142857
Confusion matrix:
[[232  66]
 [ 58  78]]

Past importance: 5, sec before col: 2



Total: 244, Collisions: 125
Accuracy: 0.6352459016393442, Precision: 0.6451612903225806, Recall: 0.64, F1: 0.642570281124498
Confusion matrix:
[[75 44]
 [45 80]]

Past importance: 5, sec before col: 6, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 1217, total rows: 1522
trainX: (1217, 35), trainY: (1217,), testX: (305, 35), testY: (305,)
Scaler is fitted
Total: 305, Collisions: 122
Accuracy: 0.7311475409836066, Precision: 0.6538461538461539, Recall: 0.6967213114754098, F1: 0.6746031746031745
Confusion matrix:
[[138  45]
 [ 37  85]]

Past importance: 5, sec before col: 6, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1461, total rows: 1827
trainX: (1461, 35), trainY: (1461,), testX: (366, 35), testY: (366,)
Scaler is fitted
Total: 366, Collisions: 108
Accuracy: 0.726775956284153, Precision: 0.5294117647058824, Recall: 0.6666666666666666, F1: 0.5901639344262295
Confusion matrix:
[[194  64]
 [ 36  72]]

Past importance: 5, sec before col: 7, removeCol: False,



Total: 290, Collisions: 141
Accuracy: 0.6724137931034483, Precision: 0.6575342465753424, Recall: 0.6808510638297872, F1: 0.6689895470383275
Confusion matrix:
[[99 50]
 [45 96]]

Past importance: 5, sec before col: 7, removeCol: True, sample ratio: 1.5
Undersampling
Splitting at 1445, total rows: 1807
trainX: (1445, 35), trainY: (1445,), testX: (362, 35), testY: (362,)
Scaler is fitted
Total: 362, Collisions: 144
Accuracy: 0.712707182320442, Precision: 0.6282051282051282, Recall: 0.6805555555555556, F1: 0.6533333333333333
Confusion matrix:
[[160  58]
 [ 46  98]]

Past importance: 5, sec before col: 7, removeCol: True, sample ratio: 2
Undersampling
Splitting at 1735, total rows: 2169
trainX: (1735, 35), trainY: (1735,), testX: (434, 35), testY: (434,)
Scaler is fitted
Total: 434, Collisions: 143
Accuracy: 0.7396313364055299, Precision: 0.6041666666666666, Recall: 0.6083916083916084, F1: 0.6062717770034843
Confusion matrix:
[[234  57]
 [ 56  87]]

Past importance: 6, sec before col: 2, re

In [16]:
# `data` must already exist from an earlier cell. To rebuild it from the small
# test CSV instead, uncomment:
# data = makeDataUsable(read_csv("../../data/testGenerateData.csv"), 6, 4)

# Drop the first column without touching `data`: DataFrame.drop returns a new
# frame, so no explicit copy() + inplace mutation is needed, and `columns=`
# already implies the column axis.
dataToUse = data.drop(columns=data.columns[0])

# Split into train/test features (X) and labels (Y) and report their shapes.
trainX, trainY, testX, testY = splitTrainTest(dataToUse)
print(f"trainX:{trainX.shape}, trainY:{trainY.shape}, testX:{testX.shape}, testY:{testY.shape}")
# Debug aid: uncomment to inspect the container types returned by splitTrainTest.
# print(f"trainX: {type(trainX)}, trainY: {type(trainY)}, testX: {type(testX)}, testY: {type(testY)}")


splitting at 215.
trainX:(215, 42), trainY:(215,), testX:(54, 42), testY:(54,)
trainX: <class 'pandas.core.frame.DataFrame'>, trainY: <class 'pandas.core.series.Series'>, testX: <class 'pandas.core.frame.DataFrame'>, testY: <class 'pandas.core.series.Series'>


In [17]:

# Alternative to training below (presumably restores a saved model - confirm):
# p = NewPredicter.loadModel("xgb_2_582-11-16-201")
# print(p.__dict__)

p = NewPredicter()

# Features go through the predictor's own preprocessing, train split first and
# test split second; the recorded output shows "Scaler is fitted" once here.
trainXpp = p.preProcess(trainX)
testXpp = p.preProcess(testX)

# Labels only need converting from pandas to NumPy.
trainYpp = trainY.to_numpy()
testYpp = testY.to_numpy()

print(f"trainX: {type(trainXpp)}, trainY: {type(trainYpp)}, testX: {type(testXpp)}, testY: {type(testYpp)}")

# Train the model on the preprocessed training split.
p.fit(trainXpp, trainYpp)

Scaler is fitted
trainX: <class 'numpy.ndarray'>, trainY: <class 'numpy.ndarray'>, testX: <class 'numpy.ndarray'>, testY: <class 'numpy.ndarray'>


In [18]:
# Predict one test row at a time, in row order.
# NOTE(review): rows are taken from the raw `testX`, not `testXpp`; presumably
# `predict` applies the preprocessing itself - confirm.
pred = [p.predict(testX.iloc[row]) for row in range(len(testX))]
print(pred)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]


In [19]:
# Score the per-row predictions against the true test labels. Per the recorded
# output, getScore prints the TN/FP/FN/TP counts plus accuracy, precision,
# recall and F1; as the cell's last expression, its return value (a nested
# list of those four counts in the recorded run) is displayed as well.
p.getScore(pred, testY)

Total: 54, number of collisions: 14
	TN: 36 	| FP: 4 
	FN: 4 	| TP: 10
Accuracy: 0.85
Precision: 0.71
Recall: 0.71
F1: 0.71


[[36, 4], [4, 10]]