In [1]:
import CalculatedFieldSubroutines as cfs

#

import numpy as np

import pandas as pd

import random

#

import matplotlib.pyplot as plt

from pandasgui import show

#

import warnings

#

import xgboost as xgb

#

import os

In [2]:
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and pandas
# copy-related warnings raised by later cells are hidden as well.
warnings.filterwarnings( 'ignore' )

In [3]:
# IDs iterated by the preprocessing loop below (one frame is loaded per gmID).
# Presumably only whitelisted recordings that have traffic data - TODO confirm in cfs.
gmID_list = cfs.list_whitelisted_gmIDs_with_traffic_data()

# Topic names known to cfs; printed for reference only, not used by the other visible cells.
topic_list = cfs.list_topics()

print( topic_list )

['/apollo/canbus/chassis', '/apollo/drive/event', '/apollo/sensor/gnss/best/pose', '/apollo/perception/traffic/light']


In [4]:
moving_window = 0 # seconds

expansion_window = 1e9 # nanoseconds

# One list of per-gmID preprocessed frames for each route.

red_preprocessed_dfs = []

green_preprocessed_dfs = []

blue_preprocessed_dfs = []

# Route name -> list that collects that route's frames.  A gmID whose route is
# not listed here is skipped, exactly as the former if/elif chain did.

route_buckets = { 'Red' : red_preprocessed_dfs,
                  'Green' : green_preprocessed_dfs,
                  'Blue' : blue_preprocessed_dfs }

for gmID in gmID_list:

    preprocessed_df = cfs.retrieve_gmID_preprocessed_moving_data( gmID, window_seconds = moving_window )

    # The return values of the next three calls are discarded, so they
    # presumably add their columns to preprocessed_df in place - TODO confirm in cfs.

    cfs.BinaryDisengagementExpanded( preprocessed_df, moving_colname = 'time', window = expansion_window )

    cfs.DisengagementID( preprocessed_df, expanded = False )

    cfs.DisengagementID( preprocessed_df, expanded = True )

    # Look the route up once per gmID instead of once per comparison.

    route_bucket = route_buckets.get( cfs.give_route( gmID ) )

    if route_bucket is not None:

        route_bucket.append( preprocessed_df )

In [5]:
# Split the list of red-route frames (not individual rows) into training and testing lists.
# NOTE(review): no seed is set in this notebook before the split, so unless
# cfs.random_list_split seeds itself the partition changes on every run - TODO confirm.
training_red_dfs, testing_red_dfs = cfs.random_list_split( red_preprocessed_dfs, split_percentage = 0.8 )

training_red_df = pd.concat( training_red_dfs )

testing_red_df = pd.concat( testing_red_dfs )

# Feature columns fed to the classifier and the binary target column.

X_colnames = [ 'speedMps', 'brakePercentage', 'throttlePercentage', 'steeringPercentage', 'LatLonTotalStdDev', \
               'TernaryTurnSignal', 'BinaryContainLights' ]

Y_colname = 'BinaryDisengagementExpanded'

# Training features / target.

X_train = training_red_df[ X_colnames ]

Y_train = training_red_df[ Y_colname ]

# Testing features / target.

X_test = testing_red_df[ X_colnames ]

Y_test = testing_red_df[ Y_colname ]

In [6]:
# Binary classifier with early stopping on the area under the precision-recall curve.
# `random_state` is the scikit-learn-style name for the seed; `verbose` belongs
# to fit(), not to the constructor, so it is passed there.
model = xgb.XGBClassifier( objective = 'binary:logistic',
                           early_stopping_rounds = 10,
                           eval_metric = 'aucpr',
                           random_state = 0 )

# NOTE(review): the test split is also the early-stopping evaluation set, so the
# metrics computed on X_test / Y_test in later cells are optimistically biased.
# A separate validation split would remove that leakage.
model.fit( X_train, Y_train, eval_set = [ ( X_test, Y_test ) ], verbose = True )

[0]	validation_0-aucpr:0.06364
[1]	validation_0-aucpr:0.08565
[2]	validation_0-aucpr:0.09285
[3]	validation_0-aucpr:0.09194
[4]	validation_0-aucpr:0.09230
[5]	validation_0-aucpr:0.09370
[6]	validation_0-aucpr:0.09416
[7]	validation_0-aucpr:0.09570
[8]	validation_0-aucpr:0.09429
[9]	validation_0-aucpr:0.09044
[10]	validation_0-aucpr:0.09116
[11]	validation_0-aucpr:0.09219
[12]	validation_0-aucpr:0.09158
[13]	validation_0-aucpr:0.06949
[14]	validation_0-aucpr:0.07265
[15]	validation_0-aucpr:0.07354
[16]	validation_0-aucpr:0.07633
[17]	validation_0-aucpr:0.07974


In [7]:
# Class predictions for the testing features; compared against Y_test in the metric cells below.
Y_pred = model.predict( X_test )

In [8]:
def ML_metrics( confusion_matrix_values, display = False ):
    """
    Derive binary-classification metrics from confusion-matrix counts.

    Parameters
    ----------
    confusion_matrix_values : sequence of four numbers
        Counts in the order ( TP, TN, FP, FN ).
    display : bool, default False
        When truthy, print the counts and every derived metric.

    Returns
    -------
    tuple
        ( TP, TN, FP, FN, precision, recall, tnr, accuracy, balanced_accuracy ).
        Any ratio whose denominator is zero is returned as NaN instead of
        raising ZeroDivisionError (e.g. precision when nothing is predicted
        positive).
    """

    TP, TN, FP, FN = confusion_matrix_values

    def _ratio( numerator, denominator ):

        # NaN marks an undefined ratio; it propagates into balanced_accuracy.
        return numerator / denominator if denominator else float( 'nan' )

    #

    precision = _ratio( TP, TP + FP )

    recall = _ratio( TP, TP + FN )

    tnr = _ratio( TN, TN + FP )

    #

    accuracy = _ratio( TP + TN, TP + TN + FP + FN )

    balanced_accuracy = ( recall + tnr ) / 2

    if display:

        print( f'False_Negatives: { FN }, True_Positives: { TP }' )

        print( f'True_Negatives: { TN }, False_Positives: { FP }' )

        print( '' )

        print( f'Precision: { precision:.3f}' )

        print( f'Recall/True_Positive_Rate: { recall:.3f}' )

        print( f'True_Negative_Rate: { tnr:.3f}' )

        print( '' )

        print( f'Accuracy: { accuracy:.3f}' )

        print( f'Balanced_Accuracy: { balanced_accuracy:.3f}' )

    return TP, TN, FP, FN, precision, recall, tnr, accuracy, balanced_accuracy

In [9]:
def unique_disengagement_accuracy( Y_pred, Y_test, test_DisengagementExpandedID_col, display = False ):
    """
    Measure how many distinct disengagements received at least one true positive.

    Rows whose expanded ID equals 'NAD' are discarded.  For every remaining row
    the disengagement ID is the text that precedes the first 'ED' in the
    expanded ID, minus the single character directly before it (assumed to be
    a separator - TODO confirm against the cfs ID format).

    Parameters
    ----------
    Y_pred : iterable
        Predicted labels, one per test row.
    Y_test : iterable
        True labels, aligned with Y_pred.
    test_DisengagementExpandedID_col : iterable of str
        Expanded disengagement ID of each test row, aligned with Y_pred.
    display : bool, default False
        When truthy, print the headline numbers.

    Returns
    -------
    tuple
        ( unique_disengagement_accu,
          num_of_unique_TP_DisengagementIDs,
          num_of_unique_DisengagementIDs,
          individual_disengagement_accuracies,
          unique_TP_DisengagementIDs_wCounts,
          unique_DisengagementIDs_wCounts )

        unique_disengagement_accu is the share of distinct disengagement IDs
        with at least one row where both label columns equal 1; it is NaN when
        no disengagement row is present.  individual_disengagement_accuracies
        maps each ID to ( true-positive rows / rows of that ID ), ordered from
        highest to lowest.  The two *_wCounts dicts map IDs to row counts.
    """

    temp_df = pd.DataFrame( { 'Y_pred' : list( Y_pred ),
                              'Y_test' : list( Y_test ),
                              'DisengagementExpandedID' : list( test_DisengagementExpandedID_col ) } )

    # .copy() so the column added below is written to an independent frame
    # rather than to a slice of the unfiltered one.

    temp_df = temp_df[ temp_df[ 'DisengagementExpandedID' ] != 'NAD' ].copy()

    #

    temp_df[ 'DisengagementID' ] = [ ExpandedID[ : ExpandedID.index( 'ED' ) - 1 ]
                                     for ExpandedID in temp_df[ 'DisengagementExpandedID' ] ]

    #

    unique_DisengagementIDs_wCounts = dict( temp_df[ 'DisengagementID' ].value_counts() )

    num_of_unique_DisengagementIDs = len( unique_DisengagementIDs_wCounts )

    # Keep only the true-positive rows.

    temp_df = temp_df[ ( temp_df[ 'Y_pred' ] == 1 ) & ( temp_df[ 'Y_test' ] == 1 ) ]

    #

    unique_TP_DisengagementIDs_wCounts = dict( temp_df[ 'DisengagementID' ].value_counts() )

    num_of_unique_TP_DisengagementIDs = len( unique_TP_DisengagementIDs_wCounts )

    # With no disengagement rows the ratio is undefined; report NaN instead of raising.

    if num_of_unique_DisengagementIDs:

        unique_disengagement_accu = num_of_unique_TP_DisengagementIDs / num_of_unique_DisengagementIDs

    else:

        unique_disengagement_accu = float( 'nan' )

    # IDs without any true-positive row are absent from the TP counts, hence the default of 0.

    individual_disengagement_accuracies = {
        ID : unique_TP_DisengagementIDs_wCounts.get( ID, 0 ) / current_disengagement_count
        for ID, current_disengagement_count in unique_DisengagementIDs_wCounts.items()
    }

    individual_disengagement_accuracies = dict( sorted( individual_disengagement_accuracies.items(),
                                                        key = lambda item : item[ 1 ], reverse = True ) )

    #

    if display:

        print( f'Unique Disengagement Accuracy: { unique_disengagement_accu:.3f}' )

        print( f'# of Unique Testing Disengagements w/ a True Positive: { num_of_unique_TP_DisengagementIDs }' )

        print( f'# of Unique Testing Disengagements: { num_of_unique_DisengagementIDs }' )

    #

    return unique_disengagement_accu, num_of_unique_TP_DisengagementIDs, num_of_unique_DisengagementIDs, \
           individual_disengagement_accuracies, unique_TP_DisengagementIDs_wCounts, unique_DisengagementIDs_wCounts

In [10]:
# Row-level metrics for the single split trained above.  ML_metrics unpacks its
# argument as ( TP, TN, FP, FN ), so cfs.confusion_matrix_values is expected to
# return the counts in that order - TODO confirm.  The return value is discarded.
_ = ML_metrics( cfs.confusion_matrix_values( Y_pred, Y_test ), display = True )

False_Negatives: 3695, True_Positives: 207
True_Negatives: 833525, False_Positives: 32

Precision: 0.866
Recall/True_Positive_Rate: 0.053
True_Negative_Rate: 1.000

Accuracy: 0.996
Balanced_Accuracy: 0.527


In [11]:
# Per-disengagement view of the same predictions: how many distinct testing
# disengagements have at least one true-positive row.  The return value is discarded.
_ = unique_disengagement_accuracy( Y_pred, Y_test, testing_red_df[ 'DisengagementExpandedID' ], display = True )

Unique Disengagement Accuracy: 0.214
# of Unique Testing Disengagements w/ a True Positive: 22
# of Unique Testing Disengagements: 103


In [None]:
# metric_list[ i ] collects one value per split, in this order:
#   0 TP, 1 TN, 2 FP, 3 FN, 4 precision, 5 recall, 6 true-negative rate,
#   7 accuracy, 8 balanced accuracy, 9 unique disengagement accuracy,
#   10 # of unique TP disengagement IDs, 11 # of unique disengagement IDs

metric_list = [ [] for i in range( 12 ) ]

# Feature / target columns do not change between splits, so define them once.

X_colnames = [ 'speedMps', 'brakePercentage', 'throttlePercentage', 'steeringPercentage', 'LatLonTotalStdDev',
               'TernaryTurnSignal', 'BinaryContainLights' ]

Y_colname = 'BinaryDisengagementExpanded'

for split in range( 10 ):

    print( f'Split: { split }' )

    training_red_dfs, testing_red_dfs = cfs.random_list_split( red_preprocessed_dfs, split_percentage = 0.8 )

    training_red_df = pd.concat( training_red_dfs )

    testing_red_df = pd.concat( testing_red_dfs )

    #

    X_train = training_red_df[ X_colnames ]

    Y_train = training_red_df[ Y_colname ]

    #

    X_test = testing_red_df[ X_colnames ]

    Y_test = testing_red_df[ Y_colname ]

    #

    model = xgb.XGBClassifier( objective = 'binary:logistic',
                               early_stopping_rounds = 10,
                               eval_metric = 'aucpr' )

    # `verbose` is a fit() argument; passing it here is what actually silences
    # the per-round evaluation log, so no terminal clearing is needed afterwards.
    # NOTE(review): the test split is also the early-stopping evaluation set,
    # so the metrics below are optimistically biased.

    model.fit( X_train, Y_train, eval_set = [ ( X_test, Y_test ) ], verbose = False )

    #

    Y_pred = model.predict( X_test )

    #

    TP, TN, FP, FN, precision, recall, tnr, accuracy, balanced_accuracy = ML_metrics( cfs.confusion_matrix_values( Y_pred, Y_test ), display = False )

    unique_disengagement_accu, num_of_unique_TP_DisengagementIDs, num_of_unique_DisengagementIDs, _, _, _ = unique_disengagement_accuracy( Y_pred, Y_test, testing_red_df[ 'DisengagementExpandedID' ], display = False )

    # Same order as the index legend at the top of this cell.

    split_metrics = ( TP, TN, FP, FN, precision, recall, tnr, accuracy, balanced_accuracy,
                      unique_disengagement_accu, num_of_unique_TP_DisengagementIDs, num_of_unique_DisengagementIDs )

    for metric_values, split_metric in zip( metric_list, split_metrics ):

        metric_values.append( split_metric )

In [None]:
# Each split's metrics are already appended inside the split loop.  Appending
# them again here would count the final split twice and skew every average and
# standard deviation reported below, so this cell only checks the bookkeeping.

metric_lengths = sorted( { len( metric_values ) for metric_values in metric_list } )

assert len( metric_lengths ) == 1, f'metric_list entries have unequal lengths: { metric_lengths }'

In [None]:
# Display label for each entry of metric_list, in index order.

metric_labels = [ '# of TPs',
                  '# of TNs',
                  '# of FPs',
                  '# of FNs',
                  'Precision',
                  'Recall',
                  'TN Rate',
                  'Accuracy',
                  'Balanced Accuracy',
                  'Unique Disengagement Accuracy',
                  '# of Unique TP Disengagement IDs',
                  '# of Unique Disengagement IDs' ]

# Mean and (population) standard deviation across splits, one pair of lines
# per metric followed by a blank line.

for metric_label, metric_values in zip( metric_labels, metric_list ):

    print( f'Average { metric_label }: { np.mean( metric_values ):.3f}' )
    print( f'Stddev of { metric_label }: { np.std( metric_values ):.3f}\n' )