In [1]:
import os 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix, classification_report, roc_auc_score

In [2]:
# display test scores and return result string and indexes of false samples
def display_test_scores(test, pred):
    str_out = ""
    str_out += ("TEST SCORES\n")
    str_out += ("\n")

    #print accuracy
    accuracy = accuracy_score(test, pred)
    str_out += ("ACCURACY: {:.4f}\n".format(accuracy))
    str_out += ("\n")
    
    #print confusion matrix
    str_out += ("CONFUSION MATRIX:\n")
    conf_mat = confusion_matrix(test, pred)
    str_out += ("{}".format(conf_mat))
    str_out += ("\n")
    str_out += ("\n")

    #print FP, FN
    str_out += ("FALSE POSITIVES:\n")
    fp = conf_mat[1][0]
    pos_labels = conf_mat[1][0]+conf_mat[1][1]
    str_out += ("{} out of {} positive labels ({:.4f}%)\n".format(fp, pos_labels,fp/pos_labels))
    str_out += ("\n")

    str_out += ("FALSE NEGATIVES:\n")
    fn = conf_mat[0][1]
    neg_labels = conf_mat[0][1]+conf_mat[0][0]
    str_out += ("{} out of {} negative labels ({:.4f}%)\n".format(fn, neg_labels, fn/neg_labels))
    str_out += ("\n")

    
    #print classification report
    str_out += ("PRECISION, RECALL, F1 scores:\n")
    str_out += ("{}".format(classification_report(test, pred)))
    
    false_indexes = np.where(test != pred)
    return str_out, false_indexes

In [3]:
df_rldd = pd.read_pickle('../input/rlddandmerged/rldd_normalized_scaled.pkl')

X = df_rldd.loc[:, ["n_EAR", 
                    "n_MAR", "n_MOE", "n_EC",
                    "n_LEB", "n_SOP", "PERCLOS", "CLOSENESS"]]

y = df_rldd.loc[:, "DROWSINESS"].map({0: 0, 0.5: 1, 1:2})


y_pred = X["n_EAR"]<0.23
y_pred = y_pred.astype(int)*2
results, false = display_test_scores(y, y_pred)
print(results)

  _warn_prf(average, modifier, msg_start, len(result))


TEST SCORES

ACCURACY: 0.4271

CONFUSION MATRIX:
[[626698      0 367724]
 [497122      0 494477]
 [359592      0 654870]]

FALSE POSITIVES:
497122 out of 497122 positive labels (1.0000%)

FALSE NEGATIVES:
0 out of 626698 negative labels (0.0000%)

PRECISION, RECALL, F1 scores:
              precision    recall  f1-score   support

           0       0.42      0.63      0.51    994422
           1       0.00      0.00      0.00    991599
           2       0.43      0.65      0.52   1014462

    accuracy                           0.43   3000483
   macro avg       0.28      0.43      0.34   3000483
weighted avg       0.29      0.43      0.34   3000483



In [4]:
y.unique()

array([0, 2, 1])

In [5]:
df_merged = pd.read_pickle('../input/rlddandmerged/merged_normalized_scaled.pkl')

X2 = df_merged.loc[:, ["n_EAR", 
                    "n_MAR", "n_MOE", "n_EC",
                    "n_LEB", "n_SOP", "PERCLOS", "CLOSENESS"]]

y2 = df_merged.loc[:, "DROWSINESS"].map({0: 0, 0.5: 1, 1:2})

y_pred2 = X2["n_EAR"]<0.23
y_pred2 = y_pred2.astype(int)*2
results2, false2 = display_test_scores(y2, y_pred2)
print(results2)

  _warn_prf(average, modifier, msg_start, len(result))


TEST SCORES

ACCURACY: 0.4570

CONFUSION MATRIX:
[[761753      0 486791]
 [491025      0 500574]
 [484538      0 890395]]

FALSE POSITIVES:
491025 out of 491025 positive labels (1.0000%)

FALSE NEGATIVES:
0 out of 761753 negative labels (0.0000%)

PRECISION, RECALL, F1 scores:
              precision    recall  f1-score   support

           0       0.44      0.61      0.51   1248544
           1       0.00      0.00      0.00    991599
           2       0.47      0.65      0.55   1374933

    accuracy                           0.46   3615076
   macro avg       0.30      0.42      0.35   3615076
weighted avg       0.33      0.46      0.38   3615076



In [6]:
y2.unique()

array([0, 2, 1])

In [7]:
df_nthu = pd.read_pickle('../input/ddd-final-datasets/final_step2_scaled.pkl')

X3 = df_nthu.loc[:, ["n_EAR", 
                    "n_MAR", "n_MOE", "n_EC",
                    "n_LEB", "n_SOP", "PERCLOS", "CLOSENESS"]]

y3 = df_nthu.loc[:, "DROWSINESS"].map({0: 0, 1:2})

y_pred3 = X3["n_EAR"]<0.23
y_pred3 = y_pred3.astype(int)*2
results3, false3 = display_test_scores(y3, y_pred3)
print(results3)

TEST SCORES

ACCURACY: 0.6073

CONFUSION MATRIX:
[[137140 116982]
 [124342 236129]]

FALSE POSITIVES:
124342 out of 360471 positive labels (0.3449%)

FALSE NEGATIVES:
116982 out of 254122 negative labels (0.4603%)

PRECISION, RECALL, F1 scores:
              precision    recall  f1-score   support

           0       0.52      0.54      0.53    254122
           2       0.67      0.66      0.66    360471

    accuracy                           0.61    614593
   macro avg       0.60      0.60      0.60    614593
weighted avg       0.61      0.61      0.61    614593



In [8]:
y3.unique()

array([0, 2])