In [1]:
import numpy as np
from sklearn import metrics as sklearn_metrics
import joblib
import pickle
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from river.tree import HoeffdingAdaptiveTreeClassifier
from river.neighbors import KNNClassifier
from river.linear_model import LogisticRegression
from river import optim
from river.ensemble import AdaptiveRandomForestClassifier
from river.linear_model import PAClassifier
import sys
sys.path.append("..")
from utils import model_and_dataset_selection

In [4]:
# Run-time selection provided by the project helper. The two *_folder_name_dict
# mappings are indexed by data_type / model_type further down to build the
# data and trained-model paths.
(
    n_days_lookahead,
    data_type,
    data_folder_name_dict,
    model_type,
    model_folder_name_dict,
) = model_and_dataset_selection.metrics_select_online()
    
def get_all_metrics(true, predicted, score):
    """Print binary-classification metrics and plot the ROC curve.

    The positive ("failed") class is the truthy label (True / 1); this is the
    same class that ``roc_curve`` treats as positive for ``score``.

    Parameters
    ----------
    true : array-like
        Ground-truth labels (bool or 0/1).
    predicted : array-like
        Hard class predictions, same encoding as ``true``.
    score : array-like of float
        Scores for the positive class, used for the ROC curve / AUC.

    Returns
    -------
    None
        Metrics are printed and the ROC figure is shown.
    """
    def _ratio(numerator, denominator):
        # An empty denominator means the metric is undefined: report NaN
        # instead of emitting a division warning.
        return numerator / denominator if denominator else float('nan')

    # scikit-learn lays the matrix out as rows = true label, columns = predicted
    # label, in the order given by `labels`. With [False, True] the flattened
    # matrix is therefore (TN, FP, FN, TP). Passing `labels` explicitly also
    # keeps the matrix 2x2 when only one class is present in the inputs.
    TN, FP, FN, TP = sklearn_metrics.confusion_matrix(
        true, predicted, labels=[False, True]
    ).ravel()

    fpr_list, tpr_list, _thresholds = roc_curve(true, score)
    roc_auc = auc(fpr_list, tpr_list)

    precision_of_failed = _ratio(TP, TP + FP)
    precision_of_healthy = _ratio(TN, TN + FN)
    tpr = _ratio(TP, TP + FN)
    fpr = _ratio(FP, TN + FP)
    auc_score = roc_auc
    f1_score = _ratio(2 * precision_of_failed * tpr, precision_of_failed + tpr)

    print('precision of failed: ', precision_of_failed)
    print('precision of healthy: ', precision_of_healthy)
    print('tpr: ', tpr)
    print('fpr: ', fpr)
    print('auc: ', auc_score)
    print('f1-score: ', f1_score)
    print('roc curve: ')

    # The curve needs a label, otherwise plt.legend() has nothing to show.
    plt.plot(fpr_list, tpr_list, label='ROC (AUC = %0.3f)' % roc_auc)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.title('ROC Curve')
    plt.legend(loc="lower right")
    plt.show()

def loadData():
    """Load the test features and labels for the current selection.

    Reads ``smart_test.npy`` and ``test_labels.npy`` from the data folder
    chosen by the module-level ``data_type`` / ``n_days_lookahead`` values,
    casts both arrays to float32 and returns ``(features, labels)`` with the
    features reshaped to ``(n_samples, 30, -1)``.
    """
    # Both files live in the same directory; build the common prefix once.
    data_dir = f"../../data/{data_folder_name_dict[data_type]}/{n_days_lookahead}_days_lookahead/"

    features = np.load(data_dir + 'smart_test.npy', allow_pickle=True)
    labels = np.load(data_dir + 'test_labels.npy', allow_pickle=True)

    features = features.astype('float32')
    labels = labels.astype('float32')

    # Second axis is fixed at 30; presumably the per-sample time window - TODO confirm.
    n_samples = len(features)
    return features.reshape((n_samples, 30, -1)), labels

In [5]:

# Directory holding the pickled online models for the selected model type and
# look-ahead window.
model_dir = ('../../trained_model/' + model_folder_name_dict[model_type] + '/'
             + str(n_days_lookahead) + '_days_lookahead/')

# NOTE: joblib.load unpickles arbitrary Python objects; only load model files
# that were produced by a trusted training run.
rf = joblib.load(model_dir + 'rf_online.pkl')
dt = joblib.load(model_dir + 'dt_online.pkl')
lr = joblib.load(model_dir + 'lr_online.pkl')
pac = joblib.load(model_dir + 'pac_online.pkl')
knn = joblib.load(model_dir + 'knn_online.pkl')

X_test, y_test = loadData()
# loadData returns a 3-D array; flatten every sample into one feature vector.
X_test = X_test.reshape((len(X_test), -1))
print(X_test.shape)

# The models are queried with one dict per sample, keyed by the feature index
# as a string. Derive the keys from the actual width instead of hard-coding it.
headers = [str(i) for i in range(X_test.shape[1])]
data_x_test = [dict(zip(headers, x)) for x in X_test]
data_y_test = [bool(y == 1) for y in y_test]

y_true = []
y_pred_rf, y_pred_dt, y_pred_lr, y_pred_pac, y_pred_knn = [], [], [], [], []
y_score_rf, y_score_dt, y_score_lr, y_score_pac, y_score_knn = [], [], [], [], []

# (title, model, hard predictions, positive-class scores) for each model, so
# that prediction and reporting are written once instead of five times.
evaluations = [
    ('RF', rf, y_pred_rf, y_score_rf),
    ('DT', dt, y_pred_dt, y_score_dt),
    ('LR', lr, y_pred_lr, y_score_lr),
    ('PAC', pac, y_pred_pac, y_score_pac),
    ('KNN', knn, y_pred_knn, y_score_knn),
]

for i, (Xi, yi) in enumerate(zip(data_x_test, data_y_test)):
    if i % 1000 == 0:
        print(i)  # coarse progress indicator

    for title, model, preds, scores in evaluations:
        preds.append(model.predict_one(Xi))
        # The probability dict may lack a True entry (e.g. if the model never
        # observed that class); treat that as a zero score instead of failing
        # with a KeyError.
        scores.append(model.predict_proba_one(Xi).get(True, 0.0))

    y_true.append(yi)

for title, model, preds, scores in evaluations:
    # Label each report so the five metric dumps can be told apart.
    print('----------------- ' + title + ' -----------------')
    get_all_metrics(np.asarray(y_true), np.asarray(preds), np.asarray(scores))

(1468, 330)
0


KeyboardInterrupt: 