In [1]:
import pandas as pd
import numpy as np
import pickle

In [22]:
# Default cut-off applied to the averaged DE/FE squared prediction error.
ANOMALY_MSE_THRESHOLD = 0.013882980550933777


def anomaly_detection(df, de_model, fe_model, threshold=ANOMALY_MSE_THRESHOLD):
    """Flag rows whose DE/FE values deviate from the models' predictions.

    For every row the squared error between the observed value and the
    model prediction is computed for both channels, the two errors are
    averaged, and the row is marked anomalous when that average is at or
    above ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``'DE'`` and ``'FE'``. It is not
        modified; a copy is returned.
    de_model, fe_model : object
        Objects exposing ``predict(start=..., end=..., dynamic=...)`` that
        return one prediction per requested position (presumably fitted
        ARIMA results -- confirm against the pickled models).
    threshold : float, optional
        Rows with an averaged squared error ``>= threshold`` get
        ``anomaly == 1``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` (original index preserved) with an integer
        ``'anomaly'`` column holding 0 or 1.

    Raises
    ------
    ValueError
        If a model returns a different number of predictions than ``df``
        has rows.
    """
    result = df.copy()
    n_rows = len(result)
    if n_rows == 0:
        # Nothing to predict; avoid asking the models for an empty range.
        result['anomaly'] = np.array([], dtype=int)
        return result

    de_signal = result['DE'].to_numpy()
    fe_signal = result['FE'].to_numpy()

    # Predictions are requested by position (0 .. n-1), not by df's index labels.
    # NOTE(review): only positions are passed to the models; the values in df
    # never reach them, so the predictions do not depend on df's contents --
    # confirm this is the intended behaviour.
    last = n_rows - 1
    de_pred = np.asarray(de_model.predict(start=0, end=last, dynamic=False)).ravel()
    fe_pred = np.asarray(fe_model.predict(start=0, end=last, dynamic=False)).ravel()

    if len(de_pred) != n_rows or len(fe_pred) != n_rows:
        raise ValueError(
            f"expected {n_rows} predictions per model, "
            f"got {len(de_pred)} (DE) and {len(fe_pred)} (FE)"
        )

    # Per-row squared errors, averaged over the two channels.
    avg_sq_error = ((de_signal - de_pred) ** 2 + (fe_signal - fe_pred) ** 2) / 2
    result['anomaly'] = np.where(avg_sq_error >= threshold, 1, 0)

    return result

# Default cut-off applied to the classifier's positive-class probability.
CLASSIFIER_PROBA_THRESHOLD = 0.21398088


def classification(X, df, model, threshold=CLASSIFIER_PROBA_THRESHOLD):
    """Attach thresholded classifier predictions to a copy of ``df``.

    Parameters
    ----------
    X : array-like
        Feature matrix handed to ``model.predict_proba``; must yield one
        row of probabilities per row of ``df``.
    df : pandas.DataFrame
        Frame to annotate. It is not modified; a copy is returned.
    model : object
        Classifier exposing ``predict_proba(X)`` whose second column is the
        positive-class probability.
    threshold : float, optional
        Rows with a positive-class probability ``>= threshold`` get
        ``classifier_prediction == 1``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with an integer ``'classifier_prediction'`` column
        holding 0 or 1.
    """
    result = df.copy()
    positive_proba = np.asarray(model.predict_proba(X))[:, 1]  # positive class probability
    result['classifier_prediction'] = np.where(positive_proba >= threshold, 1, 0)

    return result

def _load_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load model files that come from a trusted source.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def two_step_system(df,
                    de_model_path='arima_model_de.pkl',
                    fe_model_path='arima_model_fe.pkl',
                    classifier_path='xgb_model.pkl'):
    """Run anomaly detection followed by classification and combine both votes.

    The three models are unpickled from disk, ``anomaly_detection`` adds the
    ``'anomaly'`` column, ``classification`` adds ``'classifier_prediction'``
    using the ``'DE'`` and ``'FE'`` columns as features, and a row is finally
    predicted positive only when both steps flag it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``'DE'`` and ``'FE'``. The caller's
        frame is left untouched.
    de_model_path, fe_model_path : str, optional
        Paths of the pickled models used for the DE and FE channels.
    classifier_path : str, optional
        Path of the pickled classifier (presumably an XGBoost model, going
        by the default file name -- confirm).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the columns ``'anomaly'``,
        ``'classifier_prediction'`` and ``'final_prediction'`` (each 0 or 1).
    """
    de_model = _load_pickle(de_model_path)
    fe_model = _load_pickle(fe_model_path)
    classifier = _load_pickle(classifier_path)

    # Work on a private copy so a frame displayed earlier in the notebook
    # does not silently gain columns.
    result = df.copy()

    result = anomaly_detection(result, de_model, fe_model)
    result = classification(result[['DE', 'FE']], result, classifier)

    both_flagged = (result['anomaly'] == 1) & (result['classifier_prediction'] == 1)
    result['final_prediction'] = np.where(both_flagged, 1, 0)

    return result

In [26]:
def print_scores(y_true, y_pred):
    """Print ROC AUC, precision, accuracy, recall and F1 for the given labels.

    Each metric is called as ``metric(y_true, y_pred)`` and printed on its
    own line, in the order listed below.
    """
    report = (
        ("ROC AUC SCORE", roc_auc_score),
        ("PRECISION SCORE", precision_score),
        ("ACCURACY SCORE", accuracy_score),
        ("RECALL SCORE", recall_score),
        ("F1 SCORE", f1_score),
    )
    for label, metric in report:
        print(f"{label}: {metric(y_true, y_pred)}")

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score, precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

In [5]:
# Load the parsed bearing data set.
df = pd.read_csv('parsed_data/case_edu_bearing_data/final_parsed_data/1730_case_edu.csv')

# Keep one specific fault configuration plus every fault-free row.
# NOTE(review): the meaning/units of the codes 14, 1 and 2 are not visible here -- confirm.
df_fault = df[(df['fault_diameter'] == 14) & (df['fault_element'] == 1) & (df['fault'] == 1) & (df['fault_end'] == 2)]
df_normal = df[df['fault'] == 0]

# Stack both subsets and shuffle the rows. The shuffle is seeded so that a
# fresh "Restart & Run All" reproduces the same row order and the same split.
df_final = pd.concat((df_normal, df_fault))
df_final = df_final.sample(frac = 1, random_state = 42)

In [6]:
# Split the fault-free rows into train/test parts (DE and FE are split together,
# so both test series share the same index).
normal_rows = df_final[df_final['fault'] == 0]
de_train, de_test, fe_train, fe_test = train_test_split(normal_rows['DE'], normal_rows['FE'], test_size=0.25, random_state=42)

# Add fault rows to the test part: half as many as there are normal test rows.
# Seeded so the evaluation set is identical on every run.
faults = df_final[df_final['fault'] == 1][['DE', 'FE', 'fault']].sample(len(de_test)//2, random_state=42)
de_test = pd.concat((de_test, faults['DE']))
fe_test = pd.concat((fe_test, faults['FE']))

# Ground-truth labels, looked up in the raw frame by the test rows' index labels.
y_test = df.loc[de_test.index, 'fault']

In [7]:
# Inspect the DE test series (held-out normal rows followed by the sampled fault rows).
de_test

423786     0.000000
435442     0.160634
248598     0.036299
353561     0.163554
412864     0.078648
             ...   
3078474    0.011370
3044382    0.686451
3139520   -0.309601
3096662    0.094375
3075777   -0.323083
Name: DE, Length: 182116, dtype: float64

In [8]:
# Inspect the FE test series (held-out normal rows followed by the sampled fault rows).
fe_test

423786     0.196004
435442     0.033078
248598    -0.015615
353561     0.040885
412864     0.046844
             ...   
3078474   -0.011788
3044382   -1.135336
3139520   -0.658113
3096662    0.248774
3075777   -1.750768
Name: FE, Length: 182116, dtype: float64

In [19]:
# Combine both test series into one two-column frame.
# Note: this rebinds `df`, which held the raw CSV data up to this point.
df = pd.DataFrame(dict(DE=de_test, FE=fe_test))

In [20]:
# Inspect the combined DE/FE test frame.
df

Unnamed: 0,DE,FE
423786,0.000000,0.196004
435442,0.160634,0.033078
248598,0.036299,-0.015615
353561,0.163554,0.040885
412864,0.078648,0.046844
...,...,...
3078474,0.011370,-0.011788
3044382,0.686451,-1.135336
3139520,-0.309601,-0.658113
3096662,0.094375,0.248774


In [23]:
# Run the two-step pipeline on the test frame. Despite its name, `y_pred` is the
# returned DataFrame, which carries the 'anomaly', 'classifier_prediction' and
# 'final_prediction' columns.
y_pred = two_step_system(df)

In [25]:
# Pull the combined 0/1 prediction out of the result frame as a NumPy array.
y_pred_bin = y_pred['final_prediction'].to_numpy()

In [12]:
# Load the two pickled models (DE channel first, then FE channel) for manual inspection.
with open('arima_model_de.pkl', 'rb') as de_file:
    de_model = pickle.load(de_file)

with open('arima_model_fe.pkl', 'rb') as fe_file:
    fe_model = pickle.load(fe_file)

In [13]:
# Alias the test series under the names used by the prediction cell below.
de_signal, fe_signal = de_test, fe_test

In [14]:
# Request one prediction per row by position (0 .. n-1), as anomaly_detection does
# after resetting the index. de_signal's index labels must not be used here: they
# are row ids from the raw CSV (e.g. 423786 ... 3075777), so passing them as
# start/end asks for a range far larger than the series itself.
de_pred = de_model.predict(start=0, end=len(de_signal) - 1, dynamic = False)
#fe_pred = fe_model.predict(start=0, end=len(fe_signal) - 1, dynamic = False)

KeyboardInterrupt: 

In [27]:
# Score the combined 0/1 predictions against the ground-truth fault labels.
print_scores(y_test, y_pred_bin)

ROC AUC SCORE: 0.5628243154940985
PRECISION SCORE: 0.9954427083333334
ACCURACY SCORE: 0.7084550506270728
RECALL SCORE: 0.12593690799769378
F1 SCORE: 0.22358704394238502


In [28]:
# Unpack the 2x2 confusion matrix (flattened row-major: tn, fp, fn, tp).
tn, fp, fn, tp = confusion_matrix(y_test, y_pred_bin).ravel()

# Rows are the measured classes, columns the predicted ones.
confusion_table = pd.DataFrame(
    [[tp, fn],
     [fp, tn]],
    index = ['Measured Positive', 'Measured Negative'],
    columns = ['Predicted Positive', 'Predicted Negative'],
)
print(confusion_table)
print_scores(y_test, y_pred_bin)

                   Predicted Positive  Predicted Negative
Measured Positive                7645               53060
Measured Negative                  35              121376
ROC AUC SCORE: 0.5628243154940985
PRECISION SCORE: 0.9954427083333334
ACCURACY SCORE: 0.7084550506270728
RECALL SCORE: 0.12593690799769378
F1 SCORE: 0.22358704394238502
