<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

# Model Read

In [17]:
#===============================================
# Basic Imports
# Base directory (two levels up) that data paths in this notebook are built from.
BASE_FOLDER = '../../'
# `%run -i` executes each loader script inside this notebook's namespace, so the
# names those scripts define become available to the cells below.
# NOTE(review): `pd`, `np` and `pickle` are used further down but are not imported
# in this cell - presumably the loader scripts provide them; confirm.
# NOTE(review): the script paths use backslash separators - confirm they resolve
# on non-Windows systems.
%run -i ..\..\utility\feature_extractor\JupyterLoad_feature_extractor.py
%run -i ..\..\utility\modeling\JupyterLoad_modeling.py

from sklearn.metrics import roc_auc_score, confusion_matrix 
from tqdm import tqdm
import os
import glob
import seaborn as sns
# Switch matplotlib to seaborn's default styling for any plots drawn later.
sns.set()

load feature_extractor_mother
load feature_extractor_mel_spectra
load feature_extractor_psd
load feature_extractor_ICA2
load feature_extractore_pre_nnFilterDenoise
load extractor_diagram_mother
load Simple_FIR_HP
load TimeSliceAppendActivation
load load_data
Load split_data
Load anomaly_detection_models
Load pseudo_supervised_models
Load tensorflow models
Load detection_pipe


In [18]:
# Read the pickled DataFrame stored under modeling/unsupervised, relative to BASE_FOLDER.
path = '{}{}'.format(BASE_FOLDER, './modeling/unsupervised/models_agg.dataframe')
df = pd.read_pickle(path)

In [23]:
# Collect the pickled detection pipes of every (SNR, machine, ID) combination.
# `ensemble_pipes` maps '<SNR>_<machine>_<ID>' to the list of unpickled pipes
# whose file name matches that combination.
ensemble_pipes = {}
for SNR in ['6dB', 'min6dB']:
    for machine in ['pump', 'fan', 'slider', 'valve']:
        for ID in ['00', '02', '04', '06']:
            # glob returns matches in arbitrary order; sort so that the order of
            # the pipes (and of the columns derived from them) is reproducible.
            paths = sorted(glob.glob('pipes/*_{}_{}_ID{}*'.format(SNR, machine, ID)))
            # Three pipes are expected per combination; show the matches when the
            # count differs so a missing or surplus file is noticed.
            if len(paths) != 3:
                print(paths)
            key = '_'.join([SNR, machine, ID])
            ensemble_pipes[key] = []
            # `pipe_path` (not `path`) so the `path` set in an earlier cell survives.
            for pipe_path in paths:
                with open(pipe_path, 'rb') as f:
                    # CAUTION: unpickling runs arbitrary code - only load pipe
                    # files that were produced by this project.
                    ensemble_pipes[key].append(pickle.load(f))

In [25]:
# Build one DataFrame per ensemble (indexed by audio-file path) that holds the
# per-file score of every pipe, the aggregated ensemble scores, the thresholded
# ensemble prediction and the ground truth.

# A file is flagged abnormal when its weighted ensemble score lies below
# THRESHOLD_STD_FACTOR times the standard deviation of that score.
THRESHOLD_STD_FACTOR = -.25


def _ensemble_weight(model_name, feat_col):
    """Return the hand-set ensemble weight of a (model, feature column) pair.

    Raises:
        ValueError: if `model_name` is not one of 'IsoFor', 'AutoEnc', 'SVM'.
            Failing here prevents a pipe of an unknown model from silently
            inheriting the weight of the pipe processed before it.
    """
    if model_name == 'IsoFor':
        return 1. if feat_col == 'PSD_raw' else .75
    if model_name == 'AutoEnc':
        return 1.25
    if model_name == 'SVM':
        return 1.
    raise ValueError('no ensemble weight defined for model {!r}'.format(model_name))


ensemble_dfs = {}
for ensemble_key, ensemble in ensemble_pipes.items():
    if not ensemble:
        raise ValueError('no pipes loaded for ensemble {!r}'.format(ensemble_key))

    for i, pipe in enumerate(ensemble):
        score_col = pipe.model.name + '_' + pipe.task['feat_col'] + '_pred_scores'
        # Select the column before aggregating so that non-numeric columns of
        # df_test are never summed.
        file_scores = pipe.df_test.groupby(by='path')['pred_scores'].sum().rename(score_col)

        if i == 0:
            # The first pipe defines the file index and supplies the ground truth.
            ensemble_df = pd.DataFrame(file_scores)
            ground_truth = pipe.df_test.groupby(by='path')['abnormal'].median().rename('ground_truth')
            score_unweighted = pd.Series(np.zeros(len(ensemble_df)), index=ensemble_df.index)
            score_weighted = score_unweighted.copy()
        else:
            ensemble_df = ensemble_df.join(file_scores)

        weight = _ensemble_weight(pipe.model.name, pipe.task['feat_col'])

        # Standardise the scores of this pipe (as aligned to the ensemble index)
        # before adding them, so every pipe contributes on the same scale.
        aligned_scores = ensemble_df.iloc[:, -1]
        centered = aligned_scores - aligned_scores.mean()
        std = aligned_scores.std()
        score_weighted = score_weighted + weight * centered / std
        score_unweighted = score_unweighted + centered / std

    threshold = THRESHOLD_STD_FACTOR * score_weighted.std()

    # Labels use -1 for abnormal and +1 for normal, for both the prediction
    # and the ground truth.
    prediction = (
        (score_weighted < threshold)
        .rename('prediction_abnormal_weighted')
        .astype(float)
        .replace(to_replace=1, value=-1)
        .replace(to_replace=0, value=1)
    )
    ensemble_dfs[ensemble_key] = (
        ensemble_df
        .join(score_weighted.rename('score_agg_weighted'))
        .join(score_unweighted.rename('score_agg_unweighted'))
        .join(prediction)
        .join(ground_truth.replace(to_replace=1, value=-1).replace(to_replace=0, value=1))
    )

In [26]:
# Persist the dict of per-ensemble DataFrames as a pickle file.
with open('ensemble_dfs.dataframe', mode='wb') as dump_file:
    pickle.dump(ensemble_dfs, dump_file)

In [27]:
# Summarise every ensemble in one row: confusion-matrix counts and the rates
# derived from them, the ROC AUC of both aggregated scores and the ROC AUC of
# each individual pipe. "Positive" means abnormal (label -1).
result_rows = []
for ensemble_key, ensemble in ensemble_dfs.items():
    keys = ensemble_key.split('_')  # [SNR, machine, ID]

    # Pin the label order so that row/column 0 is always -1 (abnormal) and
    # row/column 1 is always +1 (normal); rows are truth, columns predictions.
    conf_mat = confusion_matrix(ensemble.ground_truth,
                                ensemble.prediction_abnormal_weighted,
                                labels=[-1, 1])

    # Row-major order of the 2x2 matrix is [[tp, fn], [fp, tn]]. The values stay
    # NumPy floats so that a zero denominator below yields nan/inf, not an error.
    tp, fn, fp, tn = conf_mat.astype(float).ravel()
    total = np.sum(conf_mat)

    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    fpr = fp/(fp+tn)
    fnr = fn/(fn+tp)
    r_fp = fp/total
    r_fn = fn/total

    data = keys + [conf_mat,
                   fp, fn, tp, tn, precision, recall, fpr, fnr, r_fp, r_fn,
                   roc_auc_score(ensemble.ground_truth, ensemble.score_agg_weighted),
                   roc_auc_score(ensemble.ground_truth, ensemble.score_agg_unweighted)]

    cols = ['SNR', 'machine', 'ID', 'confusion_matrix',
            'fp', 'fn', 'tp', 'tn', 'precision', 'recall', 'fpr', 'fnr', 'r_fp', 'r_fn',
            'score_aggr_weighted', 'score_aggr_unweighted']

    # One extra column per pipe holding the ROC AUC stored on that pipe.
    for pipe in ensemble_pipes[ensemble_key]:
        data.append(pipe.roc_auc)
        cols.append('score_'+pipe.model.name+pipe.task['feat_col'])

    result_rows.append(pd.DataFrame(data=[data], columns=cols, index=[ensemble_key]))

# Concatenate once instead of growing the frame row by row; rows with different
# pipe columns are aligned by column name.
results = pd.concat(result_rows) if result_rows else pd.DataFrame([])

In [35]:
# Mean of every numeric metric per SNR level. numeric_only=True explicitly skips
# the non-numeric columns (machine, ID, confusion_matrix) instead of relying on
# them being dropped implicitly.
results.groupby(by=['SNR']).mean(numeric_only=True)

Unnamed: 0_level_0,fp,fn,tp,tn,precision,recall,fpr,fnr,r_fp,r_fn,score_aggr_weighted,score_aggr_unweighted,score_AutoEncMEL_den,score_IsoForMEL_den,score_IsoForPSD_raw
SNR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
6dB,6.4375,51.25,155.0,199.8125,0.955016,0.773437,0.036597,0.226563,0.018298,0.113282,0.969906,0.968828,0.924169,0.891087,0.937031
min6dB,36.25,86.5,119.75,170.0,0.749119,0.568264,0.179625,0.431736,0.089813,0.215868,0.768559,0.764145,0.74495,0.685573,0.73226


In [34]:
# Export the summary table as CSV. to_csv opens and closes the file itself, so
# no separate file handle on the same path is needed.
results.to_csv('results.csv')

In [29]:
# Also store the summary table as a pickle file.
results.to_pickle('ensemble_results.dataframe')