<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

# Model Read

In [3]:
#===============================================
# Basic Imports
# Root of the project relative to this notebook; later cells prepend it to
# every dataset/model path.
BASE_FOLDER = '../../'
# `%run -i` executes the loader scripts inside this notebook's namespace.
# NOTE(review): later cells use `pd`, `np` and `pickle` without importing them
# here - presumably these scripts bring them into scope; confirm.
# NOTE(review): the backslash-separated script paths are Windows-style.
%run -i ..\..\utility\feature_extractor\JupyterLoad_feature_extractor.py
%run -i ..\..\utility\modeling\JupyterLoad_modeling.py

from sklearn.metrics import roc_auc_score
from tqdm import tqdm
import os
import glob
import seaborn as sns
# Switch on seaborn's default plot styling.
sns.set()

load feature_extractor_mother
load feature_extractor_mel_spectra
load feature_extractor_psd
load feature_extractor_ICA2
load feature_extractore_pre_nnFilterDenoise
load extractor_diagram_mother
load Simple_FIR_HP
load TimeSliceAppendActivation
load load_data
Load split_data
Load anomaly_detection_models
Load pseudo_supervised_models
Load tensorflow models
Load detection_pipe


In [8]:
# Load the pickled table of aggregated model results that the following cells
# filter and rank.
path = ''.join((BASE_FOLDER, './modeling/unsupervised/models_agg.dataframe'))
df = pd.read_pickle(path)

In [9]:
# Restrict the table to the four model/feature configurations of interest.
# The two MEL_den configurations are additionally pinned to a frame count.
feature_is_mel_den = df.feature_type == 'MEL_den'
feature_is_psd_raw = df.feature_type == 'PSD_raw'
model_is_iso_for = df.model_type == 'IsoFor'

mask1 = (df.model_type == 'AutoEnc') & feature_is_mel_den & (df.feat_frames == 5.0)
mask2 = model_is_iso_for & feature_is_mel_den & (df.feat_frames == 3.0)
mask3 = model_is_iso_for & feature_is_psd_raw
mask4 = (df.model_type == 'SVM') & feature_is_psd_raw

df = df[mask1 | mask2 | mask3 | mask4]
# Drop every column that no longer holds at least one non-NaN value.
df = df.dropna(axis=1, thresh=1)


In [10]:
# Keep a single row per (machine, ID, SNR, model_type, feature_type): after
# sorting by descending aggregated ROC-AUC, keep='first' retains the best one.
dedup_keys = ['machine', 'ID', 'SNR', 'model_type', 'feature_type']
df = df.sort_values(by='roc_auc_aggr', ascending=False)
df = df.drop_duplicates(subset=dedup_keys, keep='first')

In [11]:
# Sanity check: report every SNR/machine/ID combination that does not have
# exactly four rows, listing the (model_type, feature_type) pairs it does have.
for SNR in ('6dB', 'min6dB'):
    for machine in ('fan', 'pump', 'slider', 'valve'):
        for ID in ('00', '02'):
            df_tmp = df[(df.machine==machine) & (df.SNR==SNR) & (df.ID==ID)]
            if len(df_tmp) == 4:
                continue
            print(SNR, machine, ID, 'existing:', list(zip(df_tmp.model_type, df_tmp.feature_type)))

6dB fan 00 existing: [('IsoFor', 'MEL_den'), ('AutoEnc', 'MEL_den'), ('IsoFor', 'PSD_raw')]
6dB fan 02 existing: [('IsoFor', 'PSD_raw'), ('AutoEnc', 'MEL_den'), ('IsoFor', 'MEL_den')]
6dB pump 00 existing: [('AutoEnc', 'MEL_den'), ('IsoFor', 'MEL_den'), ('IsoFor', 'PSD_raw')]
6dB pump 02 existing: [('IsoFor', 'MEL_den'), ('AutoEnc', 'MEL_den'), ('IsoFor', 'PSD_raw')]
6dB slider 00 existing: [('IsoFor', 'PSD_raw'), ('IsoFor', 'MEL_den'), ('AutoEnc', 'MEL_den')]
6dB slider 02 existing: [('IsoFor', 'PSD_raw'), ('AutoEnc', 'MEL_den'), ('IsoFor', 'MEL_den')]
6dB valve 00 existing: [('AutoEnc', 'MEL_den'), ('IsoFor', 'MEL_den'), ('IsoFor', 'PSD_raw')]
6dB valve 02 existing: [('IsoFor', 'PSD_raw'), ('AutoEnc', 'MEL_den'), ('IsoFor', 'MEL_den')]


In [12]:
# Unpickle the pipe referenced by each selected row and group the pipes by
# their SNR+machine+ID key.
ensemble_pipes_selection = {}
for SNR in ('6dB', 'min6dB'):
    for machine in ('fan', 'pump', 'slider', 'valve'):
        for ID in ('00', '02'):
            # Register the key first so it exists even when no row matches.
            loaded_pipes = ensemble_pipes_selection.setdefault(SNR+machine+ID, [])
            df_tmp = df[(df.machine==machine) & (df.SNR==SNR) & (df.ID==ID)]
            for model_path in df_tmp.model_path:
                with open(model_path, 'rb') as f:
                    loaded_pipes.append(pickle.load(f))


In [15]:
# Re-fit every selected pipe whose stored file cannot be found on disk.
for ensemble in ensemble_pipes_selection.values():
    for pipe in ensemble:
        # A pipe counts as present when any file name starts with the first
        # 60 characters of its recorded filepath.
        # NOTE(review): the reason for the 60-character cut-off is not visible here - confirm.
        if glob.glob(pipe.filepath[0:60] + '*'):
            continue

        # Start from a freshly constructed model with the stored arguments.
        pipe.model = pipe._mdl(**pipe.model_args)

        # Locate the dataset descriptor for this machine/SNR/ID. The former
        # try/except repeated the very same lookup in its fallback, so a
        # missing file still ended in an uninformative IndexError.
        descr_pattern = (BASE_FOLDER
                         + '/dataset/extdia_v1*/{}{}{}_EDiaV1'.format(pipe.task['machine'], pipe.task['SNR'], pipe.task['ID'])
                         + "*pandaDisc*.pkl")
        descr_matches = glob.glob(descr_pattern, recursive=True)
        if not descr_matches:
            raise FileNotFoundError('no descriptor file matches ' + descr_pattern)
        pipe.task['path_descr'] = descr_matches[0]

        # Best effort: one failing pipe must not stop the others, but the
        # error itself is reported instead of being silently discarded.
        try:
            pipe.run_pipe(pipe.task)
        except Exception as err:
            print(pipe.task)
            print('run_pipe failed:', repr(err))

../..//dataset\extdia_v1\fanmin6dB00_EDiaV1HP_pandaDisc.pkl --> Done
...loading data
data loading completed

...preprocessing data
data preprocessing finished

...fitting the model
{'path_descr': '../..//dataset\\extdia_v1\\fanmin6dB00_EDiaV1HP_pandaDisc.pkl', 'feat': {'function': 'flat'}, 'feat_col': 'PSD_raw', 'SNR': 'min6dB', 'machine': 'fan', 'ID': '00', 'BASE_FOLDER': '../../'}
../..//dataset\extdia_v1\fanmin6dB02_EDiaV1HP_pandaDisc.pkl --> Done
...loading data
data loading completed

...preprocessing data
data preprocessing finished

...fitting the model
{'path_descr': '../..//dataset\\extdia_v1\\fanmin6dB02_EDiaV1HP_pandaDisc.pkl', 'feat': {'function': 'flat'}, 'feat_col': 'PSD_raw', 'SNR': 'min6dB', 'machine': 'fan', 'ID': '02', 'BASE_FOLDER': '../../'}
../..//dataset\extdia_v1\pumpmin6dB02_EDiaV1HP_pandaDisc.pkl --> Done
...loading data
data loading completed

...preprocessing data
data preprocessing finished

...fitting the model
{'path_descr': '../..//dataset\\extdia_v1\\pum

In [28]:
# Collect the pickled ensemble pipes stored on disk, grouped by SNR+machine+ID.
# `paths` remembers which files were matched for every key.
ensemble_pipes = {}
paths = {}
for SNR in ('6dB', 'min6dB'):
    for machine in ('fan', 'pump', 'slider', 'valve'):
        for ID in ('00', '02'):
            key = SNR+machine+ID
            pattern = BASE_FOLDER + 'modeling/ensemble/pipes/*_{}_{}_ID{}*.pkl'.format(SNR,machine,ID)
            paths[key] = glob.glob(pattern)
            ensemble_pipes[key] = []
            for path in paths[key]:
                with open(path, 'rb') as f:
                    unpickled = pickle.load(f)
                ensemble_pipes[key].append(unpickled)



In [87]:
# For every ensemble build one frame indexed by audio file path that holds
#   * one column of summed prediction scores per pipe,
#   * `score_agg`: the sum of the z-scored per-pipe columns,
#   * `ground_truth`: the per-file label, remapped 1 -> -1 and 0 -> 1.
ensemble_dfs = {}
for ensemble_key, ensemble in ensemble_pipes.items():
    if not ensemble:
        # No pipe was loaded for this key, so there is nothing to aggregate
        # (previously this ended in a KeyError further down).
        print(ensemble_key, 'has no pipes - skipped')
        continue

    scores = None
    for pipe in ensemble:
        col_name = pipe.model.name+'_'+pipe.task['feat_col']+'_pred_scores'
        # Select the column before aggregating: same values, but the other
        # df_test columns (possibly non-numeric) are never summed.
        pipe_scores = pipe.df_test.groupby(by='path').pred_scores.sum().rename(col_name)
        scores = pd.DataFrame(pipe_scores) if scores is None else scores.join(pipe_scores)

    # Labels are taken from the first pipe of the ensemble only.
    ground_truth = ensemble[0].df_test.groupby(by='path').abnormal.median().rename('ground_truth')

    # Standardise every pipe's scores so that differently scaled models
    # contribute equally, then add them up.
    s_tmp = pd.Series(np.zeros(len(scores)), index=scores.index)
    for col in scores.columns:
        s_tmp = s_tmp + (scores[col] - scores[col].mean())/scores[col].std()

    scores = scores.join(s_tmp.rename('score_agg'))
    scores = scores.join(ground_truth.replace(to_replace=1, value=-1).replace(to_replace=0, value=1))
    ensemble_dfs[ensemble_key] = scores

In [111]:
# One row per ensemble: ROC-AUC of the aggregated score followed by the
# ROC-AUC stored on each member pipe.
result_rows = []
for ensemble_key, ensemble_df in ensemble_dfs.items():
    # Fixed: the frames live in `ensemble_dfs`; the formerly referenced
    # `dfs_ensemble` is not defined anywhere in this notebook.
    data = [roc_auc_score(ensemble_df.ground_truth, ensemble_df.score_agg)]
    cols = ['score_aggr']
    for pipe in ensemble_pipes[ensemble_key]:
        data.append(pipe.roc_auc)
        cols.append('score_'+pipe.model.name+pipe.task['feat_col'])
    result_rows.append(pd.DataFrame(data=[data], columns=cols, index=[ensemble_key]))

# Concatenate once at the end: DataFrame.append no longer exists in
# pandas >= 2.0 and re-copied the whole frame on every iteration.
results = pd.concat(result_rows) if result_rows else pd.DataFrame([])

In [113]:
# Show one ensemble frame. `ensemble_key` is whatever value the most recently
# executed loop over the ensembles left behind.
ensemble_dfs[ensemble_key]

Unnamed: 0_level_0,IsoFor_MEL_den_pred_scores,AutoEnc_MEL_den_pred_scores,IsoFor_PSD_raw_pred_scores,score_agg,ground_truth
path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
\dataset\min6dB\valve\id_02\abnormal\00000000.wav,16.135058,-283.511700,0.056578,1.024252,-1.0
\dataset\min6dB\valve\id_02\abnormal\00000001.wav,11.920137,-384.805768,0.052994,-1.094628,-1.0
\dataset\min6dB\valve\id_02\abnormal\00000002.wav,20.571625,-242.225129,-0.043405,0.589782,-1.0
\dataset\min6dB\valve\id_02\abnormal\00000003.wav,12.583695,-271.428759,-0.036103,-1.171733,-1.0
\dataset\min6dB\valve\id_02\abnormal\00000004.wav,16.926014,-253.893691,-0.028628,0.011575,-1.0
...,...,...,...,...,...
\dataset\min6dB\valve\id_02\normal\00000674.wav,18.140503,-261.547330,0.056460,1.677182,1.0
\dataset\min6dB\valve\id_02\normal\00000677.wav,14.346098,-352.995391,0.055956,-0.184660,1.0
\dataset\min6dB\valve\id_02\normal\00000681.wav,5.286752,-442.490077,0.002638,-3.980705,1.0
\dataset\min6dB\valve\id_02\normal\00000683.wav,22.830955,-167.425202,0.062338,3.840312,1.0


In [112]:
# Display the ROC-AUC table: aggregated score versus the individual pipes.
results

Unnamed: 0,score_aggr,score_IsoForMEL_den,score_AutoEncMEL_den,score_IsoForPSD_raw
6dBfan00,0.949526,0.921869,0.896751,0.895049
6dBfan02,0.996423,0.908395,0.937136,0.993428
6dBpump00,0.997213,0.954546,0.973876,0.919018
6dBpump02,0.997565,0.980974,0.995676,0.98466
6dBslider00,0.999613,0.926099,0.948102,1.0
6dBslider02,0.999565,0.962113,0.974433,0.998724
6dBvalve00,0.846197,0.868537,0.941752,0.342561
6dBvalve02,0.997639,0.823805,0.909417,0.998819
min6dBfan00,0.554401,0.541677,0.579596,0.513435
min6dBfan02,0.793181,0.752485,0.790969,0.690497


In [114]:
# Mean ROC-AUC of the AutoEnc / MEL_den pipes over all ensembles.
results['score_AutoEncMEL_den'].mean()

0.8451687995998449

In [116]:
# Mean ROC-AUC of the IsoFor / MEL_den pipes over all ensembles.
results['score_IsoForMEL_den'].mean()

0.8067499748157827

In [117]:
# Mean ROC-AUC of the IsoFor / PSD_raw pipes over all ensembles.
results['score_IsoForPSD_raw'].mean()

0.8212180709891457

In [63]:
# Aggregate the per-pipe scores of one ensemble by summing their z-scores.
# NOTE(review): `predictions` is not defined in any cell shown in this
# notebook; it must come from kernel state - confirm its origin.
pump_scores = predictions['min6dBpump00']
# Start from zero on every run; the accumulator previously relied on a stale
# `s_tmp` left over from another cell and grew with each re-execution.
s_tmp = pd.Series(0.0, index=pump_scores.index)
# Iterate over the columns of the same frame that is being scored (the column
# list used to be taken from the '6dBpump00' frame).
for col in pump_scores.columns:
    s_tmp = s_tmp + (pump_scores[col] - pump_scores[col].mean())/pump_scores[col].std()
pump_scores.join(s_tmp.rename('score_agg'))

Unnamed: 0_level_0,IsoFor_MEL_den_pred_scores,AutoEnc_MEL_den_pred_scores,IsoFor_PSD_raw_pred_scores,score_agg
path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
\dataset\min6dB\pump\id_00\abnormal\00000000.wav,25.929776,-170.771882,0.050644,3.250660
\dataset\min6dB\pump\id_00\abnormal\00000001.wav,23.810519,-220.127161,-0.065106,-5.056553
\dataset\min6dB\pump\id_00\abnormal\00000002.wav,24.797648,-172.900647,0.086295,4.355213
\dataset\min6dB\pump\id_00\abnormal\00000003.wav,23.132294,-213.091062,-0.040864,-3.959239
\dataset\min6dB\pump\id_00\abnormal\00000004.wav,18.750950,-242.141352,0.001273,-5.025108
...,...,...,...,...
\dataset\min6dB\pump\id_00\normal\00000954.wav,24.595772,-180.009419,0.118014,5.527871
\dataset\min6dB\pump\id_00\normal\00000960.wav,20.509992,-160.364949,0.126204,4.713529
\dataset\min6dB\pump\id_00\normal\00000973.wav,19.892476,-187.501463,0.113700,2.854681
\dataset\min6dB\pump\id_00\normal\00000974.wav,-1.823076,-538.512740,0.106417,-20.198280


In [64]:
s_tmp

path
\dataset\min6dB\pump\id_00\abnormal\00000000.wav     3.250660
\dataset\min6dB\pump\id_00\abnormal\00000001.wav    -5.056553
\dataset\min6dB\pump\id_00\abnormal\00000002.wav     4.355213
\dataset\min6dB\pump\id_00\abnormal\00000003.wav    -3.959239
\dataset\min6dB\pump\id_00\abnormal\00000004.wav    -5.025108
                                                      ...    
\dataset\min6dB\pump\id_00\normal\00000954.wav       5.527871
\dataset\min6dB\pump\id_00\normal\00000960.wav       4.713529
\dataset\min6dB\pump\id_00\normal\00000973.wav       2.854681
\dataset\min6dB\pump\id_00\normal\00000974.wav     -20.198280
\dataset\min6dB\pump\id_00\normal\00000988.wav       2.740053
Length: 286, dtype: float64