# A. EFFECT OF THE NUMBER OF TRAINING SAMPLES ON PERFORMANCE
In this experiment, we evaluate how the proposed few-shot learning method addresses the first two challenges of fault diagnosis with limited data: 1) industrial systems are not allowed to run into faulty states because of the consequences, especially for critical systems and failures; 2) most electromechanical failures develop slowly and follow a degradation path, so the degradation of a system towards failure may take months or even years. We conducted a series of experiments on the training data of dataset D with 90, 120, 300, 900, 1500, 3000, 6000, 12000, and 19800 samples respectively, and then evaluated the effect of the number of training samples on the performance of each trained model. We repeated each experiment ten times to account for the randomness of the algorithms.


In [1]:
%matplotlib inline
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
# Configure the TensorFlow session used by Keras: enable `allow_growth` on the
# GPU options (per the TF1 docs this makes GPU memory grow on demand).
tf_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
set_session(tf.Session(config=tf_config))

import matplotlib.pyplot as plt
from scipy.io import loadmat
import numpy as np
import imp
import pandas as pd


Using TensorFlow backend.


# load data

In [2]:
import cwru 

# Window size handed to the CWRU loader (presumably the number of signal points
# per sample -- confirm in cwru.CWRU).
window_size = 2048
# Load the '12DriveEndFault' experiment for the 1772/1750/1730 entries; the
# loader's log below shows these as sub-folders of Datasets/CWRU
# (presumably motor speeds in rpm -- confirm).
data = cwru.CWRU(['12DriveEndFault'], ['1772', '1750', '1730'], window_size)
# Cell output: number of classes, (class name, label) pairs, train size, test size.
data.nclasses,data.classes,len(data.X_train),len(data.X_test)

Datasets/CWRU 12DriveEndFault 1730
0 Datasets/CWRU/NormalBaseline/1730/Normal.mat
(485643, 2)
1 Datasets/CWRU/NormalBaseline/1750/Normal.mat
dict_keys(['X099_FE_time', 'ans', '__header__', 'X098_DE_time', '__globals__', '__version__', 'X099_DE_time', 'X098_FE_time'])
(485063, 2)
2 Datasets/CWRU/NormalBaseline/1772/Normal.mat
(483903, 2)
3 Datasets/CWRU/12DriveEndFault/1730/0.007-Ball.mat
(121556, 2)
4 Datasets/CWRU/12DriveEndFault/1750/0.007-Ball.mat
(121556, 2)
5 Datasets/CWRU/12DriveEndFault/1772/0.007-Ball.mat
(121410, 2)
6 Datasets/CWRU/12DriveEndFault/1730/0.014-Ball.mat
(122136, 2)
7 Datasets/CWRU/12DriveEndFault/1750/0.014-Ball.mat
(121991, 2)
8 Datasets/CWRU/12DriveEndFault/1772/0.014-Ball.mat
(122136, 2)
9 Datasets/CWRU/12DriveEndFault/1730/0.021-Ball.mat
(122136, 2)
10 Datasets/CWRU/12DriveEndFault/1750/0.021-Ball.mat
(122136, 2)
11 Datasets/CWRU/12DriveEndFault/1772/0.021-Ball.mat
(121701, 2)
12 Datasets/CWRU/12DriveEndFault/1730/0.007-InnerRace.mat
(122917, 2)
13 Datasets/C

(10,
 [('NormalBaselineNormal', 0),
  ('12DriveEndFault0.007-Ball', 1),
  ('12DriveEndFault0.014-Ball', 2),
  ('12DriveEndFault0.021-Ball', 3),
  ('12DriveEndFault0.007-InnerRace', 4),
  ('12DriveEndFault0.014-InnerRace', 5),
  ('12DriveEndFault0.021-InnerRace', 6),
  ('12DriveEndFault0.007-OuterRace6', 7),
  ('12DriveEndFault0.014-OuterRace6', 8),
  ('12DriveEndFault0.021-OuterRace6', 9)],
 19800,
 750)

## Config

In [3]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


import siamese
# imp.reload(siamese)
import utils
imp.reload(utils)


# Shared settings dict; the training cell stores the per-run "save_path" in it.
settings = {}

# Experiment name; used as the sub-folder of tmp/ for every result file.
exp_name = "EXP-AB"
# Total number of training samples drawn for each experiment (split evenly across classes).
exps = [60,90,120,200,300,600,900,1500,3000,6000,12000,19800]
# Number of repetitions per training-set size; each repetition uses its own random seed.
times = 5

### Training & Testing

In [None]:
from sklearn import svm


def _sample_balanced_indices(class_indices, num, head_fraction=0.6):
    """Draw ``num`` indices per class (without replacement) from the global NumPy RNG.

    Args:
        class_indices: one 1-D index array per class.
        num: number of indices to draw from each class.
        head_fraction: share of each class's draw that is placed in the
            leading part of the returned list.

    Returns:
        A list holding the first ``int(head_fraction * num)`` picks of every
        class (in class order) followed by the remaining picks of every class
        (in class order).
    """
    split = int(head_fraction * num)
    head, tail = [], []
    for idxs in class_indices:
        chosen = idxs[np.random.choice(len(idxs), num, replace=False)]
        head.extend(chosen[:split])
        tail.extend(chosen[split:])
    return head + tail


def EXPAB_train_and_test(exp_name,exps,is_training=True):
    """Fit and score an SVM baseline for every training-set size in ``exps``.

    For each size, ``times`` repetitions are run. Repetition ``k`` seeds the
    global NumPy RNG with ``k * 10``, draws the same number of samples from
    every class of ``data.y_train``, fits an ``svm.SVC`` on the flattened
    samples and records the accuracy (in percent) on ``data.X_test``.
    The scores of one size are written to
    ``tmp/<exp_name>/size_<exp>/scores_svm.csv``.

    Reads the notebook globals ``data``, ``times``, ``settings`` and ``utils``
    and overwrites ``settings["save_path"]`` on every repetition.

    Args:
        exp_name: experiment name, used as the sub-folder of ``tmp/``.
        exps: iterable of total training-set sizes.
        is_training: unused; the SVM is always refitted. Kept (now with a
            default) so existing three-argument calls keep working.

    Raises:
        ValueError: if a size yields fewer than one sample per class or more
            samples per class than the smallest class contains.
    """
    train_classes = sorted(set(data.y_train))
    train_indices = [np.where(data.y_train == c)[0] for c in train_classes]
    smallest_class = min((len(idxs) for idxs in train_indices), default=0)
    # The flattened test set is identical for every run, so build it once.
    X_test_flat = data.X_test.reshape(len(data.X_test), -1)

    for exp in exps:
        num = int(exp / len(train_classes))
        # Fail early with a readable message instead of an opaque error from
        # np.random.choice / SVC.fit further down.
        if not 1 <= num <= smallest_class:
            raise ValueError(
                "training size %s gives %s samples per class; need between 1 and %s"
                % (exp, num, smallest_class))

        scores_svm = []
        for time_idx in range(times):
            seed = time_idx*10
            np.random.seed(seed)
            print("\n%s-%s"%(exp,time_idx) + '*'*80)
            settings["save_path"] = "tmp/%s/size_%s/time_%s/" % (exp_name,exp,time_idx)
            data._mkdir(settings["save_path"])

            # The SVM baseline has no validation step, so all drawn samples are
            # used for fitting. The helper keeps the sample order of the former
            # 60/40 train/validation split followed by concatenation.
            selected = _sample_balanced_indices(train_indices, num)
            X_train, y_train = data.X_train[selected], data.y_train[selected]

            clf = svm.SVC(gamma='scale', decision_function_shape='ovo')
            clf.fit(X_train.reshape(len(X_train),-1), y_train)

            pred = clf.predict(X_test_flat)
            score = accuracy_score(data.y_test,pred)*100
            print('svm:',score)
            scores_svm.append(score)

            # Plot a confusion matrix for every 10th repetition only
            # (i.e. just the first one while times <= 10).
            if time_idx%10==0:
                utils.confusion_plot(pred,data.y_test)

        # One row per repetition, written next to the per-repetition folders.
        a = pd.DataFrame(np.array(scores_svm))
        a.to_csv("tmp/%s/size_%s/scores_svm.csv" % (exp_name,exp),index=True)

        
# `is_training` was passed as a bare name that no cell defines, which raises
# NameError on a fresh kernel. The function does not read the flag, so pass it
# explicitly.
EXPAB_train_and_test(exp_name, exps, is_training=True)

## Analysis

In [4]:
# Training-set sizes to analyse. NOTE: this rebinds the `exps` list from the
# config cell, and sizes 3000 and 12000 from that list are not included here;
# re-run the config cell before re-running the training cell.
exps = [60,90,120,200,300,600,900,1500,6000,19800]

# Read the per-size score files and concatenate them once, instead of growing
# a DataFrame inside the loop starting from an empty frame.
score_frames = []
for exp in exps:
    file_path = "tmp/%s/size_%s" % (exp_name,exp)
    tmp_data = pd.read_csv("%s/scores_svm.csv" % (file_path),
                           sep=',', index_col=0)
    tmp_data['exp'] = exp
    score_frames.append(tmp_data)
scores_svm_all = pd.concat(score_frames, axis=0)

# Raw scores of every repetition, tagged with their training-set size.
scores_svm_all.to_csv("tmp/%s/scores_svm_all.csv" % (exp_name), float_format='%.6f', index=True)

# The model label is added after the raw dump so the CSV above keeps its layout.
scores_svm_all['model'] = 'SVM'

scores_all = scores_svm_all

# Mean and standard deviation of the accuracy over repetitions, per model and size.
scores_by_size = scores_all.groupby(['model','exp'])
scores_all_mean = scores_by_size.mean()
scores_all_std = scores_by_size.std()
scores_all_mean.to_csv("tmp/%s/scores_svm_mean.csv" % (exp_name), float_format='%.2f', index=True)
scores_all_std.to_csv("tmp/%s/scores_svm_std.csv" % (exp_name), float_format='%.2f', index=True)
scores_all_mean, scores_all_std

(                     0
 model exp             
 SVM   60     18.933333
       90     26.560000
       120    31.200000
       200    38.666667
       300    43.893333
       600    50.053333
       900    52.346667
       1500   54.533333
       6000   63.360000
       19800  72.933333,                     0
 model exp            
 SVM   60     1.523155
       90     2.354004
       120    0.889444
       200    2.351359
       300    1.564609
       600    2.232288
       900    1.994660
       1500   1.002220
       6000   1.241504
       19800  0.000000)