# Import python library

In [1]:
import os, shutil
import time
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io, signal
from scipy.fftpack import fft, fftshift

import tensorflow as tf
from tensorflow.keras import models, layers, regularizers, optimizers
from tensorflow.keras.callbacks import LearningRateScheduler

print("Installed tensorflow version: ", tf.__version__)

Installed tensorflow version:  2.7.0


# GPU setting

In [2]:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # GPU unit setting
tf.keras.backend.floatx()

'float32'

# Parameters Setting for training

In [3]:
### data parameters
winT = 18000 # one priod


### RMSprop optimizer parameters
lr_begin = 1e-3
RHO = 0.8

### Training parameters 
MAX_EPOCHS = 80
BATCH_SIZE = 128
EPOCH_1 = math.ceil(MAX_EPOCHS/2)
EPOCH_2 = math.ceil(MAX_EPOCHS*3/4)

# Load measured datasets

In [4]:
print("[Info.] where am I?")
%pwd

[Info.] where am I?


'/home/wkchoi/Arc-Fault'

In [5]:
# Define signal plot function
def show_signal_subplot(n_show, data, label, YRNG_MIN=-1, YRNG_MAX=1):
    plt.figure(2, figsize=(12, 8))
    plt.subplots_adjust(wspace=0.5)
    for i in range(n_show):
        plt.subplot(5, 10, i + 1)
        plt.plot(data[i], color='cornflowerblue')
        plt.text(int(data.shape[1]/2), 0, "%d" % label[i], fontsize=12, color='r')
        plt.xlim(0, data.shape[1])
        plt.ylim(YRNG_MIN, YRNG_MAX)
        
def data_shuffle(data):
    s = np.arange(data.shape[0])
    np.random.shuffle(s)
    
    return data[s]
    
    

def gen_one_period_data(file_path, winT):
    
    ##### load raw data
    data_raw = pd.read_excel(file_path, index_col=None, header=None, sheet_name='Sheet1')
    data = data_raw.to_numpy() # numpy array
    
    ##### Data Normalization
    data_n = np.zeros((data.shape[0],data.shape[1]))
    for i in range(data.shape[1]):
        data_n[:,i] = data[:,i]/max(abs(data[:,i]))  
    
    ##### expand one-period dataset
    dT = round(winT / 10)
    Num = round((data_n.shape[0] - winT)/dT)
    
    data_tot = np.zeros((winT,Num*data_n.shape[1]))
    index = 0
    for i in range(data_n.shape[1]):
        for j in range(Num):
            index = Num*i+j
            data_tot[:,index] = data_n[j*dT:winT+j*dT,i]
            
    print("[Info.] Total re-arranged one-period data shape: ", data_tot.shape)
    
    return data_tot

def merge_one_period_data(winT, *file_paths):
    
    subArrays = []
    for file_path in file_paths:
        data = gen_one_period_data(file_path, winT)
        subArrays.append(data)
                
    allArrays = np.concatenate(subArrays, axis=1)
    
    print("[Info.] ==========> Total merged one-period data shape: ", allArrays.shape, "\n")
        
    return allArrays

In [6]:
base_dir = "/home/wkchoi/Arc-Fault"

## 1. Fan dataset

In [7]:
### Fan arc raw dataset
file_path1 = os.path.join(base_dir, "Fan/arc/Fan_1_arc_humid_44(c61)_data.xlsx")
file_path2 = os.path.join(base_dir, "Fan/arc/Fan_2_arc_humid_45_data.xlsx")
file_path3 = os.path.join(base_dir, "Fan/arc/Fan_1_arc_humid_34(c14)_c2_data.xlsx")
file_path4 = os.path.join(base_dir, "Fan/arc/Fan_2_arc_humid_34(c23)_c15_data.xlsx")
file_path5 = os.path.join(base_dir, "Fan/arc/fan_1_arc_data.xlsx")
file_path6 = os.path.join(base_dir, "Fan/arc/fan_2_arc(c57)_data.xlsx")
file_paths = [file_path1, file_path2, file_path3, file_path4, file_path5, file_path6]
fan_arc = merge_one_period_data(winT, *file_paths)

##### Fan normal raw dataset
file_path1 = os.path.join(base_dir, "Fan/normal/Fan_1_normal_humid_room_data.xlsx")
file_path2 = os.path.join(base_dir, "Fan/normal/Fan_2_normal_humid_50_data.xlsx")
file_path3 = os.path.join(base_dir, "Fan/normal/fan_1_normal_data.xlsx")
file_path4 = os.path.join(base_dir, "Fan/normal/fan_2_normal(c54)_data.xlsx")
file_paths = [file_path1, file_path2, file_path3, file_path4]
fan_normal = merge_one_period_data(winT, *file_paths)


[Info.] Total re-arranged one-period data shape:  (18000, 1480)
[Info.] Total re-arranged one-period data shape:  (18000, 1560)
[Info.] Total re-arranged one-period data shape:  (18000, 80)
[Info.] Total re-arranged one-period data shape:  (18000, 600)
[Info.] Total re-arranged one-period data shape:  (18000, 2000)
[Info.] Total re-arranged one-period data shape:  (18000, 1880)

[Info.] Total re-arranged one-period data shape:  (18000, 2000)
[Info.] Total re-arranged one-period data shape:  (18000, 2000)
[Info.] Total re-arranged one-period data shape:  (18000, 2000)
[Info.] Total re-arranged one-period data shape:  (18000, 2160)



In [59]:

#### Fan Data shuffle
fan_arc_1 = data_shuffle(fan_arc.T)
fan_arc_2 = fan_arc_1.T[:,:]

fan_normal_1 = data_shuffle(fan_normal.T)
fan_normal_2 = fan_normal_1.T[:,:7600]

In [60]:
print(fan_arc_2.shape)
print(fan_normal_2.shape)


(18000, 7600)
(18000, 7600)


In [61]:
print(fan_arc_2.shape)
print(fan_normal_2.shape)


(18000, 7600)
(18000, 7600)


# Feature Extraction

In [62]:
# 특성추출을 위한 함수 작성 ( 변수이름, 데이터 크기)
def create_feature(x, y):
    feature = np.zeros(shape=(39,y))
    mean_val = np.zeros((10, y))
    std_val = np.zeros((10, y))
    Pole_diff = np.zeros((10, y))
    Avg_diff = np.zeros((9, y))

    for i in range(y):
        sig = x[:, i]
        win = round(x.shape[0]/10)
        for j in range(10):
            tmp = x[int(j * win) : int((j + 1) * win)]
            mean_val[j] = np.mean(tmp[j],axis=0)
            std_val[j] = np.std(tmp[j])
            Pole_diff[j] = np.max(tmp[j]) - np.min(tmp[j])
        for z in range(9):
            Avg_diff[z] = np.mean(tmp[z], axis=0) - np.mean(tmp[z+1],axis=0)
        
    feature = np.concatenate((mean_val, std_val, Pole_diff, Avg_diff), axis =0)
    return feature

In [63]:
fan_feature_arc = create_feature(fan_arc_2, 7600)
fan_feature_norm = create_feature(fan_normal_2, 7600)

### Arc_Total 특성 추출

In [64]:
print(fan_feature_norm.shape)
print(fan_feature_arc.shape)
print(fan_feature_norm.T.shape)
print(fan_feature_arc.T.shape)
FFN = fan_feature_norm.T
FFA = fan_feature_arc.T

(39, 7600)
(39, 7600)
(7600, 39)
(7600, 39)


# Data Shuffling

In [65]:
##### Fan Labeling
norm_fan_label = np.zeros(FFN.shape[1]) # label 0
arc_fan_label = np.ones(FFA.shape[1])  # label 1

In [66]:
##### Prepare full datasets for training

Train_raw = np.concatenate((fan_feature_norm, fan_feature_arc), axis=0)
Train_label = np.concatenate((norm_fan_label, arc_fan_label))
print('[Info.] Total dataset shape: ', Train_raw.shape)

[Info.] Total dataset shape:  (78, 7600)


In [67]:
Train_label

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [68]:
# s라는 배열에 data의 인덱스를 넣고 섞은 뒤 data와 data_label 배열에 대입.(train)
s = np.arange(Train_raw.shape[0])
np.random.shuffle(s)

data_tr = Train_raw[s]
data_tr_label = Train_label[s]
print("===> Display label numbers after data shuffling:", "\n", data_tr_label[0:100])

===> Display label numbers after data shuffling: 
 [0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0.
 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 1.
 1. 0. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1.
 0. 0. 1. 0. 1. 0.]


In [69]:
SPL_RATIO = 0.8 
index = round(data_tr.shape[0]*SPL_RATIO)

### train 데이터와 test 데이터로 분리
train_X, train_Y = data_tr[:index], data_tr_label[:index]
test_X, test_Y = data_tr[index:], data_tr_label[index:]
print('[Info.] Number of training dataset: ', len(train_X))
print('[Info.] Number of test dataset: ', len(test_X))

### Save trainind and test datasets and labels
dataset_dir = './TrTedata'
if os.path.exists(dataset_dir):  # 반복적인 실행을 위해 디렉토리를 삭제
    shutil.rmtree(dataset_dir)   
os.mkdir(dataset_dir)

# training
train_X_dir = os.path.join(dataset_dir, 'train_X.npy')
np.save(train_X_dir, train_X)

train_Y_dir = os.path.join(dataset_dir, 'train_Y.npy')
np.save(train_Y_dir, train_Y)

# test
test_X_dir = os.path.join(dataset_dir, 'test_X.npy')
np.save(test_X_dir, test_X)

test_Y_dir = os.path.join(dataset_dir, 'test_Y.npy')
np.save(test_Y_dir, test_Y)

[Info.] Number of training dataset:  62
[Info.] Number of test dataset:  16


In [70]:
### Training and test datasets
x_train = np.reshape(train_X, (len(train_X),train_X.shape[1],1))
x_test = np.reshape(test_X, (len(test_X),train_X.shape[1],1))

### Training and test labeling
y_train = np.asarray(train_Y).astype('float32')
y_test = np.asarray(test_Y).astype('float32')

print('[Info.] x_train numpy shape: ', x_train.shape)
print('[Info.] x_test numpy shape: ', x_test.shape)
print('[Info.] y_train numpy shape: ', y_train.shape)
print('[Info.] y_test numpy shape: ', y_test.shape)

[Info.] x_train numpy shape:  (62, 7600, 1)
[Info.] x_test numpy shape:  (16, 7600, 1)
[Info.] y_train numpy shape:  (62,)
[Info.] y_test numpy shape:  (16,)


In [71]:
### SVM 적용위해 3차원 데이터 2차원으로 변경
x_train = x_train.reshape(62, 7600)
x_test = x_test.reshape(16, 7600)
print(x_train.shape)
print(x_test.shape)

(62, 7600)
(16, 7600)


# SVM 적용

In [72]:
features = x_train
target = y_train

In [73]:
from sklearn.svm import SVC
from sklearn import svm, metrics
import numpy as np
import matplotlib.pyplot as plt

svc = SVC(kernel = 'linear', C = 10)

In [74]:
model = svc.fit(features, target)

In [75]:
# 정확도 계산.
print('학습용 데이터로 측정한 정확도 = %.2f' % svc.score(x_train, y_train))
print('시험용 데이터로 측정한 정확도 = %.2f' % svc.score(x_test, y_test))

학습용 데이터로 측정한 정확도 = 0.52
시험용 데이터로 측정한 정확도 = 0.44


In [76]:
from sklearn.metrics import confusion_matrix
y_pred = svc.predict(features)
confusion_matrix(target, y_pred)

array([[ 8, 23],
       [ 7, 24]])

## kernel SVM 적합 및 비교

### LinearSVC

In [77]:
clf = svm.LinearSVC(C = 10)
clf.fit(features, target)
y_pred = clf.predict(features)
confusion_matrix(target, y_pred)



array([[ 0, 31],
       [ 0, 31]])

### radial basis function (방사 기저 함수)

In [78]:
clf_r = svm.SVC(kernel = 'rbf', C=100, gamma =100)
clf_r.fit(features, target)
y_pred = clf_r.predict(features)
confusion_matrix(target, y_pred)

array([[30,  1],
       [ 4, 27]])

In [79]:
# 정확도 계산.
print('학습용 데이터로 측정한 정확도 = %.2f' % clf_r.score(x_train, y_train))
print('시험용 데이터로 측정한 정확도 = %.2f' % clf_r.score(x_test, y_test))

학습용 데이터로 측정한 정확도 = 0.92
시험용 데이터로 측정한 정확도 = 0.62


### polynomial kernel (다항식)

In [80]:
clf_p = svm.SVC(kernel = 'rbf',degree = 3, C = 0.1,gamma = 'auto')
clf_p.fit(features, target)
y_pred = clf_p.predict(features)
confusion_matrix(target, y_pred)

array([[17, 14],
       [ 7, 24]])

In [81]:
# 정확도 계산.
print('학습용 데이터로 측정한 정확도 = %.2f' % clf_p.score(x_train, y_train))
print('시험용 데이터로 측정한 정확도 = %.2f' % clf_p.score(x_test, y_test))

학습용 데이터로 측정한 정확도 = 0.66
시험용 데이터로 측정한 정확도 = 0.50


# Model Evaluation

In [82]:
# 모델 성능
### rbf 커널 적용 시
pre = clf_r.predict(x_test)

ac_score = metrics.accuracy_score(y_test, pre)
cl_report = metrics.classification_report(y_test, pre)
print("정답률 = ",ac_score)
print("리포트 =\n", cl_report)

정답률 =  0.625
리포트 =
               precision    recall  f1-score   support

         0.0       0.67      0.50      0.57         8
         1.0       0.60      0.75      0.67         8

    accuracy                           0.62        16
   macro avg       0.63      0.62      0.62        16
weighted avg       0.63      0.62      0.62        16



# Grid Search

In [83]:
import timeit
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.keras import models, layers, regularizers, optimizers
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

from sklearn.svm import SVC
from sklearn import svm, metrics, model_selection
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.pipeline import Pipeline
from sklearn.metrics.pairwise import rbf_kernel

In [90]:

values = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
param_grid = [{'kernel':['rbf'], 'C':values, 'gamma':values},
               {'kernel':['linear'], 'C':values}]
gs = GridSearchCV(SVC(), param_grid, cv= KFold(n_splits=5, shuffle=True), scoring='accuracy', verbose = 3)
gs.fit(x_train, y_train)

Fitting 5 folds for each of 110 candidates, totalling 550 fits
[CV 1/5] END ..C=1e-05, gamma=1e-05, kernel=rbf;, score=0.385 total time=   0.0s
[CV 2/5] END ..C=1e-05, gamma=1e-05, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END ..C=1e-05, gamma=1e-05, kernel=rbf;, score=0.333 total time=   0.0s
[CV 4/5] END ..C=1e-05, gamma=1e-05, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END ..C=1e-05, gamma=1e-05, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END .C=1e-05, gamma=0.0001, kernel=rbf;, score=0.385 total time=   0.0s
[CV 2/5] END .C=1e-05, gamma=0.0001, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END .C=1e-05, gamma=0.0001, kernel=rbf;, score=0.333 total time=   0.0s
[CV 4/5] END .C=1e-05, gamma=0.0001, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END .C=1e-05, gamma=0.0001, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END ..C=1e-05, gamma=0.001, kernel=rbf;, score=0.385 total time=   0.0s
[CV 2/5] END ..C=1e-05, gamma=0.001, kernel=rb

[CV 4/5] END ...C=0.001, gamma=0.01, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END ...C=0.001, gamma=0.01, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END ....C=0.001, gamma=0.1, kernel=rbf;, score=0.385 total time=   0.0s
[CV 2/5] END ....C=0.001, gamma=0.1, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END ....C=0.001, gamma=0.1, kernel=rbf;, score=0.333 total time=   0.0s
[CV 4/5] END ....C=0.001, gamma=0.1, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END ....C=0.001, gamma=0.1, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END ......C=0.001, gamma=1, kernel=rbf;, score=0.385 total time=   0.0s
[CV 2/5] END ......C=0.001, gamma=1, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END ......C=0.001, gamma=1, kernel=rbf;, score=0.333 total time=   0.0s
[CV 4/5] END ......C=0.001, gamma=1, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END ......C=0.001, gamma=1, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END .....C=0.001, g

[CV 2/5] END ......C=0.1, gamma=100, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END ......C=0.1, gamma=100, kernel=rbf;, score=0.333 total time=   0.0s
[CV 4/5] END ......C=0.1, gamma=100, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END ......C=0.1, gamma=100, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=1000, kernel=rbf;, score=0.385 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1000, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1000, kernel=rbf;, score=0.333 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1000, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1000, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END ....C=0.1, gamma=10000, kernel=rbf;, score=0.385 total time=   0.0s
[CV 2/5] END ....C=0.1, gamma=10000, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END ....C=0.1, gamma=10000, kernel=rbf;, score=0.333 total time=   0.0s
[CV 4/5] END ....C=0.1, gamm

[CV 5/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.667 total time=   0.0s
[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.538 total time=   0.0s
[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.462 total time=   0.0s
[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.667 total time=   0.0s
[CV 4/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.583 total time=   0.0s
[CV 5/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.750 total time=   0.0s
[CV 1/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.538 total time=   0.0s
[CV 2/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.538 total time=   0.0s
[CV 3/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.667 total time=   0.0s
[CV 4/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.500 total time=   0.0s
[CV 5/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.667 total time=   0.0s
[CV 1/5] END ........C=100, gamma=1, kernel=rbf;, score=0.615 total time=   0.0s
[CV 2/5] END ........C=100, 

[CV 2/5] END ..C=10000, gamma=10000, kernel=rbf;, score=0.385 total time=   0.0s
[CV 3/5] END ..C=10000, gamma=10000, kernel=rbf;, score=0.583 total time=   0.0s
[CV 4/5] END ..C=10000, gamma=10000, kernel=rbf;, score=0.417 total time=   0.0s
[CV 5/5] END ..C=10000, gamma=10000, kernel=rbf;, score=0.417 total time=   0.0s
[CV 1/5] END ............C=1e-05, kernel=linear;, score=0.385 total time=   0.0s
[CV 2/5] END ............C=1e-05, kernel=linear;, score=0.385 total time=   0.0s
[CV 3/5] END ............C=1e-05, kernel=linear;, score=0.333 total time=   0.0s
[CV 4/5] END ............C=1e-05, kernel=linear;, score=0.417 total time=   0.0s
[CV 5/5] END ............C=1e-05, kernel=linear;, score=0.417 total time=   0.0s
[CV 1/5] END ...........C=0.0001, kernel=linear;, score=0.385 total time=   0.0s
[CV 2/5] END ...........C=0.0001, kernel=linear;, score=0.385 total time=   0.0s
[CV 3/5] END ...........C=0.0001, kernel=linear;, score=0.333 total time=   0.0s
[CV 4/5] END ...........C=0.

GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=True),
             estimator=SVC(),
             param_grid=[{'C': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
                                1000, 10000],
                          'gamma': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
                                    1000, 10000],
                          'kernel': ['rbf']},
                         {'C': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
                                1000, 10000],
                          'kernel': ['linear']}],
             scoring='accuracy', verbose=3)

In [91]:
print("최적 하이퍼 파라미터: ", gs.best_params_)
print("최고 예측 정확도: {:.4f}".format(gs.best_score_))

최적 하이퍼 파라미터:  {'C': 10000, 'gamma': 0.1, 'kernel': 'rbf'}
최고 예측 정확도: 0.7115
