## 라이브러리 import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sp

## 1. ANOVA 기반 3 Class 구분성 상위 특징 추출

In [None]:
# Load the previously extracted feature matrix (the file has no header row).
MC_FeatureData = pd.read_csv('./ProcessedData/MC_FeatureData', sep=',', header=None)

# Cut the columns into three consecutive groups: normal / fault 1 / fault 2.
# The first two groups have NoOfData columns each; the last group takes
# whatever columns remain.
NoOfData = MC_FeatureData.shape[1] // 3
Normal_FeatureData, Abnormal1_FeatureData, Abnormal2_FeatureData = (
    MC_FeatureData.iloc[:, lo:hi]
    for lo, hi in ((None, NoOfData), (NoOfData, 2 * NoOfData), (2 * NoOfData, None))
)

print(Normal_FeatureData.shape, Abnormal1_FeatureData.shape, Abnormal2_FeatureData.shape)

In [None]:
# Select the top-ranked features according to the stored ANOVA ranking.

# Samples per class are taken from the data itself rather than hard-coded,
# so this cell keeps working when the data set size changes.
NoOfData = Normal_FeatureData.shape[1]
MC_P_value_Rank = pd.read_csv('./ProcessedData/MC_P_value_Rank_ANOVA', sep = ',', header=None)
Rank = 30  # number of top-ranked features to keep

if Rank > MC_P_value_Rank.shape[0]:
    raise ValueError('Rank (%d) exceeds the number of ranked features (%d)'
                     % (Rank, MC_P_value_Rank.shape[0]))

# Column 0 of the rank file holds the row positions of the features inside
# the per-class feature tables; the first `Rank` entries are used.
# NOTE(review): presumably the file is ordered best-first - confirm.
selected_rows = MC_P_value_Rank.iloc[:Rank, 0].astype(int).values

# Pick the selected feature rows for every class in one positional lookup.
# Cast to float to match the float arrays the row-by-row copy used to fill.
Normal    = Normal_FeatureData.iloc[selected_rows, :].values.astype(float)
Abnormal1 = Abnormal1_FeatureData.iloc[selected_rows, :].values.astype(float)
Abnormal2 = Abnormal2_FeatureData.iloc[selected_rows, :].values.astype(float)

# Put the three classes back side by side (features x samples).
MC_FeatureSelected = pd.DataFrame(np.concatenate([Normal, Abnormal1, Abnormal2] , axis=1))

print("Selected Feature Data Size :", MC_FeatureSelected.shape)
print("= 데이터 %d개(정상/고장1/고장2 각 %d개씩)가 각각 %d개의 최종 선택된 특징값으로 구성됨"
      %(MC_FeatureSelected.shape[1], MC_FeatureSelected.shape[1]//3, MC_FeatureSelected.shape[0]))

# Save the selected features (no header, no index column).
path = './ProcessedData/MC_FeatureSelected_ANOVA'
MC_FeatureSelected.to_csv(path, sep=',', header=None, index=None)

## 2. 학습/검증 데이터, 평가 데이터 분할, 레이블 만들기

### 2-1. 데이터 분할

In [None]:
# Reload the selected features and flip them so that rows are samples and
# columns are features.
MC_FeatureSelected = pd.read_csv('./ProcessedData/MC_FeatureSelected_ANOVA', sep=',', header=None)
MC_FeatureSelected = pd.DataFrame(MC_FeatureSelected.T)

NoOfData = MC_FeatureSelected.shape[0] // 3    # samples per class (normal / fault 1 / fault 2)
FeatNo   = int(MC_FeatureSelected.shape[1])    # number of features (= data dimension)

# Cut the rows into the three consecutive class groups ...
NormalSet, Abnormal1Set, Abnormal2Set = (
    np.array(MC_FeatureSelected.iloc[lo:hi, :])
    for lo, hi in ((None, NoOfData), (NoOfData, 2 * NoOfData), (2 * NoOfData, None))
)

# ... and lay them side by side: row k now holds the k-th sample of every
# class, as three FeatNo-wide column groups.
FeatureSelected_Reshaped = pd.DataFrame(
    np.concatenate([NormalSet, Abnormal1Set, Abnormal2Set], axis=1)
)

In [None]:
# The last 20 rows (20 samples of each class) are held out as test data;
# all rows before them are used for training/validation.
Train_Valid_Data = FeatureSelected_Reshaped.iloc[:-20, :]
temp_Test_Data   = FeatureSelected_Reshaped.iloc[-20:, :]

# The held-out rows are still in the wide layout (three FeatNo-column groups
# side by side); stack the groups vertically so each row is one sample.
Test_Data = pd.DataFrame(np.concatenate(
    [temp_Test_Data.iloc[:, lo:hi]
     for lo, hi in ((None, FeatNo), (FeatNo, 2 * FeatNo), (2 * FeatNo, None))],
    axis=0,
))
Test_Data.shape  # shape of the test data

In [None]:
NoOfTrainData = Train_Valid_Data.shape[0]
Fold          = 5                        # number of folds; should divide NoOfTrainData evenly
FoldDataNo    = int(NoOfTrainData/Fold)  # validation rows per fold (per class)

# Column ranges of the three classes inside the wide layout.
class_columns = (slice(None, FeatNo), slice(FeatNo, 2*FeatNo), slice(2*FeatNo, None))

# Build every fold in one pass. The fold tables are published under their
# usual names (Validation_Fold1..N, Training_Fold1..N) by writing straight
# into the module namespace instead of exec-ing generated source text.
for i in range(Fold):
    first_row, last_row = FoldDataNo*i, FoldDataNo*(i+1)

    # Validation part: the i-th slice of rows, classes stacked vertically.
    held_out = Train_Valid_Data.iloc[first_row:last_row, :]
    globals()['Validation_Fold%d' % (i+1)] = pd.DataFrame(
        np.concatenate([held_out.iloc[:, cols] for cols in class_columns], axis=0))

    # Training part: every row outside that slice, stacked the same way.
    remaining = np.concatenate([Train_Valid_Data.iloc[:first_row, :],
                                Train_Valid_Data.iloc[last_row:, :]], axis=0)
    globals()['Training_Fold%d' % (i+1)] = pd.DataFrame(
        np.concatenate([remaining[:, cols] for cols in class_columns], axis=0))

# Check the split: a validation fold holds 3*FoldDataNo rows and a training
# fold holds 3*(NoOfTrainData - FoldDataNo) rows.
Validation_Fold1.shape, Training_Fold1.shape

### 2-2. 레이블 만들기

In [None]:
# Samples per class in each split (every table stacks the three classes
# vertically, so one class is a third of the rows).
NoOfLabel_Train, NoOfLabel_Valid, NoOfLabel_Test = (
    table.shape[0] // 3 for table in (Training_Fold1, Validation_Fold1, Test_Data)
)
NoOfLabel_Train, NoOfLabel_Valid, NoOfLabel_Test

In [None]:
# Integer class labels (used by KNN and SVM): 0 = normal, 1 = fault 1, 2 = fault 2.
# Each split is ordered class by class, so the label vector is simply every
# class id repeated "samples per class" times.
class_ids = np.arange(3)

TrainingFold_Label   = pd.Series(np.repeat(class_ids, NoOfLabel_Train))  # training labels
ValidationFold_Label = pd.Series(np.repeat(class_ids, NoOfLabel_Valid))  # validation labels
Test_Label           = pd.Series(np.repeat(class_ids, NoOfLabel_Test))   # test labels

In [None]:
# One-hot labels for the ANN:
# normal = [1,0,0], fault 1 = [0,1,0], fault 2 = [0,0,1].
# Row c of the 3x3 identity matrix is the code of class c; repeating each row
# "samples per class" times gives the label table in class-by-class order.
one_hot_codes = np.eye(3, dtype=int)

TrainingFold_Label_forANN   = pd.DataFrame(np.repeat(one_hot_codes, NoOfLabel_Train, axis=0))
ValidationFold_Label_forANN = pd.DataFrame(np.repeat(one_hot_codes, NoOfLabel_Valid, axis=0))
Test_Label_forANN           = pd.DataFrame(np.repeat(one_hot_codes, NoOfLabel_Test,  axis=0))

In [None]:
# Save the split data and the labels (all files: no header, no index column).

# Fold tables are looked up by name in the module namespace; this avoids
# exec-ing generated source text just to reach Training_Fold1..N.
for i in range(Fold):
    path1 = './K_FoldData/Training_Fold%d'  %(i+1)
    path2 = './K_FoldData/Validation_Fold%d'%(i+1)

    globals()['Training_Fold%d'  %(i+1)].to_csv(path1, sep = ",", header = None, index = None)
    globals()['Validation_Fold%d'%(i+1)].to_csv(path2, sep = ",", header = None, index = None)
Test_Data.to_csv('./K_FoldData/Test_Data', sep = ",", header = None, index = None)

# Integer labels (KNN / SVM).
TrainingFold_Label.to_csv(  './K_FoldData/TrainingFold_Label', header = None, index = None)
ValidationFold_Label.to_csv('./K_FoldData/ValidationFold_Label', header = None, index = None)
Test_Label.to_csv(          './K_FoldData/Test_Label', header = None, index = None)

# One-hot labels (ANN).
TrainingFold_Label_forANN.to_csv(  './K_FoldData/TrainingFold_Label_forANN', sep = ",", header = None, index = None)
ValidationFold_Label_forANN.to_csv('./K_FoldData/ValidationFold_Label_forANN', sep = ",", header = None, index = None)
Test_Label_forANN.to_csv(          './K_FoldData/Test_Label_forANN', sep = ",", header = None, index = None)

In [None]:
# # 분할 이전 전체 데이터 및 레이블
# Normal_Training_All    = Train_Valid_Data.iloc[:,        :  FeatNo]
# Abnormal1_Training_All = Train_Valid_Data.iloc[:,  FeatNo:2*FeatNo]
# Abnormal2_Training_All = Train_Valid_Data.iloc[:,2*FeatNo:        ]
# Training_All = pd.DataFrame(np.concatenate([Normal_Training_All, Abnormal1_Training_All, Abnormal2_Training_All], axis = 0))

# Training_All_Label = np.zeros(NoOfTrainData*3)
# Training_All_Label[  NoOfTrainData:2*NoOfTrainData] = 1
# Training_All_Label[2*NoOfTrainData:               ] = 2
# Training_All_Label = pd.Series(Training_All_Label)

# Training_All_Label_forANN = np.zeros((NoOfTrainData*3,3))
# Training_All_Label_forANN[               :  NoOfTrainData , 0] = 1
# Training_All_Label_forANN[  NoOfTrainData:2*NoOfTrainData , 1] = 1
# Training_All_Label_forANN[2*NoOfTrainData:                , 2] = 1
# Training_All_Label_forANN = pd.DataFrame(Training_All_Label_forANN)

# Training_All.to_csv('./K_FoldData/Training_All', sep = ",", header = None, index = None)
# Training_All_Label.to_csv('./K_FoldData/Training_All_Label', sep = ",", header = None, index = None)
# Training_All_Label_forANN.to_csv('./K_FoldData/Training_All_Label_forANN', sep = ",", header = None, index = None)

## 3. 분류 모델 학습/검증(K-fold 교차검증)

In [None]:
# Load the data and labels.
# K-fold training/validation data: each file becomes a NumPy array bound to
# Training_Fold<k> / Validation_Fold<k>. The names are written directly into
# the module namespace rather than through exec of generated source text.
for i in range(Fold):

    path1 = './K_FoldData/Training_Fold%d'%(i+1)
    path2 = './K_FoldData/Validation_Fold%d'%(i+1)
    globals()['Training_Fold%d'  %(i+1)] = np.array(pd.read_csv(path1, sep=",", header=None))
    globals()['Validation_Fold%d'%(i+1)] = np.array(pd.read_csv(path2, sep=",", header=None))
# K-fold training/validation labels (integer class ids, flattened to 1-D).
TrainingFold_Label   = np.array(pd.read_csv('./K_FoldData/TrainingFold_Label'  , sep=",", header=None).T.squeeze())
ValidationFold_Label = np.array(pd.read_csv('./K_FoldData/ValidationFold_Label', sep=",", header=None).T.squeeze())
# # Full training data (not split into folds)
# Training_All       = np.array(pd.read_csv('./K_FoldData/Training_All', sep = ",", header = None))
# Training_All_Label = np.array(pd.read_csv('./K_FoldData/Training_All_Label', sep = ",", header = None).T.squeeze())

### 3-1. KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# K-fold cross-validation of a 7-nearest-neighbour classifier.
Add    = 0   # running sum of the fold accuracies
Divide = 0   # number of folds evaluated
knn_fold_scores = []

for i in range(Fold):
    # Fetch this fold's arrays by name from the module namespace.
    Training_CurrentFold   = globals()['Training_Fold%d'  %(i+1)]
    Validation_CurrentFold = globals()['Validation_Fold%d'%(i+1)]

    knnModel_CurrentFold = KNeighborsClassifier(n_neighbors = 7).fit(Training_CurrentFold , TrainingFold_Label)

    # Score the fold once and reuse the value for the report and the average.
    fold_score = knnModel_CurrentFold.score(Validation_CurrentFold , ValidationFold_Label)
    knn_fold_scores.append(fold_score)
    globals()['knnscore_Fold%d'%(i+1)] = fold_score   # keep the per-fold names available

    Add += fold_score
    Divide += 1

Avg_accuracy = Add/Divide

# The report is built from the collected scores, so it follows `Fold`
# instead of assuming exactly five folds.
print('[Result of K-fold Cross Validation] \n')
print(' \n'.join(' Fold {}: {:.2f}%'.format(fold_no, score*100)
                 for fold_no, score in enumerate(knn_fold_scores, start=1)))
print('\n Average accuracy: {:.2f}%'.format(Avg_accuracy*100))

### 3-2. SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# K-fold cross-validation of a linear-kernel SVM.
Add    = 0   # running sum of the fold accuracies
Divide = 0   # number of folds evaluated
svm_fold_scores = []

for i in range(Fold):
    # Fetch this fold's arrays by name from the module namespace.
    Training_CurrentFold   = globals()['Training_Fold%d'  %(i+1)]
    Validation_CurrentFold = globals()['Validation_Fold%d'%(i+1)]

    svmModel_CurrentFold = SVC(kernel = 'linear').fit(Training_CurrentFold , TrainingFold_Label)

    # Score the fold once and reuse the value for the report and the average.
    fold_score = svmModel_CurrentFold.score(Validation_CurrentFold , ValidationFold_Label)
    svm_fold_scores.append(fold_score)
    globals()['svmscore_Fold%d'%(i+1)] = fold_score   # keep the per-fold names available

    Add += fold_score
    Divide += 1

Avg_accuracy = Add/Divide

# The report is built from the collected scores, so it follows `Fold`
# instead of assuming exactly five folds.
print('[Result of K-fold Cross Validation] \n')
print(' \n'.join(' Fold {}: {:.2f}%'.format(fold_no, score*100)
                 for fold_no, score in enumerate(svm_fold_scores, start=1)))
print('\n Average accuracy: {:.2f}%'.format(Avg_accuracy*100))

### 3-3. ANN

In [None]:
Fold = 5

# K-fold training/validation data: each file becomes a NumPy array bound to
# Training_Fold<k> / Validation_Fold<k>. The names are written directly into
# the module namespace rather than through exec of generated source text.
for i in range(Fold):

    path1 = './K_FoldData/Training_Fold%d'%(i+1)
    path2 = './K_FoldData/Validation_Fold%d'%(i+1)
    globals()['Training_Fold%d'  %(i+1)] = np.array(pd.read_csv(path1, sep=",", header=None))
    globals()['Validation_Fold%d'%(i+1)] = np.array(pd.read_csv(path2, sep=",", header=None))
# K-fold training/validation labels, one-hot encoded for the ANN.
# NOTE: this rebinds TrainingFold_Label / ValidationFold_Label, which the KNN
# and SVM cells read as integer labels - reload the integer labels before
# re-running those cells.
TrainingFold_Label   = np.array(pd.read_csv('./K_FoldData/TrainingFold_Label_forANN'  , sep=",", header=None))
ValidationFold_Label = np.array(pd.read_csv('./K_FoldData/ValidationFold_Label_forANN', sep=",", header=None))
# # Full training data (not split into folds)
# Training_All       = np.array(pd.read_csv('./K_FoldData/Training_All', sep = ",", header = None))
# Training_All_Label = np.array(pd.read_csv('./K_FoldData/Training_All_Label_forANN', sep = ",", header = None))

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Hyperparameters shared by the ANN cells.
learningRate = 0.0001   # Adam learning rate
noOfNeuron   = 16       # units in each of the first three Dense layers
iteration    = 2000     # number of training epochs

def ANN_model(input_data):
    """Build and compile the fully connected classifier.

    The network stacks three Dense layers of ``noOfNeuron`` units (the first
    has no activation, the next two use ReLU) and a 3-unit softmax output.
    It is compiled with Adam at ``learningRate``, categorical cross-entropy
    loss and the accuracy metric.

    Args:
        input_data: array-like; only ``input_data.shape[1]`` is read, to set
            the input width of the first layer.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    feature_count = input_data.shape[1]
    relu = keras.activations.relu

    network = keras.Sequential([
        keras.layers.Dense(units=noOfNeuron, input_shape=(feature_count,)),         # input layer
        keras.layers.Dense(units=noOfNeuron, activation=relu),                      # hidden layer 1
        keras.layers.Dense(units=noOfNeuron, activation=relu),                      # hidden layer 2
        # 3 output units because the labels are 3-dimensional (one-hot)
        keras.layers.Dense(units=3, activation=keras.activations.softmax),          # output layer
    ])
    network.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learningRate),
        loss=keras.losses.categorical_crossentropy,
        metrics=['accuracy'],
    )
    return network

In [None]:
# K-fold cross-validation of the ANN; the same one-hot label tables are used
# for every fold.
Label        = TrainingFold_Label
Label_Val    = ValidationFold_Label
Accuracy_sum = 0

print('[Result of K-fold Cross Validation] \n')

for i in range(Fold):
    # Re-seed before each fold so every model starts from the same random state.
    tf.random.set_seed(777)

    # Fetch this fold's arrays by name from the module namespace instead of
    # exec-ing generated assignment statements.
    Data     = globals()['Training_Fold%d'  %(i+1)]
    Data_Val = globals()['Validation_Fold%d'%(i+1)]

    model = ANN_model(Data)
    # Train silently, then evaluate on the held-out fold.
    hist = model.fit(Data, Label, epochs=iteration, verbose = 0)
    Loss, Accuracy = model.evaluate(Data_Val,  Label_Val, verbose=0)
    Accuracy_sum = Accuracy_sum + Accuracy
    print('Fold {}: {:.2f}%'.format((i+1), Accuracy*100))

print('* Average accuracy : {:.2f}%'.format((Accuracy_sum/Fold)*100))

In [None]:
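# TODO: there is no hyperparameter optimization code yet; worth trying (e.g. n_neighbors for KNN, the SVC kernel, learningRate/noOfNeuron/iteration for the ANN).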