<a href="https://colab.research.google.com/github/SeungJooKim/anomaly-detector-gearbearing/blob/master/Medium.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn import preprocessing

from numpy.random import seed

from keras.layers import Input, Dropout
from keras.layers.core import Dense 
from keras.models import Model, Sequential, load_model
from keras import regularizers
from keras.models import model_from_json

In [None]:
from google.colab import drive

# Make Google Drive visible to the Colab runtime.
drive.mount('/content/gdrive')

# The first CSV column becomes the row index.
# NOTE(review): presumably these are timestamps, given the date-string
# slicing done on the index further down — confirm against the file.
dataset_path = '/content/gdrive/My Drive/merged_dataset_BearingTest_2.csv.txt'
df = pd.read_csv(dataset_path, index_col=[0])
df.head()


In [None]:
# Print a structural summary of the loaded frame (index, columns, dtypes).
df.info()

In [None]:
# Line plot of every column in the raw dataset; x tick labels are rotated
# 45 degrees so the long index labels do not overlap.
df.plot()
plt.xticks(rotation=45)
plt.show()

In [None]:
# Chronological train/test split.
#
# Label-based slicing includes BOTH endpoints, so using the same timestamp
# as the end of the training window and the start of the test window would
# place that row in both sets (and later produce a duplicated index entry
# when train and test scores are concatenated). The test set therefore
# starts strictly AFTER the last training timestamp.
# NOTE(review): assumes the index is in chronological order — confirm.
TRAIN_START = '2004-02-12 11:02:39'
TRAIN_END = '2004-02-13 23:52:39'

dataset_train = df.loc[TRAIN_START:TRAIN_END]
dataset_test = df.loc[df.index > TRAIN_END]

dataset_train.shape, dataset_test.shape

In [None]:
# Line plot of the training window, with rotated x tick labels.
dataset_train.plot()
plt.xticks(rotation=45)
plt.show()

In [None]:
# Line plot of the test window, with rotated x tick labels.
dataset_test.plot()
plt.xticks(rotation=45)
plt.show()

In [None]:
# Fit the min-max scaler on the training window only, then wrap the scaled
# values back into a DataFrame carrying the original column and row labels.
scaler = preprocessing.MinMaxScaler()
scaled_train_values = scaler.fit_transform(dataset_train)

X_train = pd.DataFrame(
    scaled_train_values,
    columns=dataset_train.columns,
    index=dataset_train.index,
)

In [None]:
# X_train is intentionally left in its original row order.
# `DataFrame.sample(frac=1)` returns a NEW shuffled frame and never shuffles
# in place, so calling it without using the result has no effect; it is
# therefore not called here. Keeping the rows ordered also keeps the
# index-based plots and the later train/test score concatenation readable.
# NOTE(review): Keras `fit` shuffles the training samples each epoch by
# default (`shuffle=True`) — confirm that is sufficient for this experiment.

# Scale the test window with the scaler already fitted on the training data
# (transform only, no re-fit), preserving column and row labels.
scaled_test_values = scaler.transform(dataset_test)
X_test = pd.DataFrame(
    scaled_test_values,
    columns=dataset_test.columns,
    index=dataset_test.index,
)

In [None]:
# Plot the scaled training features on a 12x6 inch figure.
X_train.plot(figsize = (12,6))
plt.xticks(rotation=45)
plt.show()

In [None]:
# Plot the scaled test features on a 12x6 inch figure.
X_test.plot(figsize = (12,6))
plt.xticks(rotation=45)
plt.show()

In [None]:
# Seed NumPy's global random number generator.
# NOTE(review): nothing in this cell seeds the Keras backend, so weight
# initialisation may still differ between runs — confirm if reproducibility
# matters.
seed(10)

act_func = 'elu'
n_features = X_train.shape[1]
print(n_features)

# Dense autoencoder with a 2-unit bottleneck:
#   n_features -> 10 -> 2 -> 10 -> n_features
# The output layer has no activation argument, i.e. it uses the layer default.
model = Sequential([
    # Encoder layer, connected to the input vector. The L2 factor is 0.0,
    # so the regulariser adds nothing to the loss as configured.
    Dense(10,
          activation=act_func,
          kernel_initializer='glorot_uniform',
          kernel_regularizer=regularizers.l2(0.0),
          input_shape=(n_features,)),
    # Bottleneck layer.
    Dense(2,
          activation=act_func,
          kernel_initializer='glorot_uniform'),
    # Decoder layer.
    Dense(10,
          activation=act_func,
          kernel_initializer='glorot_uniform'),
    # Reconstruction layer: one output per input feature.
    Dense(n_features,
          kernel_initializer='glorot_uniform'),
])

model.compile(loss='mse', optimizer='adam')

# Training hyper-parameters consumed by the fit cell.
NUM_EPOCHS = 100
BATCH_SIZE = 10

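활성화 함수(activation function) = 뉴럴 네트워크의 개별 뉴런에 들어오는 입력 신호의 총합을 출력 신호로 변환하는 함수. 여기서는 ELU(Exponential Linear Unit)를 사용하며, 입력 x가 0보다 크면 x를 그대로 출력하고 0 이하이면 α(eˣ − 1)을 출력한다.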

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Autoencoder training: the scaled training matrix is used as both the input
# and the reconstruction target. 5% of the samples are held out for
# validation, and per-epoch progress is printed (verbose=1).
train_matrix = np.array(X_train)

history = model.fit(
    train_matrix,
    train_matrix,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_split=0.05,
    verbose=1,
)

In [None]:
# Training vs. validation loss per epoch.
# Each entry: (key in history.history, matplotlib format string, legend label).
loss_curves = (
    ('loss', 'b', 'Training loss'),
    ('val_loss', 'r', 'Validation loss'),
)
for history_key, line_format, legend_label in loss_curves:
    plt.plot(history.history[history_key], line_format, label=legend_label)

plt.legend(loc='upper right')
plt.xlabel('Epochs')
plt.ylabel('Loss, [mse]')
# Fixed y-range so the two curves stay comparable across runs.
plt.ylim([0, .1])
plt.show()

In [None]:
# Reconstruct the training data and label the result like X_train so the
# element-wise subtraction below aligns on both index and columns.
X_pred = pd.DataFrame(
    model.predict(np.array(X_train)),
    columns=X_train.columns,
    index=X_train.index,
)

# Per-row mean absolute reconstruction error across all features.
train_abs_error = np.abs(X_pred - X_train)
scored = pd.DataFrame(index=X_train.index)
scored['Loss_mae'] = np.mean(train_abs_error, axis=1)

# Distribution of the training error, used to pick an anomaly threshold.
# NOTE(review): `distplot` is reported as deprecated in newer seaborn
# releases — confirm against the installed version.
plt.figure()
sns.distplot(scored['Loss_mae'], bins=10, kde=True, color='blue')
plt.xlim([0.0, .5])
plt.show()

정상 작동하는 데이터들을 학습시킨 후 loss값을 측정해 본 그래프. 이를 통해 loss값의 기준치(threshold)를 정할 수 있음. 그래프를 보면 loss값이 0.3을 초과하면 비정상으로 판단할 수 있음.

In [None]:
# Reconstruct the test data, labelled like X_test for aligned subtraction.
X_pred = pd.DataFrame(
    model.predict(np.array(X_test)),
    columns=X_test.columns,
    index=X_test.index,
)

# Per-row mean absolute reconstruction error across all features.
test_abs_error = np.abs(X_pred - X_test)

scored = pd.DataFrame(index=X_test.index)
scored['Loss_mae'] = np.mean(test_abs_error, axis=1)
# A row is flagged as anomalous when its error is strictly above 0.3.
scored['Threshold'] = 0.3
scored['Anomaly'] = scored['Loss_mae'].gt(scored['Threshold'])
scored.head(10)



In [None]:
# Show the last ten rows of the scored frame.
scored.tail(10)

In [None]:
# Compute the same metrics for the training set, then stack the training
# scores on top of the existing `scored` frame so both live in one DataFrame.
X_pred_train = pd.DataFrame(
    model.predict(np.array(X_train)),
    columns=X_train.columns,
    index=X_train.index,
)

train_abs_error = np.abs(X_pred_train - X_train)

scored_train = pd.DataFrame(index=X_train.index)
scored_train['Loss_mae'] = np.mean(train_abs_error, axis=1)
# Same strict "> 0.3" rule as applied to the test scores.
scored_train['Threshold'] = 0.3
scored_train['Anomaly'] = scored_train['Loss_mae'].gt(scored_train['Threshold'])

# Print `scored` as it is before the training rows are prepended.
print(scored)
scored = pd.concat([scored_train, scored])

In [None]:
# Count flagged (True) vs. unflagged (False) rows in the training scores.
scored_train.Anomaly.value_counts()

In [None]:
# Count flagged (True) vs. unflagged (False) rows in the merged scores.
scored.Anomaly.value_counts()

In [None]:
# the model output in the time leading up to the bearing failure
scored.plot(logy=True,  figsize = (10,6), ylim = [1e-2,1e2], color = ['blue','red'])
plt.xticks(rotation=45)
plt.show()