# Hw04

## Read signals of Ch 1 in the 2nd test dataset of IMS bearing data. Calculate the mean, standard deviation, variation, skewness, and kurtosis of the data as the 5-dimensional features.

In [36]:
import numpy as np
import os
import pandas as pd
from scipy.stats import skew 
from scipy.stats import kurtosis

In [37]:
path, dirs, files = next(os.walk("./IMS_data/"))
Features=[]
for file in files:
    data = pd.read_csv('./IMS_data/'+file, sep = '\t',
                       names = ['ch1', 'ch2', 'ch3', 'ch4'])
    ch1 = data.ch1.values
    mn = np.mean(ch1)
    sd = np.std(ch1)
    v = np.var(ch1)
    sk = skew(ch1)
    ku = kurtosis(ch1)
    Features.append([mn, sd, v, sk, ku])
X = np.asarray(Features)
X.shape

(984, 5)

### (a) Consider the bearing was good at the first one forth of the running time, and it was failed at the last one forth of the running time. Please develop an MLP model to classify whether the bearing is good or not.

In [38]:
quater_point = len(X)//4
X_health = X[:quater_point]
X_unhealth = X[3*quater_point:]
health_len = len(X_health)
unhealth_len = len(X_unhealth)
print(health_len,unhealth_len)

246 246


In [39]:
Y_health = np.ones(health_len)
Y_unhealth = np.zeros(unhealth_len)
X_combined = np.concatenate((X_health, X_unhealth))
Y_combined = np.concatenate((Y_health, Y_unhealth))

In [40]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X_combined, Y_combined, train_size = 0.75, test_size = 0.25, random_state = 0)

In [41]:
best_score = 0
best_N = 0
for i in range(30):
    model = MLPClassifier(hidden_layer_sizes=(i+1,i+1,i+1), random_state = 1, activation = 'relu', solver = 'adam', alpha=1e-4, max_iter = 100000
                          ,learning_rate_init=.1)
    model.fit(X_train, Y_train)
    score = model.score(X_test, Y_test)
    # Y_pred = model.predict(X_test)
    # print(Y_pred)
    print(i+1,score)
    if best_score < score:
        best_score = score
        best_N = i+1
print("===============")
print("best_score = ", best_score)
print("best_N = ", best_N)

1 0.4878048780487805
2 0.5447154471544715
3 0.4878048780487805
4 0.5121951219512195
5 0.4878048780487805
6 0.4959349593495935
7 0.5121951219512195
8 0.4959349593495935
9 0.5121951219512195
10 0.4959349593495935
11 0.4796747967479675
12 0.4959349593495935
13 0.4878048780487805
14 0.5121951219512195
15 0.4878048780487805
16 0.5121951219512195
17 0.4878048780487805
18 0.5040650406504065
19 0.5040650406504065
20 0.5121951219512195
21 0.5121951219512195
22 0.5040650406504065
23 0.4959349593495935
24 0.4959349593495935
25 0.4959349593495935
26 0.5040650406504065
27 0.4959349593495935
28 0.5121951219512195
29 0.5040650406504065
30 0.4878048780487805
best_score =  0.5447154471544715
best_N =  2


### (b) Reduce the features into 3 by PCA and develop the MLP model again.

In [42]:
from sklearn.decomposition import PCA
pca_3 = PCA(n_components = 3,svd_solver = 'full')
pca_3.fit(Features)
# # Reduce the features into 3 by PCA
X_pca = pca_3.transform(Features)
X_pca.shape

(984, 3)

In [43]:
quater_point = len(X_pca)//4
X_health = X_pca[:quater_point]
X_unhealth = X_pca[3*quater_point:]
health_len = len(X_health)
unhealth_len = len(X_unhealth)

In [44]:
Y_health = np.ones(health_len)
Y_unhealth = np.zeros(unhealth_len)
X_combined = np.concatenate((X_health, X_unhealth))
Y_combined = np.concatenate((Y_health, Y_unhealth))

In [45]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X_combined, Y_combined, train_size = 0.75, test_size = 0.25, random_state = 0)

In [46]:
best_score = 0
best_N = 0
for i in range(30):
    model = MLPClassifier(hidden_layer_sizes=(i+1,i+1,i+1), random_state = 0, activation = 'relu', solver = 'adam', alpha=1e-4, max_iter = 100000
                          ,learning_rate_init=.1)
    model.fit(X_train, Y_train)
    score = model.score(X_test, Y_test)
    # Y_pred = model.predict(X_test)
    # print(Y_pred)
    print(i+1,score)
    if best_score < score:
        best_score = score
        best_N = i+1
print("===============")
print("best_score = ", best_score)
print("best_N = ", best_N)

1 0.4878048780487805
2 0.5121951219512195
3 0.4878048780487805
4 0.4959349593495935
5 0.4878048780487805
6 0.4959349593495935
7 0.4878048780487805
8 0.4959349593495935
9 0.4878048780487805
10 0.4959349593495935
11 0.4878048780487805
12 0.5040650406504065
13 0.4878048780487805
14 0.5040650406504065
15 0.4796747967479675
16 0.4959349593495935
17 0.5040650406504065
18 0.4959349593495935
19 0.4959349593495935
20 0.5040650406504065
21 0.5121951219512195
22 0.5040650406504065
23 0.5609756097560976
24 0.4796747967479675
25 0.5040650406504065
26 0.4878048780487805
27 0.4959349593495935
28 0.5040650406504065
29 0.5121951219512195
30 0.4796747967479675
best_score =  0.5609756097560976
best_N =  23
