# Prerequisites
Understanding in
- machine learning and deep learning
- python syntax
- python libraries: numpy, pandas, Pytorch

# Introduction
This notebook shows the study of the [CWRU Bearing Dataset](https://csegroups.case.edu/bearingdatacenter/home), which contains data of normal and fault bearings. Artificial defects of different diameters (0.007 ~ 0.028 Inches) are manufactured at different locations of the bearings: inner raceway(IR), outer raceway(OR) and ball(B) defects. 

Vibration data was recorded for motor loads of 0 to 3 hp (motor speed of 1797 to 1720 RPM) using accelerometers at the drive end (DE) and fan end (FE) and the data is stored as Matlab files. The sampling rate is 12 kHz and each Matlab file contains between ~120k to ~240k sample points. For more information please refer to the [website](https://csegroups.case.edu/bearingdatacenter/home).

This study focuses on the classification of the drive end bearing defects using only the signal data at **DE**. It is a **multiclass classification** problem. The input is the vibration signal data at DE and the output is the type of defects:
- 0 : Normal (N), 
- 1 : Fault at Ball (B),
- 2 : Fault at Inner Raceway (IR), 
- 3 : Fault at Outer Raceway (OR), 



# Import

In [1]:
# Data science libraries
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Pytorch
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor
from torch.utils.data import TensorDataset, DataLoader
from torch import optim
from torch.nn.modules.loss import CrossEntropyLoss

# Others
from IPython.core.debugger import set_trace
from pathlib import Path

from helper import get_df_all, download
from train_helper import get_dataloader, fit, validate 
import nn_model
from data_urls import URLS

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
working_dir = Path('.')
DATA_PATH = Path("./Data")
save_model_path = working_dir / 'Model'
DE_path = DATA_PATH / '12k_DE'

for path in [DATA_PATH, save_model_path]:
    if not path.exists():
        path.mkdir(parents=True)

In [3]:
# # Uncomment this to download the 12k_DE data if needed
# for name, url in URLS["DE_12k"].items():
#     download(url, DE_path, name, suffix=".mat")

In [4]:
#### HYPERPARAMETERS ####
bs = 64
lr = 0.001
wd = 1e-5
betas=(0.99, 0.999)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
random_seed = 42

cuda


# Load Data and Preprocessing

In [5]:
df_all = get_df_all(DE_path, segment_length=500, normalize=True)
features = df_all.columns[2:]
target = 'label'

                                                         DE_time  \
B007_0.mat     [[-0.0027613972055888225], [-0.096324031936127...   
B007_1.mat     [[-0.07212119760479042], [0.30310395209580837]...   
B007_2.mat     [[-0.3144744111776447], [0.12198878243512974],...   
B007_3.mat     [[0.09031393213572854], [0.22497265469061875],...   
B014_0.mat     [[-0.46781317365269465], [0.1790035129740519],...   
...                                                          ...   
OR021@3_3.mat  [[0.08868958083832335], [0.06854762475049901],...   
OR021@6_0.mat  [[0.10436457085828342], [0.017461776447105788]...   
OR021@6_1.mat  [[-0.025989620758483035], [-0.0272078842315369...   
OR021@6_2.mat  [[-0.08771497005988023], [-0.00203043912175648...   
OR021@6_3.mat  [[-0.27613972055888225], [0.0625375249500998],...   

                                                         FE_time  \
B007_0.mat     [[-0.24716181818181818], [0.14279090909090908]...   
B007_1.mat     [[0.07745636363636363], [0.04561

In [6]:
df_all.sample(5)

Unnamed: 0,label,filename,0,1,2,3,4,5,6,7,...,490,491,492,493,494,495,496,497,498,499
15479,3,OR021@12_1.mat,0.316805,0.20828,0.382612,0.294869,-0.026085,0.087057,0.283324,0.04434,...,-1.584677,0.853653,1.740318,-2.180405,-2.749579,1.561368,1.572914,-2.3582,-1.589295,1.574068
12027,3,OR007@12_3.mat,0.347905,-0.000213,-0.028113,0.120899,0.552718,0.384683,0.022615,0.200161,...,0.467749,0.187479,-0.816293,-0.26146,0.619298,0.321907,0.002324,0.179236,0.282593,-0.074402
10315,0,Normal_3.mat,1.548705,2.132356,1.751854,0.587778,-0.534379,-0.834265,-0.298984,0.29434,...,-2.881879,-2.056384,-0.727854,0.429773,0.816724,0.732884,0.719986,0.56843,0.084742,-0.921329
7667,2,IR028_3.mat,-0.437454,2.169837,1.95281,-0.541765,-0.81515,0.873113,1.275527,0.635322,...,-0.769669,-0.792904,0.495416,0.771273,0.278389,0.69168,1.697717,1.275033,-0.359838,-0.653987
13047,3,OR007@3_3.mat,0.967685,-1.387162,-1.362214,1.041512,1.576126,-0.53178,-1.328609,0.202423,...,1.050677,0.474821,-0.695728,-0.555201,0.517591,0.643861,-0.256327,-0.580659,0.029819,0.563415


In [7]:
df_all.shape

(17987, 502)

In [8]:
## Split the data into train and validation set
X_train, X_valid, y_train, y_valid = train_test_split(df_all[features], 
                                                      df_all[target], 
                                                      test_size=0.20, random_state=random_seed, shuffle=True
                                                     )

In [9]:
## Create DataLoader of train and validation set
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_valid = torch.tensor(X_valid.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
y_valid = torch.tensor(y_valid.values, dtype=torch.long)

train_ds = TensorDataset(X_train, y_train)
valid_ds = TensorDataset(X_valid, y_valid)
train_dl, valid_dl = get_dataloader(train_ds, valid_ds, bs)

# Training with Adams Optimizer

In [10]:
## Instantiate model, optimizer and loss function
model = nn_model.CNN_1D_2L(len(features))
model.to(device)
opt = optim.Adam(model.parameters(), lr=lr, betas=betas, weight_decay=wd)
loss_func = CrossEntropyLoss()

In [11]:
%%time
## Train
epochs = 20
model, metrics = fit(epochs, model, loss_func, opt, train_dl, valid_dl, train_metric=False)

EPOCH 	 Train Loss 	 Val Loss 	 Train Acc 	 Val Acc 	
0 	 0.00972 	 0.30471 	 0.00000 	0.87771 	
1 	 0.00267 	 0.28640 	 0.00000 	0.89744 	
2 	 0.00138 	 0.12245 	 0.00000 	0.95636 	
3 	 0.00103 	 0.25903 	 0.00000 	0.90411 	
4 	 0.00106 	 0.09459 	 0.00000 	0.96109 	
5 	 0.00093 	 0.10815 	 0.00000 	0.96804 	
6 	 0.00076 	 0.15685 	 0.00000 	0.95525 	
7 	 0.00090 	 0.16750 	 0.00000 	0.94136 	
8 	 0.00045 	 0.11840 	 0.00000 	0.95859 	
9 	 0.00038 	 0.11844 	 0.00000 	0.95553 	
10 	 0.00026 	 0.06812 	 0.00000 	0.97443 	
11 	 0.00021 	 0.08259 	 0.00000 	0.96998 	
12 	 0.00024 	 0.05450 	 0.00000 	0.98027 	
13 	 0.00037 	 0.01944 	 0.00000 	0.99361 	
14 	 0.00034 	 0.07288 	 0.00000 	0.97443 	
15 	 0.00050 	 0.17354 	 0.00000 	0.95303 	
16 	 0.00056 	 0.13825 	 0.00000 	0.96637 	
17 	 0.00054 	 0.10775 	 0.00000 	0.96581 	
18 	 0.00032 	 0.17109 	 0.00000 	0.95025 	
19 	 0.00021 	 0.09135 	 0.00000 	0.96859 	
CPU times: total: 11.5 s
Wall time: 12.1 s


In [12]:
%%time
## Train
epochs = 20
model, metrics = fit(epochs, model, loss_func, opt, train_dl, valid_dl, train_metric=False)

EPOCH 	 Train Loss 	 Val Loss 	 Train Acc 	 Val Acc 	
0 	 0.00022 	 0.24229 	 0.00000 	0.93274 	
1 	 0.00020 	 0.05039 	 0.00000 	0.98193 	
2 	 0.00029 	 0.17963 	 0.00000 	0.94997 	
3 	 0.00029 	 0.07733 	 0.00000 	0.97721 	
4 	 0.00028 	 0.04317 	 0.00000 	0.98555 	
5 	 0.00017 	 0.03448 	 0.00000 	0.99138 	
6 	 0.00018 	 0.07745 	 0.00000 	0.97665 	
7 	 0.00014 	 0.06705 	 0.00000 	0.98054 	
8 	 0.00033 	 0.06601 	 0.00000 	0.97860 	
9 	 0.00022 	 0.04243 	 0.00000 	0.98777 	
10 	 0.00020 	 0.08321 	 0.00000 	0.97193 	
11 	 0.00018 	 0.04951 	 0.00000 	0.98888 	
12 	 0.00019 	 0.05189 	 0.00000 	0.98360 	
13 	 0.00014 	 0.06283 	 0.00000 	0.97860 	
14 	 0.00022 	 0.02842 	 0.00000 	0.98999 	
15 	 0.00017 	 0.04112 	 0.00000 	0.98638 	
16 	 0.00020 	 0.02733 	 0.00000 	0.99055 	
17 	 0.00020 	 0.08156 	 0.00000 	0.97387 	
18 	 0.00017 	 0.03271 	 0.00000 	0.98944 	
19 	 0.00016 	 0.06353 	 0.00000 	0.98638 	
CPU times: total: 9.77 s
Wall time: 10.1 s


# Save trained model

In [13]:
torch.save(model.state_dict(), save_model_path / 'model_2.pth')

In [14]:
model2 = nn_model.CNN_1D_2L(len(features))

In [16]:
model2.load_state_dict(torch.load(save_model_path / 'model_2.pth'))
model2.eval()

  model2.load_state_dict(torch.load(save_model_path / 'model_2.pth'))


CNN_1D_2L(
  (layer1): Sequential(
    (0): Conv1d(1, 64, kernel_size=(9,), stride=(1,), padding=(4,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv1d(64, 128, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
  )
  (linear1): Linear(in_features=16000, out_features=4, bias=True)
)

In [17]:
%%time
print(validate(model, valid_dl, loss_func))

(0.06352972865203424, np.float64(0.9863813229571985), (array([2, 0, 1, ..., 3, 1, 3], shape=(3598,)), array([2, 0, 1, ..., 3, 1, 3], shape=(3598,))))
CPU times: total: 156 ms
Wall time: 151 ms
