In [None]:
import math
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

from utils.ML import time_series_dataframe, make_dataframe
from utils.DL import new_multistep_time_series, plot_and_loss2
from utils.DL import train_tmp, evaluate2
from utils.DL import TransAm

### First Step - Fault Diagnostics using Machine Learning

In [None]:
##### Load Dataset ######
print('>>> Load Dataset!')

# Build the feature table from the files under ./temp_add_gps/.
df_ML = make_dataframe(path_temp_gps='./temp_add_gps/', window_size=120, stride=40)

# The first nine columns are the model inputs; 'label' is the target.
X, y = df_ML.iloc[:, :9].values, df_ML['label'].values

# Data Split (shuffled 80/20 hold-out with a fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# Preprocessing: the scaler is fitted on the training split only and then
# reused, unchanged, on the test split.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
x_test_scaled = scaler.transform(X_test)

for split_name, split in (('X_train', X_train_scaled), ('X_test', x_test_scaled)):
    print(f'{split_name} Shape : {split.shape}')
print('>>> Successfully load dataset!')

In [None]:
##### Load Model ######
print('>>> Load Model!')

# Random-forest settings gathered in one place so they are easy to tune.
rfc_params = dict(
    n_estimators=50,
    max_depth=30,
    min_samples_leaf=8,
    min_samples_split=8,
    random_state=42,
)
RFC = RandomForestClassifier(**rfc_params)

print('>>> Successfully load model!')

In [None]:
##### Train & Test ######

# fit() returns the fitted estimator, so training and prediction can be chained.
RFC_predict = RFC.fit(X_train_scaled, y_train).predict(x_test_scaled)
RFC_acc = accuracy_score(y_test, RFC_predict)

print("Random Forest Classifier Precition Accuracy : ", RFC_acc)

### Second Step - Time-Series Prediction using Transformer

In [None]:
# Hyper Parameters
# input_window / output_window are forwarded to the windowing, training and
# evaluation helpers imported from utils.DL.
input_window = 240
output_window = 120
epochs = 500
# batch_size = 1024
batch_size = 128
lr = 0.001

# `is_available` has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a machine without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
##### Load Dataset #####
print(f'>>> Load Dataset!')

df_DL, _, _ = time_series_dataframe('./temp_add_gps/')

df_DL_temp = df_DL['TEMP'].values
df_DL_label = df_DL['label'].values

# Ordered 80/20 split: the first 80% of rows train the model, the rest test it.
train_len = int(len(df_DL) * 0.8)

train_data = df_DL_temp[:train_len]
train_label = df_DL_label[:train_len]
test_data = df_DL_temp[train_len:]
test_label = df_DL_label[train_len:]

# Preprocessing
# The scaler is fitted on the training split only. Fitting a second scaler on
# the test split would leak test statistics and map the two splits onto
# different scales, so the model would be evaluated on inputs scaled
# differently from the ones it was trained on.
scaler_train = MinMaxScaler()
train_data = scaler_train.fit_transform(train_data.reshape(-1, 1)).reshape(-1)

# `scaler_test` stays defined because later cells use it to map predictions
# back to the original units; it is the same fitted object as `scaler_train`.
scaler_test = scaler_train
test_data = scaler_test.transform(test_data.reshape(-1, 1)).reshape(-1)

# Only the test labels are kept; the training labels returned by the helper
# are discarded.
train_data, _ = new_multistep_time_series(train_data, train_label, input_window, output_window)
test_data, test_label = new_multistep_time_series(test_data, test_label, input_window, output_window)

train_data = train_data.to(device)
test_data = test_data.to(device)

print(f'X_train Shape : {train_data.shape}')
print(f'X_test Shape : {test_data.shape}')
print(f'>>> Successfully load dataset!')

In [None]:
##### Load Model ######
print(f'>>> Load Model!')

model = TransAm().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
# Decay the learning rate by a factor of 0.98 on every scheduler step.
# step_size is a count of steps, so it is passed as an integer and by keyword.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.98)

print(f'>>> Successfully load model!')

In [None]:
##### Train & Test #####
# NOTE(review): best_val_loss / best_model are initialised here but nothing in
# this cell updates them; the checkpoint written below is the model as it is
# after the final epoch.
best_val_loss = float("inf")
best_model = None

for epoch in range(1, epochs + 1):
    start_time = time.time()
    # NOTE(review): the scheduler is handed to train_tmp and is also stepped at
    # the end of this loop body - confirm train_tmp does not step it as well.
    train_tmp(model, train_data, batch_size, optimizer, criterion, input_window, output_window, epoch, scheduler)

    # Every 20th epoch uses the plotting variant of the evaluation helper; all
    # other epochs only compute the validation loss.
    if epoch % 20 == 0:
        truth, test_result, result_to_ML, val_loss = plot_and_loss2(model, test_data, criterion, input_window, output_window, scaler_test)
    else:
        val_loss = evaluate2(model, test_data, criterion, output_window, input_window)

    elapsed = time.time() - start_time
    print('-' * 90)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f} |'.format(epoch, elapsed,
                                        val_loss, math.exp(val_loss)))
    print('-' * 90)

    scheduler.step()


PATH = "./checkpoints/"
# torch.save does not create missing directories; without this a fresh checkout
# would train for every epoch and then fail while writing the checkpoint.
os.makedirs(PATH, exist_ok=True)
torch.save(model, PATH + 'prediction_model.pt')
torch.save(model.state_dict(), PATH + 'prediction_model_state_dict.pt')

### Last Step - Fault Prognostics

In [None]:
PATH = './checkpoints/'
# Restore onto the first GPU when one exists, otherwise onto the CPU, so the
# notebook also runs on machines without CUDA.
map_location = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# NOTE: loading a whole pickled model executes pickle code; only load
# checkpoints from a trusted source.
model = torch.load(PATH + 'prediction_model.pt', map_location=map_location)
model.load_state_dict(torch.load(PATH + 'prediction_model_state_dict.pt', map_location=map_location))

In [None]:
# Column-wise accumulator: one independent, initially empty list per column.
# The key order is the column order of the frame built from it later, and the
# first nine keys are the ones selected positionally as classifier features.
DataFrame = {
    column: []
    for column in ('MEAN_TEMP', 'STD', 'MIN', 'MAX', 'SKEW', 'KURT', 'MEDIAN', '25%', '75%', 'label')
}

def DF_to_DataFrame(data):
    """Summarise a 1-D sample with nine descriptive statistics.

    Parameters
    ----------
    data : pandas.Series or 1-D array-like
        Values to summarise. Anything that is not already a Series is wrapped
        in one, so plain lists and NumPy arrays are accepted as well.

    Returns
    -------
    tuple
        ``(mean, min, max, std, median, skew, kurt, q25, q75)`` in that order.
        ``mean`` is rounded to 3 decimals; ``std`` is whatever ``np.std``
        returns for the series; ``skew`` and ``kurt`` come from
        ``Series.skew`` and ``Series.kurt``; ``q25`` and ``q75`` are the 25th
        and 75th percentiles from ``np.percentile``.
    """
    series = data if isinstance(data, pd.Series) else pd.Series(data)

    # Local names carry a suffix so the builtins min()/max() are not shadowed.
    mean_value = np.round(np.mean(series), 3)
    min_value = np.min(series)
    max_value = np.max(series)
    std_value = np.std(series)
    median_value = series.median()
    skew_value = series.skew()
    kurt_value = series.kurt()
    q25, q75 = np.percentile(series, q=[25, 75])

    return (mean_value, min_value, max_value, std_value, median_value,
            skew_value, kurt_value, q25, q75)

# Take index 0 (inputs) and index 1 (targets) of the second axis of test_data,
# re-insert a singleton axis at position 1, then swap the first and last axes.
test_input = test_data[:, 0, :].unsqueeze(1).transpose(0, 2)
test_target = test_data[:, 1, :].unsqueeze(1).transpose(0, 2)

def test_batch(data, index):
    data = data[:, :, index].unsqueeze(2)
    return data

def test_input_batch(data, index):
    data = data[:, :, index].unsqueeze(2)
    return data

def test_target_batch(data, index):
    data = data[:, :, index].unsqueeze(2)
    return data

def r2_score(target, output):
    """Coefficient of determination (R^2) between two tensors.

    Parameters
    ----------
    target : torch.Tensor
        Ground-truth values (floating point).
    output : torch.Tensor
        Predicted values, broadcastable against ``target``.

    Returns
    -------
    torch.Tensor
        Zero-dimensional tensor holding ``1 - SS_res / SS_tot``. When the
        target is constant (``SS_tot == 0``) the ratio is undefined; 1.0 is
        returned for a perfect prediction and 0.0 otherwise, so the result is
        always finite and does not turn an averaged score into nan/inf.
    """
    residual_ss = torch.sum((target - output) ** 2)
    total_ss = torch.sum((target - torch.mean(target)) ** 2)

    if total_ss == 0:
        # Constant target: avoid 0/0 (nan) and x/0 (inf).
        if residual_ss == 0:
            return torch.ones_like(residual_ss)
        return torch.zeros_like(residual_ss)

    return 1 - (residual_ss / total_ss)


def test_rmse_mae_r2(target, output):
    """Return ``(rmse, mae, r2)`` for a pair of NumPy arrays.

    RMSE is the square root of the mean squared error, MAE is the mean
    absolute error, and R^2 is computed by this notebook's ``r2_score`` on
    tensor views of the same two arrays.
    """
    mse = mean_squared_error(target, output)
    target_t, output_t = torch.from_numpy(target), torch.from_numpy(output)
    return np.sqrt(mse), mean_absolute_error(target, output), r2_score(target_t, output_t)

In [None]:
# Per-window metric accumulators, filled by the inference loop.
rmse_list, mae_list, r2_list = [], [], []

In [None]:
# Inference

# Number of trailing time steps that are scored and summarised per window.
# NOTE(review): 120 equals both `output_window` and the `window_size` used to
# build the random-forest features - confirm which one this is meant to track.
tail_len = 120

model.eval()
with torch.no_grad():
    for i in tqdm(range(len(test_data))):
        # `batch_input` rather than `input`, so the builtin is not shadowed.
        batch_input = test_input_batch(test_input, i)
        batch_target = test_target_batch(test_target, i)
        output = model(batch_input)

        output = output[-tail_len:].detach().cpu()
        target = batch_target[-tail_len:].detach().cpu()

        # Map both series back to the original units before scoring.
        output = scaler_test.inverse_transform(output.reshape(-1, 1)).reshape(-1)
        target = scaler_test.inverse_transform(target.reshape(-1, 1)).reshape(-1)

        rmse, mae, r2 = test_rmse_mae_r2(target[-tail_len:], output[-tail_len:])
        rmse_list.append(rmse)
        mae_list.append(mae)
        r2_list.append(r2)

        # Summarise the predicted window with the same nine statistics that the
        # accumulator dict has columns for.
        predicted = pd.Series(output, name='TEMP')
        # The results are unpacked into suffixed names: binding them to `min` /
        # `max` at notebook scope would replace the builtins for every later cell.
        (feat_mean, feat_min, feat_max, feat_std, feat_median,
         feat_skew, feat_kurt, feat_q25, feat_q75) = DF_to_DataFrame(predicted)

        DataFrame['label'].append(test_label[i])
        DataFrame['MEAN_TEMP'].append(feat_mean)
        DataFrame['MIN'].append(feat_min)
        DataFrame['MAX'].append(feat_max)
        DataFrame['STD'].append(feat_std)
        DataFrame['SKEW'].append(feat_skew)
        DataFrame['KURT'].append(feat_kurt)
        DataFrame['MEDIAN'].append(np.round(feat_median, 3))
        DataFrame['25%'].append(np.round(feat_q25, 3))
        DataFrame['75%'].append(np.round(feat_q75, 3))

print(f"Average RMSE : {np.mean(rmse_list)}")
print(f"Average MAE : {np.mean(mae_list)}")
print(f"Average R2 : {np.mean(r2_list)}")

In [None]:
# Prognostics

DF = pd.DataFrame(DataFrame)

# The first nine columns are the features and 'label' is the target.
# NOTE(review): the classifier and its scaler were fitted on the first nine
# columns of df_ML; this assumes DF lists the same features in the same order -
# confirm against make_dataframe.
transformer_X = DF.iloc[:, :9].values
transformer_y = DF['label'].values

# Reuse the scaler and classifier fitted in the first step on the statistics
# of the forecasted windows.
transformer_scaled = scaler.transform(transformer_X)
transformer_predict = RFC.predict(transformer_scaled)
# accuracy_score takes (y_true, y_pred); the ground truth goes first, matching
# the call in the first step.
transformer_ac = accuracy_score(transformer_y, transformer_predict)

print("Random Forest Classifier Precition Accuracy : ", transformer_ac)