# Import Libraries

In [None]:
!pip install openpyxl

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import math

import keras_tuner as kt
from keras_tuner.tuners import Hyperband

import IPython

from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import GRU, Dropout, Dense, Conv1D
from keras.utils.vis_utils import plot_model

import os
import warnings
warnings.filterwarnings("ignore")
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy.signal import savgol_filter
from scipy.ndimage.interpolation import shift

from xgboost import XGBRegressor, plot_importance 
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

import optuna

# Load Data

In [None]:
# Read 'Sheet1' of the three spindle-spectrum workbooks, in file order.
_SPECTRUM_WORKBOOKS = (
    '../input/heat-def/DVF5000_SPD_Spectrum1_Steel.xlsx',
    '../input/thermal-disp23-spindle/DVF5000_SPD_Spectrum2_Steel.xlsx',
    '../input/thermal-disp23-spindle/DVF5000_SPD_Spectrum3_Steel.xlsx',
)
data_train1, data_train2, data_train3 = (
    pd.read_excel(workbook, sheet_name='Sheet1') for workbook in _SPECTRUM_WORKBOOKS
)

In [None]:
## Slice off both ends of every recording to remove erroneous data:
## the first and the last `Slice_value` rows are discarded.
Slice_value = 300
data_train1, data_train2, data_train3 = (
    recording.iloc[Slice_value:-Slice_value, :]
    for recording in (data_train1, data_train2, data_train3)
)

In [None]:
# Stack the three trimmed recordings row-wise; ignore_index gives the result a fresh 0..N-1 index.
data_train = pd.concat([data_train1,data_train2,data_train3], ignore_index = True)

# Preprocessing

## 불필요한 column 제거

In [None]:
# Remove the SCAN / Day / Time columns, which this notebook treats as unnecessary.
data_train = data_train.drop(columns = ['SCAN', 'Day', 'Time'])

In [None]:
# Which set of sensor channels is used as the model input X:
#   1 -> the six H1 / S2..S6 channels
#   2 -> all 27 listed channels
#   3 -> a reduced five-channel subset
Sensor_index = 2

_SENSOR_COLUMN_SETS = {
    1: ['H1', 'S2', 'S3', 'S4', 'S5', 'S6'],
    2: ['H1', 'S2', 'S3', 'S4', 'S5', 'S6', 'X_Motor', 'Y_Motor', 'Z_Motor',
        'SPD_Motor', 'X_Servo', 'X_Screw_BRG', 'SPD_RPM', 'Z_Servo',
        'Y_Servo', 'Y_Screw_BRG', 'Z_Screw_BRG', 'Column_Rear',
        'Saddle_Rear', 'Bed_Mid', 'C_Axis_Gear', 'C_Axis_BRG', 'Air_Upper',
        'Air_Lower', 'Machine_Room_Air', 'Cooler_Outler_Supply',
        'Cooler_Inlet_Return'],
    3: ['H1', 'SPD_RPM', 'X_Servo', 'X_Screw_BRG', 'Z_Servo'],
}

# Fail here with a clear message instead of leaving pd_raw_X undefined and
# crashing with a NameError several cells later.
if Sensor_index not in _SENSOR_COLUMN_SETS:
    raise ValueError(
        f"Unsupported Sensor_index {Sensor_index!r}; "
        f"expected one of {sorted(_SENSOR_COLUMN_SETS)}"
    )

# Explicit copies: later cells overwrite columns of these frames in place.
pd_raw_X = data_train[_SENSOR_COLUMN_SETS[Sensor_index]].copy()

# Targets are every column whose name contains 'DISP'.
pd_raw_Y = data_train.filter(like='DISP').copy()

# Keep the original labels; later cells rebuild frames from bare arrays.
pd_raw_X_columns = pd_raw_X.columns
pd_raw_Y_columns = pd_raw_Y.columns

In [None]:
# Row boundaries of the three recordings inside the concatenated frame:
# [0, end of recording 1, end of recording 2, end of recording 3].
# The boundaries are cumulative. Using recording 2's own row count as the
# third entry (as before) made slice [1]:[2] cover the wrong rows (or none)
# and slice [2]:[3] start in the wrong place.
# NOTE(review): these offsets assume the row count is unchanged afterwards;
# the moving-average and re-sampling options both shorten the frames.
_rows_recording_1 = data_train1.shape[0]
_rows_recording_2 = data_train2.shape[0]
data_slicing_index = [
    0,
    _rows_recording_1,
    _rows_recording_1 + _rows_recording_2,
    data_train.shape[0],
]

In [None]:
# Set of on/off switches for the pipeline (1 = apply, 0 = do not apply)
Smoothing_Curve = 0 ## moving average: 1 = apply, 0 = skip
Time_Split_Boolean = 0 ## change of sampling rate: 1 = apply, 0 = skip
Scale_Boolean = 1 ## scaler: 1 = apply, 0 = skip
TD_Boolean = 0 ## TimeDistributed: 1 = apply, 0 = skip
Cv_Boolean = 0 ## Conv1D: 1 = apply, 0 = skip
Savgol_Boolean = 1 ## Savitzky-Golay filter: 1 = apply, 0 = skip

In [None]:
#raise SystemExit("Exit from script")

#sys.exit("Exit from script")

## Moving Average

In [None]:
## Smoothing_Curve 0 = no smoothing
## Smoothing_Curve 1 = apply the moving average
if Smoothing_Curve == 1:

    Window_Size = 59  ## window length M; an odd value is recommended

    def Moving_Average(Target_Dataframe, Window_Size):
        """Smooth every column with an unweighted moving average.

        Only positions where the window fully overlaps the data are kept, so
        a frame with N rows comes back with N - M + 1 rows (M = Window_Size).

        Parameters
        ----------
        Target_Dataframe : pandas.DataFrame
            Numeric columns to smooth.
        Window_Size : int
            Number of consecutive rows averaged together (M).

        Returns
        -------
        pandas.DataFrame
            Smoothed data with the same column labels and a fresh 0-based index.
        """
        kernel = np.full(Window_Size, 1.0 / Window_Size)
        n_rows = Target_Dataframe.shape[0] - Window_Size + 1
        # As before, this raises ValueError when the frame is more than one
        # row shorter than the window (negative dimension).
        Smooth_Sensor = np.zeros((n_rows, Target_Dataframe.shape[1]))
        if n_rows > 0:
            for col in range(Target_Dataframe.shape[1]):
                # mode='valid' returns exactly the fully-overlapping part. The
                # old code took the full convolution, shifted it with a spline
                # interpolator and sliced off 2*(M-1) samples to get the same
                # values (up to floating-point rounding).
                Smooth_Sensor[:, col] = np.convolve(
                    Target_Dataframe.iloc[:, col], kernel, mode='valid'
                )
        return pd.DataFrame(Smooth_Sensor, columns=Target_Dataframe.columns)

    # NOTE(review): smoothing removes M-1 rows from each frame, but
    # data_slicing_index is computed from the unsmoothed row counts.
    pd_raw_X = Moving_Average(pd_raw_X, Window_Size)
    pd_raw_Y = Moving_Average(pd_raw_Y, Window_Size)

## Savgol_filter

In [None]:
if Savgol_Boolean == 1:
    # Savitzky-Golay smoothing (window 1801, polynomial order 5), column by
    # column and in place: targets first, then inputs.
    for frame in (pd_raw_Y, pd_raw_X):
        for col in range(frame.shape[1]):
            frame.iloc[:, col] = savgol_filter(frame.iloc[:, col], 1801, 5)

## Time_Split

In [None]:
# Change the sampling interval
if Time_Split_Boolean == 1:

    time_stride = 60

    def Time_split_Data(Dataframe, time_stride):
        """Keep every `time_stride`-th row, starting with row 0.

        Exactly ``len(Dataframe) // time_stride`` rows are kept. The result
        is a float DataFrame with default integer row and column labels.
        """
        n_kept = Dataframe.shape[0] // time_stride
        sampled = Dataframe.iloc[: n_kept * time_stride : time_stride, :]
        return pd.DataFrame(sampled.to_numpy(dtype=float))

    pd_raw_X = Time_split_Data(pd_raw_X, time_stride)
    pd_raw_Y = Time_split_Data(pd_raw_Y, time_stride)

In [None]:
# Overlay every column of pd_raw_X on one wide figure, labelled by column.
plt.figure(figsize=(30, 10))
for position, channel in enumerate(pd_raw_X.columns):
    plt.plot(pd_raw_X.iloc[:, position], label=channel)
plt.legend(bbox_to_anchor=(1.0, 1.0))

In [None]:
# Overlay the columns of pd_raw_Y on one wide figure; the last column is left out.
plt.figure(figsize=(30, 10))
for position, channel in enumerate(pd_raw_Y.columns[:-1]):
    plt.plot(pd_raw_Y.iloc[:, position], label=channel)
plt.legend(bbox_to_anchor=(1.0, 1.0))

## Scaling

In [None]:
# Apply the scaler
if Scale_Boolean == 1:
    # StandardScaler is the active choice; MinMaxScaler() can be swapped in
    # on the next two lines.
    X_Scaler = StandardScaler()
    Y_Scaler = StandardScaler()

    # fit_transform returns a bare array. Pass the current column labels back
    # in so later plots and models still see the channel names, not 0..N-1.
    pd_raw_X = pd.DataFrame(X_Scaler.fit_transform(pd_raw_X), columns=pd_raw_X.columns)
    pd_raw_Y = pd.DataFrame(Y_Scaler.fit_transform(pd_raw_Y), columns=pd_raw_Y.columns)

    # Per-column statistics, kept for undoing the scaling later.
    X_std = X_Scaler.scale_
    Y_std = Y_Scaler.scale_
    X_mean = X_Scaler.mean_
    Y_mean = Y_Scaler.mean_

In [None]:
# Plot every column of pd_raw_X again (this cell follows the scaling cell).
plt.figure(figsize=(30, 10))
for position, channel in enumerate(pd_raw_X.columns):
    plt.plot(pd_raw_X.iloc[:, position], label=channel)
plt.legend(bbox_to_anchor=(1.0, 1.0))

In [None]:
# Plot pd_raw_Y again (this cell follows the scaling cell); the last column is left out.
plt.figure(figsize=(30, 10))
for position, channel in enumerate(pd_raw_Y.columns[:-1]):
    plt.plot(pd_raw_Y.iloc[:, position], label=channel)
plt.legend(bbox_to_anchor=(1.0, 1.0))

## Sliding Window(Batch 형태로 변환)

In [None]:
def sliding_window(X, Y, n_steps, time, stride = 1, time_distributed = 0):
    """Cut row-aligned X / Y frames into fixed-length windows.

    Parameters
    ----------
    X, Y : pandas.DataFrame
        Inputs and targets; rows are consecutive samples.
    n_steps : int
        Number of rows in each X window.
    time : int
        Lag between inputs and targets. When positive, the last `time` rows
        of X and the first `time` rows of Y are dropped, so each X row is
        paired with the Y row `time` steps later. 0 applies no lag; a
        negative value is ignored (no shift), as before.
    stride : int
        Row offset between the starts of consecutive windows.
    time_distributed : int
        1 -> each Y window holds the same `n_steps` rows as its X window;
        anything else -> each Y window holds only the last row of that span.

    Returns
    -------
    (result_X, result_Y) : tuple of numpy.ndarray
        result_X has shape (windows, n_steps, X columns); result_Y has shape
        (windows, n_steps or 1, Y columns).
    """
    # 1. Apply the input/target lag.
    if time > 0:
        X = X.iloc[:-time]
        Y = Y.iloc[time:]

    n_rows = X.shape[0]
    # Offset of the first Y row taken from each window.
    y_offset = 0 if time_distributed == 1 else n_steps - 1

    # 2. Window 0 is always emitted, even if the data is shorter than n_steps.
    windows_X = [X.iloc[0:n_steps].values]
    windows_Y = [Y.iloc[y_offset:n_steps].values]

    # 3. Collect the remaining windows in lists and stack once at the end.
    #    Concatenating onto the result inside the loop recopied every earlier
    #    window on each step, which is quadratic in the number of windows.
    expected_windows = math.trunc((len(X) - (n_steps - 1) - 1) / stride)
    for i in tqdm(range(1, n_rows), total=expected_windows):
        start = i * stride
        stop = start + n_steps
        if stop > n_rows:
            break
        windows_X.append(X.iloc[start:stop].values)
        windows_Y.append(Y.iloc[start + y_offset:stop].values)

    return np.stack(windows_X), np.stack(windows_Y)

In [None]:
#raise SystemExit("Exit from script")

#sys.exit("Exit from script")

## Batch화 하기

In [None]:
def Batch_Input(one_pd_X,one_pd_Y,Time_Split_Boolean):
    """Turn one recording into train/validation windows plus flat ML tables.

    The recording is cut into non-overlapping boxes of one tenth of its
    length. The boxes are split 70/30 into train and validation sets (fixed
    seed, shuffled). Inside every box a fine sliding window then produces
    the sequence samples.

    Parameters
    ----------
    one_pd_X, one_pd_Y : pandas.DataFrame
        Row-aligned inputs and targets of a single recording.
    Time_Split_Boolean : int
        Kept for interface compatibility; see the note on window settings.

    Returns
    -------
    tuple
        (Small_Train_X, Small_Train_Y, Small_Valid_X, Small_Valid_Y,
         ML_pd_raw_X_T, ML_pd_raw_Y_T, ML_pd_raw_X_V, ML_pd_raw_Y_V).
        The first four are 3-D window arrays; the last four are the boxes
        flattened back to 2-D (rows, columns).
    """
    box_size = int(one_pd_X.shape[0]/10)  # split the recording into 10 boxes

    # Window settings for the fine pass.
    # NOTE(review): these were only defined when Time_Split_Boolean == 1, so
    # every other value crashed with an unbound-name error on the first
    # sliding_window call. The same settings are now used in all cases;
    # confirm whether un-resampled data needs a different n_steps.
    n_steps = 30
    batch_stride = 1
    time_lag = 0

    split_X,split_Y = sliding_window(X=one_pd_X, Y=one_pd_Y, n_steps=box_size, time=time_lag, stride=box_size, time_distributed=1)
    after_sliding_X_T, after_sliding_X_V, after_sliding_Y_T, after_sliding_Y_V = train_test_split(split_X, split_Y, test_size=0.3, random_state=11, shuffle=True)

    def _window_each_box(boxes_X, boxes_Y):
        """Apply the fine sliding window inside every box and join the results."""
        parts_X, parts_Y = [], []
        for box_X, box_Y in zip(boxes_X, boxes_Y):
            win_X, win_Y = sliding_window(X=pd.DataFrame(box_X), Y=pd.DataFrame(box_Y), n_steps=n_steps, time=time_lag, stride=batch_stride, time_distributed=0)
            parts_X.append(win_X)
            parts_Y.append(win_Y)
        return np.concatenate(parts_X, axis=0), np.concatenate(parts_Y, axis=0)

    # The outputs used to start as zero arrays of the final size and the real
    # windows were appended after them, so half of every returned set was
    # all-zero samples. Only real windows are returned now.
    Small_Train_X, Small_Train_Y = _window_each_box(after_sliding_X_T, after_sliding_Y_T)
    Small_Valid_X, Small_Valid_Y = _window_each_box(after_sliding_X_V, after_sliding_Y_V)

    ## Build the flat data sets for the classical ML models
    ML_pd_raw_X_T = after_sliding_X_T.reshape(-1,after_sliding_X_T.shape[2])
    ML_pd_raw_Y_T = after_sliding_Y_T.reshape(-1,after_sliding_Y_T.shape[2])
    ML_pd_raw_X_V = after_sliding_X_V.reshape(-1,after_sliding_X_V.shape[2])
    ML_pd_raw_Y_V = after_sliding_Y_V.reshape(-1,after_sliding_Y_V.shape[2])

    return Small_Train_X,Small_Train_Y,Small_Valid_X,Small_Valid_Y,ML_pd_raw_X_T,ML_pd_raw_Y_T,ML_pd_raw_X_V,ML_pd_raw_Y_V

In [None]:
def _batch_for_rows(row_from, row_to):
    """Run Batch_Input on rows [row_from, row_to) of the prepared X / Y frames."""
    return Batch_Input(
        pd_raw_X.iloc[row_from:row_to, :],
        pd_raw_Y.iloc[row_from:row_to, :],
        Time_Split_Boolean,
    )


# One call per recording, using consecutive entries of data_slicing_index.
(split_X_T1, split_Y_T1, split_X_V1, split_Y_V1,
 ML_pd_raw_X_T1, ML_pd_raw_Y_T1, ML_pd_raw_X_V1, ML_pd_raw_Y_V1) = _batch_for_rows(
    data_slicing_index[0], data_slicing_index[1])
(split_X_T2, split_Y_T2, split_X_V2, split_Y_V2,
 ML_pd_raw_X_T2, ML_pd_raw_Y_T2, ML_pd_raw_X_V2, ML_pd_raw_Y_V2) = _batch_for_rows(
    data_slicing_index[1], data_slicing_index[2])
(split_X_T3, split_Y_T3, split_X_V3, split_Y_V3,
 ML_pd_raw_X_T3, ML_pd_raw_Y_T3, ML_pd_raw_X_V3, ML_pd_raw_Y_V3) = _batch_for_rows(
    data_slicing_index[2], data_slicing_index[3])

In [None]:
def _join_rows(*parts):
    """Concatenate the per-recording arrays along the sample axis."""
    return np.concatenate(parts, axis=0)


# Sequence samples for the neural networks.
Train_X = _join_rows(split_X_T1, split_X_T2, split_X_T3)
Train_Y = _join_rows(split_Y_T1, split_Y_T2, split_Y_T3)
Valid_X = _join_rows(split_X_V1, split_X_V2, split_X_V3)
Valid_Y = _join_rows(split_Y_V1, split_Y_V2, split_Y_V3)

# Flat tables for the classical ML models, with the original column labels.
ML_pd_raw_X_T = pd.DataFrame(
    _join_rows(ML_pd_raw_X_T1, ML_pd_raw_X_T2, ML_pd_raw_X_T3), columns=pd_raw_X_columns)
ML_pd_raw_Y_T = pd.DataFrame(
    _join_rows(ML_pd_raw_Y_T1, ML_pd_raw_Y_T2, ML_pd_raw_Y_T3), columns=pd_raw_Y_columns)
ML_pd_raw_X_V = pd.DataFrame(
    _join_rows(ML_pd_raw_X_V1, ML_pd_raw_X_V2, ML_pd_raw_X_V3), columns=pd_raw_X_columns)
ML_pd_raw_Y_V = pd.DataFrame(
    _join_rows(ML_pd_raw_Y_V1, ML_pd_raw_Y_V2, ML_pd_raw_Y_V3), columns=pd_raw_Y_columns)

# GRU Layer

In [None]:
# Number of input features (columns of pd_raw_X); used as the last dimension of the Conv1D input_shape.
input_length = pd_raw_X.shape[1]

In [None]:
class ClearTrainingOutput(tf.keras.callbacks.Callback):
    """Keras callback that clears the notebook cell output when training ends."""

    def on_train_end(*_args, **_kwargs):
        # The signature stays fully variadic (no explicit `self`), as before.
        IPython.display.clear_output(wait=True)

## For Keras Hyperparameter Tuning

In [None]:
def model_builder(hp):
    """Build and compile a stacked-GRU regressor for Keras Tuner.

    Tuned hyperparameters:
      * ``units``          - GRU width, shared by every GRU layer
      * ``num_fc_layers``  - number of GRU + Dropout + LayerNorm blocks (1-3)
      * ``dropout_<k>``    - dropout rate of block k (0, 0.2 or 0.4)
      * ``learning_rate``  - Adam learning rate (1e-2 or 1e-3)

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Hyperparameter container supplied by the tuner.

    Returns
    -------
    keras.Model
        Compiled model with one output per column of ``pd_raw_Y``.
    """
    model = keras.models.Sequential()

    hp_unit = hp.Choice('units', [4, 8, 16, 32])

    num_fc_layers_min  = 1
    num_fc_layers_max  = 3

    dropout_min  =  0
    dropout_max  =  0.4
    dropout_step =  0.2

    # Tunable stack of recurrent blocks.
    for i in range( hp.Int('num_fc_layers',min_value=num_fc_layers_min,max_value=num_fc_layers_max) ):
        model.add( GRU( hp_unit, return_sequences=True) )
        model.add( Dropout( hp.Float('dropout_'+str(i+1),min_value=dropout_min,max_value=dropout_max,step=dropout_step) ) )
        model.add( keras.layers.LayerNormalization() )

    # Output head, added once after the tunable stack. These three layers were
    # indented inside the loop, so the head (including the Dense projection to
    # the target size) was repeated after every block.
    model.add(keras.layers.GRU(hp_unit, return_sequences=True))
    model.add(keras.layers.LayerNormalization() )
    model.add(keras.layers.Dense(pd_raw_Y.shape[1]))

    model.compile(
        optimizer=keras.optimizers.Adam(
            # The learning rate is searched over 0.01 and 0.001.
            hp.Choice('learning_rate',
                      values=[1e-2, 1e-3])),
        loss='mse',
        # NOTE(review): 'accuracy' says little for an MSE regression target;
        # kept so logged metric names do not change.
        metrics=['accuracy'])

    return model

In [None]:
# Hyperband tuner for the GRU model produced by model_builder.
tuner = kt.Hyperband(
        model_builder, # HyperModel
        objective ='val_loss', #  quantity the search optimises (validation loss)
        max_epochs = 20, # training epochs per model (presumably an upper bound - confirm in the Keras Tuner docs)
        factor = 3,    # variable that controls how many models are trained per round
        directory ='temp', # folder in which the tried parameters are stored
        project_name ='helloworld') # name under which the tried parameters are stored

In [None]:
# Show the hyperparameter search space registered for the GRU tuner.
tuner.search_space_summary()

In [None]:
# Run the tuner search.
# Validation targets must be Valid_Y. Passing Valid_X twice scored every trial
# against its own inputs, so the 'val_loss' objective was meaningless.
# NOTE(review): Hyperband presumably sets the epoch count per trial itself, in
# which case epochs=30 has no effect - confirm in the Keras Tuner docs.
tuner.search(Train_X, Train_Y,
             epochs=30,
             validation_data=(Valid_X, Valid_Y), callbacks = [ClearTrainingOutput()])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

# Retrieve the three best models
models = tuner.get_best_models(num_models=3)
# ...and print the results
tuner.results_summary()

In [None]:
# Print the GRU tuner's results summary in its own cell.
tuner.results_summary()

In [None]:
# Deliberate stop point (presumably so "Run All" does not continue past the
# GRU search). The `sys.exit(...)` that followed was unreachable and `sys` is
# never imported, so it has been removed.
raise SystemExit("Exit from script")

In [None]:
#model = tuner.hypermodel.build(best_hps)

## For 1D-CNN Keras HyperParameter Tuning

In [None]:
def cnn_model_builder(hp):
    """Build and compile the Conv1D + GRU regressor for Keras Tuner.

    Only the convolutional front end is tuned (``filters``, ``kernel_size``,
    ``strides``). The recurrent part is fixed at GRU(64) -> GRU(32), each
    followed by layer normalisation, with a Dense output of one unit per
    column of ``pd_raw_Y``. Adam's learning rate is fixed at 0.001.
    """
    model = keras.models.Sequential()

    conv_settings = {
        'filters': hp.Choice('filters', values = [10, 30, 60]),
        'kernel_size': hp.Choice('kernel_size', values = [5, 20, 40, 80]),
        'strides': hp.Choice('strides', values = [5, 20, 40, 80]),
    }

    layer_stack = (
        keras.layers.Conv1D(padding='valid', input_shape=[None, input_length], **conv_settings),
        keras.layers.GRU(64, return_sequences=True),
        keras.layers.LayerNormalization(),
        keras.layers.GRU(32, return_sequences=True),
        keras.layers.LayerNormalization(),
        keras.layers.Dense(pd_raw_Y.shape[1]),
    )
    for layer in layer_stack:
        model.add(layer)

    model.compile(
        optimizer=keras.optimizers.Adam(0.001),
        loss='mse',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Hyperband tuner for the Conv1D + GRU model produced by cnn_model_builder.
# It needs its own project_name: with the same directory and project_name as
# the GRU tuner, Keras Tuner would pick up that tuner's saved trials instead
# of starting a fresh search for this model.
tuner_cnn = kt.Hyperband(
        cnn_model_builder, # HyperModel
        objective ='val_loss', # quantity the search optimises (validation loss)
        max_epochs = 30, # training epochs per model
        factor = 3,    # controls how many models are trained per round
        directory ='temp', # folder in which the tried parameters are stored
        project_name ='helloworld_cnn') # separate sub-project for the CNN search

In [None]:
# Show the hyperparameter search space registered for the CNN tuner.
tuner_cnn.search_space_summary()

In [None]:
# Run the tuner search for the Conv1D + GRU model.
tuner_cnn.search(
    Train_X,
    Train_Y,
    epochs=20,
    validation_data=(Valid_X, Valid_Y),
    callbacks=[ClearTrainingOutput()],
)

# Best hyperparameter set found by the search.
best_hps = tuner_cnn.get_best_hyperparameters(num_trials=1)[0]

# Three best models, followed by the printed results summary.
models = tuner_cnn.get_best_models(num_models=3)
tuner_cnn.results_summary()

In [None]:
# Deliberate stop point (presumably so "Run All" does not continue past the
# CNN search). The `sys.exit(...)` that followed was unreachable and `sys` is
# never imported, so it has been removed.
raise SystemExit("Exit from script")

## Machine Learning HP Tuning

### XGboost

In [None]:
def objective(trial):
    """Optuna objective: hold-out MSE of an XGBoost regressor on target column 4.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        Mean squared error on the 30 % hold-out split (lower is better).
    """
    params = {
        "max_depth": trial.suggest_int("max_depth", 4, 10),
        "learning_rate": trial.suggest_float('learning_rate', 0.0001, 0.1, log=True),
        'n_estimators': trial.suggest_int("n_estimators", 100, 1000, log=True),
        # suggest_float(..., log=True) replaces the deprecated
        # suggest_loguniform and matches the other parameters here.
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-2, 1, log=True),
        'gamma': trial.suggest_float('gamma', 0.1, 1.0, log=True),
    }

    # Fixed seed: every trial is scored on the same split.
    X_train, X_valid, y_train, y_valid = train_test_split(pd_raw_X, pd_raw_Y.iloc[:,4], test_size=0.3, random_state=11, shuffle=True)

    model = XGBRegressor(**params, random_state=42)
    model.fit(X_train,y_train,eval_set=[(X_valid, y_valid)],early_stopping_rounds=100,verbose=False)

    preds = model.predict(X_valid)
    mse = mean_squared_error(y_valid, preds)
    return mse

In [None]:
# Minimise the objective's hold-out MSE over 50 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50, show_progress_bar=True)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")

trial = study.best_trial

# The objective returns a mean squared error, so it is labelled as such
# (it used to be printed as "Accuracy").
print(f"MSE: {trial.value}")
print("Best hyperparameters: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

#clf = xgb.XGBClassifier(**study.best_params, random_state = 1234, use_label_encoder = False)
#clf.fit(X_train, y_train)

### Randomforest

In [None]:
def objective(trial):
    """Optuna objective: hold-out MSE of a random forest on target column 4."""
    # (name, low, high); each is sampled as a log-scaled integer, in this order.
    int_ranges = (
        ("max_depth", 10, 1000),
        ("max_leaf_nodes", 10, 1000),
        ("n_estimators", 100, 1000),
    )
    forest_kwargs = {
        name: trial.suggest_int(name, low, high, log=True)
        for name, low, high in int_ranges
    }

    target = pd_raw_Y.iloc[:, 4]
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        pd_raw_X, target, test_size=0.3, random_state=11, shuffle=True
    )

    regressor = RandomForestRegressor(**forest_kwargs, random_state=42)
    regressor.fit(fit_X, fit_y)

    return mean_squared_error(holdout_y, regressor.predict(holdout_X))

In [None]:
# Minimise the objective's hold-out MSE over 20 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20, show_progress_bar=True)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")

trial = study.best_trial

# The objective returns a mean squared error, so it is labelled as such
# (it used to be printed as "Accuracy").
print(f"MSE: {trial.value}")
print("Best hyperparameters: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

### SVR

In [None]:
def objective(trial):
    """Optuna objective: hold-out MSE of an RBF-kernel SVR on target column 0."""
    # (name, low, high); each is sampled log-uniformly, in this order.
    log_ranges = (
        ("C", 0.1, 1000.0),
        ("gamma", 0.01, 100.0),
        ("epsilon", 0.01, 100.0),
    )
    svr_kwargs = {
        name: trial.suggest_float(name, low, high, log=True)
        for name, low, high in log_ranges
    }

    target = pd_raw_Y.iloc[:, 0]
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        pd_raw_X, target, test_size=0.3, random_state=11, shuffle=True
    )

    regressor = SVR(**svr_kwargs, kernel='rbf')
    regressor.fit(fit_X, fit_y)

    return mean_squared_error(holdout_y, regressor.predict(holdout_X))

In [None]:
# Minimise the objective's hold-out MSE over 30 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")

trial = study.best_trial

# The objective returns a mean squared error, so it is labelled as such
# (it used to be printed as "Accuracy").
print(f"MSE: {trial.value}")
print("Best hyperparameters: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

#clf = xgb.XGBClassifier(**study.best_params, random_state = 1234, use_label_encoder = False)
#clf.fit(X_train, y_train)

In [None]:
def objective(trial):
    """Optuna objective: hold-out MSE of an RBF-kernel SVR on target column 2."""
    # (name, low, high); each is sampled log-uniformly, in this order.
    log_ranges = (
        ("C", 0.1, 1000.0),
        ("gamma", 0.01, 100.0),
        ("epsilon", 0.01, 100.0),
    )
    svr_kwargs = {
        name: trial.suggest_float(name, low, high, log=True)
        for name, low, high in log_ranges
    }

    target = pd_raw_Y.iloc[:, 2]
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        pd_raw_X, target, test_size=0.3, random_state=11, shuffle=True
    )

    regressor = SVR(**svr_kwargs, kernel='rbf')
    regressor.fit(fit_X, fit_y)

    return mean_squared_error(holdout_y, regressor.predict(holdout_X))

In [None]:
# Minimise the objective's hold-out MSE over 20 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20, show_progress_bar=True)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")

trial = study.best_trial

# The objective returns a mean squared error, so it is labelled as such
# (it used to be printed as "Accuracy").
print(f"MSE: {trial.value}")
print("Best hyperparameters: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

In [None]:
def objective(trial):
    """Optuna objective: hold-out MSE of an RBF-kernel SVR on target column 4."""
    # (name, low, high); each is sampled log-uniformly, in this order.
    log_ranges = (
        ("C", 0.1, 1000.0),
        ("gamma", 0.01, 100.0),
        ("epsilon", 0.01, 100.0),
    )
    svr_kwargs = {
        name: trial.suggest_float(name, low, high, log=True)
        for name, low, high in log_ranges
    }

    target = pd_raw_Y.iloc[:, 4]
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        pd_raw_X, target, test_size=0.3, random_state=11, shuffle=True
    )

    regressor = SVR(**svr_kwargs, kernel='rbf')
    regressor.fit(fit_X, fit_y)

    return mean_squared_error(holdout_y, regressor.predict(holdout_X))

In [None]:
# Minimise the objective's hold-out MSE over 20 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20, show_progress_bar=True)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")

trial = study.best_trial

# The objective returns a mean squared error, so it is labelled as such
# (it used to be printed as "Accuracy").
print(f"MSE: {trial.value}")
print("Best hyperparameters: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

## Time Gradient

In [None]:
def test_linear_regression2(pd_X, pd_Y, x_column_names, y_column_names, time=0, group_num=5):
    """
    Judge how important the inputs of each time band are for every target.

    The data is averaged in groups of rows. For every target row the
    ``2 * group_num`` neighbouring input groups are laid side by side and a
    linear regression is fitted per target. Coefficients, standard errors,
    p-values and t-values are tabulated and drawn as heat maps.

    Parameters
    ----------
    pd_X : pandas.DataFrame
        All X data.

    pd_Y : pandas.DataFrame
        All Y data.

    x_column_names : list
        Names of the columns of the X data.

    y_column_names : list
        Names of the columns of the Y data.

    time : int
        Span, in minutes, of X data included before and after each Y sample.
        Supported inputs: 10, 20, 30, 60, 120. The default 0 is not usable
        (it gives an empty group) and raises ValueError.

    group_num : int
        How many groups before and how many groups after each Y sample enter
        the regression.

    Returns
    ----------
    X_2d_table : pandas.DataFrame
        The pre-processed X used for the regression.

    Y_2d_table : pandas.DataFrame
        The pre-processed Y used for the regression.

    table_dict : dict
        Per target, the regression result table
        (Variable, coef, S.E, p-value, t-value, star).

    t_result_dict : dict
        Per target, the t-values of X arranged as time band x input.

    f_result_dict : dict
        Per target, the coefficients of X arranged as time band x input.

    Raises
    ------
    ValueError
        If ``time`` and ``group_num`` give a group size of zero or less.
    """
    # Imported here because the notebook's import cell does not provide these
    # names (its seaborn import is commented out); without them the function
    # fails with a NameError.
    from scipy.stats import t
    from sklearn.linear_model import LinearRegression
    import seaborn as sns

    # Multiplier that stretches group_size to the requested number of minutes.
    multiple = int(time//10)

    # Number of rows averaged together; e.g. 120 means 120 seconds
    # (assumes one row per second - TODO confirm).
    group_size = multiple * int(600/group_num)
    if group_size <= 0:
        # Previously this surfaced as a ZeroDivisionError a few lines below.
        raise ValueError(
            f"time={time!r} and group_num={group_num!r} give a group size of "
            f"{group_size}; time must be at least 10 and group_num at most 600"
        )

    np_X = pd_X.values
    np_Y = pd_Y.values

    # 1. Bundle group_size rows at a time: X becomes the group mean, Y the
    #    last row of the group.

    new_X_mu = np.zeros((int(np_X.shape[0]/group_size), np_X.shape[1]))
    new_Y = np.zeros((int(np_Y.shape[0]/group_size), np_Y.shape[1]))

    for idx in range(new_X_mu.shape[0]):
        new_X_mu[idx, :] = np_X[idx *
                                group_size: (idx+1)*group_size].mean(axis=0)
        new_Y[idx, :] = np_Y[(idx+1)*group_size-1: (idx+1)
                             * group_size].reshape(-1)

    # 2. For every target row, lay the 2*group_num consecutive X groups
    #    (group_num before, group_num after) side by side.

    X_2d = np.zeros(
        (new_X_mu.shape[0]-2*group_num+1, new_X_mu.shape[1]*2*group_num))

    for idx in range(new_X_mu.shape[0]-2*group_num+1):
        X_2d[idx, ] = np.reshape(
            new_X_mu[idx:idx+2*group_num, :], (1, new_X_mu.shape[1]*2*group_num))

    Y_2d = new_Y[group_num-1:-group_num, :].copy()

    # 3. Estimate beta from X and Y, then test each coefficient.

    table_dict = {}

    batch_size = X_2d.shape[0]
    Y_dim = Y_2d.shape[1]

    X_2d_table = pd.DataFrame(X_2d, columns=[f'{x}_{i}_{time}min' for i in range(
        1, group_num*2+1) for x in x_column_names])
    Y_2d_table = pd.DataFrame(Y_2d, columns=y_column_names)

    for idx in range(Y_dim):
        # NOTE(review): the model is fitted with an intercept, but y_hat and
        # the covariance below use the design matrix without one. Confirm
        # whether fit_intercept=False (or an intercept column) is intended.
        reg = LinearRegression().fit(X_2d, Y_2d[:, idx])
        beta_hat = reg.coef_
        y_hat = X_2d@np.array(beta_hat)
        mse = np.sum((Y_2d[:, idx]-y_hat)**2)/(batch_size-beta_hat.shape[0])
        variance_of_beta_hat = np.linalg.inv(X_2d.T@X_2d)*mse
        se = np.sqrt(np.diag(variance_of_beta_hat))

        p_val = []
        t_val = []
        for i in range(beta_hat.shape[0]):
            # Two-sided p-value from the Student-t distribution.
            p_temp = 2 * \
                (1 - t.cdf(abs(beta_hat[i]/se[i]),
                 batch_size-beta_hat.shape[0]))
            p_val.append(round(float(p_temp), 3))
            t_temp = abs(beta_hat[i]/se[i])
            t_val.append(round(float(t_temp), 3))

        table = pd.DataFrame()
        table['Variable'] = [f'{x}_{i}_{time}min' for i in range(
            1, group_num*2+1) for x in x_column_names]
        table['coef'] = beta_hat
        table['S.E'] = se
        table['p-value'] = p_val
        table['t-value'] = t_val
        # Significance stars: * p<0.05, ** p<0.01, *** p<0.001.
        table.loc[(table['p-value'] < 0.05) &
                  (table['p-value'] >= 0.01), 'star'] = '*'
        table.loc[(table['p-value'] < 0.01) &
                  (table['p-value'] >= 0.001), 'star'] = '**'
        table.loc[(table['p-value'] < 0.001), 'star'] = '***'

        table_dict['{0}'.format(y_column_names[idx])] = table

    # 4. Reshape coefficients and t-values to (time band x input) and plot.
    t_result_dict = {}
    f_result_dict = {}

    for target in table_dict.keys():
        temp_table = pd.DataFrame()
        temp_table2 = pd.DataFrame()
        for X in x_column_names:
            # NOTE(review): filter(like=...) matches by substring, so one input
            # name contained in another's variable name would pull extra columns.
            temp = table_dict[f'{target}'].drop(
                columns=['coef', 'S.E', 'p-value', 'star']).T
            temp.columns = table_dict[f'{target}']['Variable']
            temp = temp.drop(index=['Variable'])
            temp_table[f'{X}'] = temp.filter(like=f'{X}').values.reshape(-1)
            temp_table.index = [f'{int(group_size*(x+1))}s ~ {int(group_size*x)}s' for x in reversed(range(
                0, group_num))]+[f'{int(group_size*x)}s ~ {int(group_size*(x+1))}s' for x in range(0, group_num)]

            temp_table = temp_table.astype('float')

            temp2 = table_dict[f'{target}'].drop(
                columns=['t-value', 'S.E', 'p-value', 'star']).T
            temp2.columns = table_dict[f'{target}']['Variable']
            temp2 = temp2.drop(index=['Variable'])
            temp_table2[f'{X}'] = temp2.filter(like=f'{X}').values.reshape(-1)
            temp_table2.index = [f'{int(group_size*(x+1))}s ~ {int(group_size*x)}s' for x in reversed(range(
                0, group_num))]+[f'{int(group_size*x)}s ~ {int(group_size*(x+1))}s' for x in range(0, group_num)]
            temp_table2 = temp_table2.astype('float')

        t_result_dict[f'{target}'] = temp_table
        sns.set(font="Malgun Gothic")
        fig, ax = plt.subplots(figsize=(11, 11))
        ax = sns.heatmap(temp_table, annot=True, annot_kws={
                         "size": 12}, cmap="YlGnBu",fmt = '.1f')
        plt.title(f'{target} {time}minute front and back t-value', fontsize=12)
        plt.show()

        f_result_dict[f'{target}'] = temp_table2
        sns.set(font="Malgun Gothic")
        fig, ax = plt.subplots(figsize=(11, 11))
        ax = sns.heatmap(temp_table2, annot=True,
                         annot_kws={"size": 12}, cmap="bwr",fmt = '.1f')
        plt.title(f'{target} {time}minute front and back coef', fontsize=12)
        plt.show()

    return X_2d_table, Y_2d_table, table_dict, t_result_dict, f_result_dict

In [None]:
# Run the time-band regression with time = 60 and 6 groups before / 6 groups after each target sample.
after_X_2d, after_Y_2d, table_result, t_result_dict,coef_result_dict  = test_linear_regression2(pd_raw_X, pd_raw_Y, pd_raw_X_columns, pd_raw_Y_columns, time = 60, group_num = 6)

In [None]:
# import seaborn as sns
def NN(X, y, epoch, out_type, dict_index):
    """Fit a small dense network and plot its first-layer weight magnitudes.

    A Dense(4, relu) -> Dense(n_targets) network is trained on (X, y) with
    MSE loss. The absolute first-layer weights are reduced over the 4 hidden
    units (max or mean) and drawn as a heat map of time band x input feature.

    Parameters
    ----------
    X : pandas.DataFrame
        Regression inputs, e.g. the first result of test_linear_regression2.
    y : pandas.DataFrame
        Regression targets, e.g. its second result.
    epoch : int
        Number of training epochs.
    out_type : str
        "max" or "mean": how the weights of one input are reduced.
    dict_index : dict
        Per-target tables whose row index supplies the y-axis (time band)
        labels, e.g. the t-value dict of test_linear_regression2.

    Returns
    -------
    matplotlib.axes.Axes
        The heat-map axes returned by seaborn.

    Raises
    ------
    ValueError
        If `out_type` is neither "max" nor "mean".
    """
    # Imported here because the notebook's import cell does not provide `sns`
    # (its seaborn import is commented out).
    import seaborn as sns

    # Check before training. This used to print a message only after the fit
    # and then crash with a NameError on the undefined `htmp`.
    if out_type not in ("max", "mean"):
        raise ValueError("You can only check max or mean")

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(4, activation='relu'))
    model.add(keras.layers.Dense(y.shape[1]))
    model.compile(loss="mse", optimizer="adam")
    # Train on the arguments; the globals after_X_2d / after_Y_2d were used
    # before, which silently ignored X and y.
    history = model.fit(X, y, epochs=epoch,verbose=0)

    pd.DataFrame(history.history).loc[:,['loss']].plot(figsize=(8,5))
    plt.xlabel('Iteration(epoch)')
    plt.ylabel('Mean Squared Error')
    plt.grid(True)

    # First-layer kernel: one row per input column, one column per hidden unit.
    abs_weights = np.abs(model.layers[0].get_weights()[0])
    if out_type == "max":
        htmp = np.max(abs_weights, axis=1)
    else:
        htmp = np.mean(abs_weights, axis=1)

    # One x label per input feature. The five hard-coded names used before
    # did not match the feature count of the selected sensor set.
    x_axis_labels = list(pd_raw_X_columns)
    y_axis_labels = dict_index[y.columns[0]].index
    fig, ax = plt.subplots(figsize=(10, 10))
    # For "mean" the title is unchanged; for "max" it now says "Max".
    plt.title(f'Fully Connected Layer Weights {out_type.capitalize()} by Feature', fontsize=12)

    return sns.heatmap(htmp.reshape(-1,pd_raw_X.shape[1]),xticklabels=x_axis_labels,yticklabels=y_axis_labels,annot=True,annot_kws={"size": 12}, cmap="YlGnBu")

In [None]:
# Train the small dense network for 50 epochs and plot the mean absolute first-layer weights.
NN(after_X_2d, after_Y_2d, 50, "mean",t_result_dict)