# Best Model Saving, 최적 모델 찾기 by sklearn.model_selection
```
모델 생성 시 seed = 42 ~ 50일 때, 각각의 best모델에서 data seed = 1~50에서의 평균 정확도
```

# data_seed, model_seed = 42

In [None]:
import random
import warnings

# Silence warnings before the heavy third-party imports below are executed.
warnings.filterwarnings(action='ignore')

import numpy as np  # alias used by the np.load(...) calls in this notebook
import numpy as numpy
import pandas as pd
import tensorflow as tf
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, LSTM, InputLayer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# -------------------------
seed_num = 42               # data seed
# -------------------------
# Seed Python's RNG so the index shuffle below (and therefore the
# train/test split) is reproducible for this data seed.
random.seed(seed_num)

# Load the feature and label arrays. `np` is the numpy alias; the import
# block must bind it with `import numpy as np`.
x = np.load('C:/Users/FLAG/Desktop/dsml_data/x_(7727,10,4068).npy')
y = np.load('C:/Users/FLAG/Desktop/dsml_data/y_(7727,1).npy')

# Both arrays are indexed with the same shuffled index list, so they must
# hold the same number of samples; fail early instead of mis-aligning rows.
if len(x) != len(y):
    raise ValueError(
        f'x and y must have the same number of samples, got {len(x)} and {len(y)}'
    )

# Shuffle the sample indices with the seeded RNG.
idx = list(range(len(x)))
random.shuffle(idx)

# First 80% of the shuffled indices form the training set, the rest the test set.
i = round(x.shape[0]*0.8)
train_idx, test_idx = idx[:i], idx[i:]
X_train, y_train = x[train_idx], y[train_idx]
X_test, y_test = x[test_idx], y[test_idx]

# -------------------------
seed_num = 42               # model seed
# -------------------------
# Fix TensorFlow's global seed before any layer is created.
tf.random.set_seed(seed_num)

# Per-sample input shape: axes 1 and 2 of the training array.
timesteps, n_features = X_train.shape[1], X_train.shape[2]
recurrent_act = 'hard_sigmoid'

# Four stacked LSTM layers (128 -> 64 -> 64 -> 32 units) with dropout after
# the second and fourth, followed by a single sigmoid output unit.
# Layers are created and added strictly in this order.
lstm = Sequential()
lstm.add(InputLayer(input_shape=(timesteps, n_features)))
lstm.add(LSTM(128, activation=recurrent_act, return_sequences=True))
lstm.add(LSTM(64, activation=recurrent_act, return_sequences=True))
lstm.add(Dropout(rate=0.2))
lstm.add(LSTM(64, activation=recurrent_act, return_sequences=True))
# Last recurrent layer returns only its final output, not the full sequence.
lstm.add(LSTM(32, activation=recurrent_act, return_sequences=False))
lstm.add(Dropout(rate=0.2))
lstm.add(Dense(1, activation='sigmoid'))

import os

from tensorflow.keras.callbacks import ModelCheckpoint

MODEL_SAVE_FOLDER_PATH = './model/'
# exist_ok=True replaces the check-then-create pair: no error if the folder
# already exists, and no gap between the existence test and the creation.
os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

# Keras fills in {epoch} and {val_loss} each time a checkpoint is written.
model_path = MODEL_SAVE_FOLDER_PATH + 'seed42-{epoch:02d}-{val_loss:.4f}.hdf5'

# Write a checkpoint only when validation loss improves on the best value so far.
cb_checkpoint = ModelCheckpoint(
    filepath=model_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)

# Stop after 50 epochs without improvement in validation accuracy and roll the
# in-memory model back to the best weights seen.
# NOTE(review): the checkpoint tracks 'val_loss' while early stopping tracks
# 'val_acc', so the saved file and the restored weights may come from
# different epochs - confirm this is intended.
early_stop = EarlyStopping(
    monitor='val_acc',
    patience=50,
    verbose=1,
    restore_best_weights=True,
)

lstm.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['acc'],
)

# 25% of the training data is held out for validation; shuffle=False keeps the
# batch order fixed between epochs.
lstm.fit(
    X_train,
    y_train,
    validation_split=0.25,
    batch_size=128,
    epochs=500,
    callbacks=[early_stop, cb_checkpoint],
    shuffle=False,
)

In [None]:
# Use the same Keras namespace as the rest of the notebook (tensorflow.keras).
from tensorflow.keras.models import load_model

# Checkpoint chosen for model seed 42; the path is hard-coded.
best_model = load_model('C:/Users/FLAG/Desktop/dsml_data/model/(220125.nodecay)seed42-05-0.5021.hdf5')

# The arrays and the split position do not depend on the data seed, so load
# and compute them once instead of on every iteration.
x = np.load('C:/Users/FLAG/Desktop/dsml_data/x_(7727,10,4068).npy')
y = np.load('C:/Users/FLAG/Desktop/dsml_data/y_(7727,1).npy')
i = round(x.shape[0]*0.8)

# Maps data seed -> test accuracy of best_model on that seed's 20% split.
# NOTE(review): the notebook heading mentions data seeds 1~50, but this loop
# covers 0..49 - confirm which range is intended.
# NOTE(review): if best_model was trained on the seed-42 split of these same
# arrays, the test splits for other seeds presumably overlap its training
# data - verify before reading these accuracies as held-out performance.
dic_42 = {}
for seed in range(0,50):
    # Re-seed so each data seed yields its own reproducible shuffle.
    random.seed(seed)

    idx = list(range(len(x)))
    random.shuffle(idx)

    # The train slice is not used by the evaluation; it is kept so the
    # notebook's global X_train / y_train end up as they did originally.
    X_train, y_train = x[idx[:i], :, :], y[idx[:i]]
    X_test, y_test = x[idx[i:], :, :], y[idx[i:]]

    # Threshold the predicted probabilities at 0.5 into hard 0/1 labels,
    # keeping the prediction dtype.
    pred = best_model.predict(X_test)
    pred = (pred > 0.5).astype(pred.dtype)

    # Compute the accuracy once and reuse it for both the dict and the log line.
    acc = metrics.accuracy_score(y_test, pred)
    dic_42[seed] = acc
    print(f'정확도: {acc}, seed_num = {seed}')


In [None]:
# Collect the per-data-seed accuracies for model seed 42 into a DataFrame
# (one row per data seed) and look at their mean.
# The original author's note records the result as 82.6.
df_42 = pd.DataFrame.from_dict(data=dic_42, orient='index')
df_42.mean()