# 3. 훈련 데이터 생성
## 3.1 데이터 로드와 전처리

In [None]:
import pandas as pd
import numpy as np
import pickle
import os
from sklearn.preprocessing import MinMaxScaler

# Build the training set from the CSV files in ./DB: pick the wanted
# parameter columns, derive a class label per row, min-max scale the
# features, and pickle everything to 'db_info.pkl'.
want_para = ['WAFWTK', 'FCWP']
scaler = MinMaxScaler()

# Malfunction number parsed from the file name -> class label.
# Rows whose 'Normal_0' value is 0 keep label 0 and never use this map.
accident_nub = {
    '12': 1, # LOCA
    '13': 2, # SGTR
    '15': 1, # PZR PORV [LOCA]
    '17': 1, # Feedwater line leak [LOCA]
    '18': 3, # Steam Line Rupture MSLB
    '52': 3, # Steam Line Rupture MSLB (non-isolable)
}

# Per-file arrays are collected and joined once after the loop instead of
# re-copying the whole data set on every iteration. The empty float array
# seeds the labels so train_y keeps the float64 dtype it had when it was
# grown with np.append from np.array([]).
x_chunks, y_chunks = [], [np.array([])]
n_rows = 0

# NOTE(review): only the first two directory entries are read, and
# os.listdir() returns entries in arbitrary order -- confirm the cap is
# intentional.
for file in os.listdir('./DB')[0:2]:
    if not file.endswith('.csv'):
        continue
    csv_db = pd.read_csv(f'./DB/{file}', index_col=0)

    # 1. Convert the selected CSV columns to a NumPy array
    get_xdb = csv_db[want_para].to_numpy()

    # 2. Labelling: non-zero 'Normal_0' rows get the accident class of the
    #    file, zero rows stay 0.
    get_ydb = csv_db.loc[:, 'Normal_0'].to_numpy()
    get_mal_nub = file.split(',')[0][1:] # '(12, 000000, 10)' -> 12
    if get_mal_nub not in accident_nub:
        raise KeyError(f'{file}: no label defined for malfunction number {get_mal_nub!r}')
    get_y = np.where(get_ydb != 0, accident_nub[get_mal_nub], get_ydb)

    # 3. Accumulate data
    x_chunks.append(get_xdb)
    y_chunks.append(get_y)
    n_rows += get_xdb.shape[0]

    # 4. Update the min-max scaler with the new rows only; feeding the
    #    accumulated array again would re-count earlier rows on every pass.
    scaler.partial_fit(get_xdb)

    x_shape = (n_rows, get_xdb.shape[1])
    y_shape = (n_rows,)
    print(f'Read {file} \t train_x shape : {x_shape} train_y shape : {y_shape}')

if not x_chunks:
    # Without this check the failure would surface later inside
    # scaler.transform() with a far less obvious message.
    raise ValueError("No .csv files found among the entries read from './DB'")

train_x = np.concatenate(x_chunks, axis=0)
train_y = np.concatenate(y_chunks, axis=0)

# 5. Min-max scale the whole data set
train_x = scaler.transform(train_x)

# 6. Save
save_data_info = {
    'scaler': scaler,
    'want_para': want_para,
    'train_x': train_x,
    'train_y': train_y,
}

with open('db_info.pkl', 'wb') as f:
    pickle.dump(save_data_info, f)

# 4. 훈련데이터 불러오기 및 네트워크 훈련
## 4.1 훈련데이터 불러오기

In [None]:
# Restore the dictionary stored in 'db_info.pkl' and bind it to
# save_data_info for the cells below.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open('db_info.pkl', 'rb') as pkl_file:
    save_data_info = pickle.load(pkl_file)

## 4.2 네트워크 빌드 및 훈련

In [None]:
import tensorflow.keras as k

# Build, compile and train a small dense classifier: one input per selected
# parameter and a 4-way softmax output.
n_features = len(save_data_info['want_para'])

model = k.Sequential([
    # `(n)` without a trailing comma is just an int; pass an explicit
    # 1-tuple so the layer receives a real shape.
    k.layers.InputLayer(input_shape=(n_features,)),
    # NOTE(review): this layer has no activation, so it is a purely linear
    # projection feeding the next Dense layer -- confirm that is intended.
    k.layers.Dense(128),
    k.layers.Dense(128, activation='relu'),
    k.layers.Dense(4, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
model.fit(save_data_info['train_x'], save_data_info['train_y'], epochs=5)

## 4.3 네트워크 저장

In [None]:
# Write the current weights of `model` to 'model.h5'.
model.save_weights('model.h5')

## 4.4 네트워크 로드

In [None]:
# Read the weights stored in 'model.h5' back into the existing `model` object.
model.load_weights('model.h5')