In [1]:
import pickle
import numpy as np
import pandas as pd

In [2]:
with open('df_scaled.pickle','rb') as f:
    df_scaled = pickle.load(f)


In [3]:
df_scaled['Food']

Unnamed: 0_level_0,종가,등락률,거래변동량,코스피지수_증감량
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-05-14,0.074920,0.491660,0.000000,0.511587
2010-05-17,0.077067,0.468725,0.025468,0.342590
2010-05-18,0.101557,0.554667,0.041343,0.479910
2010-05-19,0.087859,0.434181,0.027336,0.461179
2010-05-20,0.089447,0.486699,0.028428,0.397058
...,...,...,...,...
2022-07-06,0.417075,0.499675,0.023779,0.320948
2022-07-07,0.417794,0.515987,0.040026,0.673459
2022-07-08,0.410821,0.472507,0.022978,0.574176
2022-07-11,0.406615,0.454828,0.016001,0.471981


# train / test 분할

In [4]:
from sklearn.model_selection import train_test_split
X_train_dic = {}
X_test_dic={}
y_train_dic = {}
y_test_dic = {}
#모델이 각 업종마다 존재한다. 18개
for sector_name, df in df_scaled.items():
    X_train, X_test, y_train, y_test = train_test_split(df.drop('종가', 1), df['종가'], test_size=0.2, random_state=0, shuffle=False)
    X_train_dic[sector_name] = X_train
    X_test_dic[sector_name] = X_test
    y_train_dic[sector_name] = y_train
    y_test_dic[sector_name] = y_test
    
    

  X_train, X_test, y_train, y_test = train_test_split(df.drop('종가', 1), df['종가'], test_size=0.2, random_state=0, shuffle=False)


In [5]:
X_train_dic['Food'].shape, y_train_dic['Food'].shape

((2400, 3), (2400,))

In [6]:
X_test_dic['Food'].shape, y_test_dic['Food'].shape

((600, 3), (600,))

# TensroFlow Dataset을 활용한 시퀀스 데이터셋 구성

In [7]:
import tensorflow as tf

In [8]:
def windowed_dataset(series, window_size, batch_size, shuffle):
    # 텐서플로우 입력은 3D 텐서를 입력으로 받는다.
    # 2차원 데이터를 -> 3차원 (마지막 축 하나 추가)
    series = tf.expand_dims(series, axis=-1)
    
    # numpy array x(input)를 Dataset으로 변환
    ds = tf.data.Dataset.from_tensor_slices(series)
    
    #
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    if shuffle:
        ds = ds.shuffle(1000)
    ds = ds.map(lambda w: (w[:-1], w[-1]))
    return ds.batch(batch_size).prefetch(1)

## Hyperparameter를 정의합니다.

In [9]:
WINDOW_SIZE=20
BATCH_SIZE=32

In [10]:
train_dic= {}
test_dic={}

In [12]:
for sector_name, y_train in y_train_dic.items():
    train_data = windowed_dataset(y_train, WINDOW_SIZE, BATCH_SIZE, True)
    train_dic[sector_name] = train_data
    
for sector_name, y_test in y_test_dic.items():
    test_data = windowed_dataset(y_test, WINDOW_SIZE, BATCH_SIZE, False)
    test_dic[sector_name] = test_data

In [14]:
# 아래의 코드로 데이터셋의 구성을 확인해 볼 수 있습니다.
# X: (batch_size, window_size, feature)
# Y: (batch_size, feature)
for data in train_dic['Food'].take(1):
    print(f'데이터셋(X) 구성(batch_size, window_size, feature갯수): {data[0].shape}')
    print(f'데이터셋(Y) 구성(batch_size, window_size, feature갯수): {data[1].shape}')

데이터셋(X) 구성(batch_size, window_size, feature갯수): (32, 20, 1)
데이터셋(Y) 구성(batch_size, window_size, feature갯수): (32, 1)


## 모델

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


model = Sequential([
    # 1차원 feature map 생성
    Conv1D(filters=32, kernel_size=5,
           padding="causal",
           activation="relu",
           input_shape=[WINDOW_SIZE, 1]),
    # LSTM
    LSTM(16, activation='tanh'),
    Dense(16, activation="relu"),
    Dense(1),
])

In [16]:
# Sequence 학습에 비교적 좋은 퍼포먼스를 내는 Huber()를 사용합니다.
loss = Huber()
optimizer = Adam(0.0005)
model.compile(loss=Huber(), optimizer=optimizer, metrics=['mse'])

In [21]:
import os
# earlystopping은 10번 epoch통안 val_loss 개선이 없다면 학습을 멈춥니다.
earlystopping = EarlyStopping(monitor='val_loss', patience=10)

# val_loss 기준 체크포인터도 생성합니다.
#filename = os.path.join('tmp', 'ckeckpointer.ckpt')
mc = {}
for sector_name in train_dic:
    mc[sector_name] = ModelCheckpoint(sector_name+'_model.h5', 
                         save_weights_only=True, 
                         save_best_only=True, 
                         monitor='val_loss', 
                         verbose=1)

In [22]:
history_dic={}
for sector_name, train_data in train_dic.items():
    test_data = test_dic[sector_name]
    history = model.fit(train_data, 
                        validation_data=(test_data), 
                        epochs=50, 
                        callbacks=[mc[sector_name], earlystopping])
    history_dic[sector_name] = history

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.00020, saving model to Food_model.h5
Epoch 2/50

Epoch 00002: val_loss did not improve from 0.00020
Epoch 3/50

Epoch 00003: val_loss improved from 0.00020 to 0.00020, saving model to Food_model.h5
Epoch 4/50

Epoch 00004: val_loss improved from 0.00020 to 0.00020, saving model to Food_model.h5
Epoch 5/50

Epoch 00005: val_loss improved from 0.00020 to 0.00019, saving model to Food_model.h5
Epoch 6/50

Epoch 00006: val_loss improved from 0.00019 to 0.00018, saving model to Food_model.h5
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.00018
Epoch 8/50

Epoch 00008: val_loss improved from 0.00018 to 0.00018, saving model to Food_model.h5
Epoch 9/50

Epoch 00009: val_loss improved from 0.00018 to 0.00018, saving model to Food_model.h5
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.00018
Epoch 11/50

Epoch 00011: val_loss improved from 0.00018 to 0.00017, saving model to Food_model.h5
Epoch 12/50

Epoch 00012: val


Epoch 00038: val_loss did not improve from 0.00015
Epoch 39/50

Epoch 00039: val_loss improved from 0.00015 to 0.00014, saving model to Food_model.h5
Epoch 40/50

Epoch 00040: val_loss did not improve from 0.00014
Epoch 41/50

Epoch 00041: val_loss improved from 0.00014 to 0.00014, saving model to Food_model.h5
Epoch 42/50

Epoch 00042: val_loss did not improve from 0.00014
Epoch 43/50

Epoch 00043: val_loss did not improve from 0.00014
Epoch 44/50

Epoch 00044: val_loss did not improve from 0.00014
Epoch 45/50

Epoch 00045: val_loss did not improve from 0.00014
Epoch 46/50

Epoch 00046: val_loss did not improve from 0.00014
Epoch 47/50

Epoch 00047: val_loss did not improve from 0.00014
Epoch 48/50

Epoch 00048: val_loss improved from 0.00014 to 0.00014, saving model to Food_model.h5
Epoch 49/50

Epoch 00049: val_loss did not improve from 0.00014
Epoch 50/50

Epoch 00050: val_loss did not improve from 0.00014
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.00014, saving mode


Epoch 00003: val_loss did not improve from 0.00015
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.00015
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.00015
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.00015
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.00015
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.00015
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.00015
Epoch 10/50

Epoch 00010: val_loss improved from 0.00015 to 0.00015, saving model to Medicine_model.h5
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.00015
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.00015
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.00015
Epoch 14/50

Epoch 00014: val_loss did not improve from 0.00015
Epoch 15/50

Epoch 00015: val_loss did not improve from 0.00015
Epoch 16/50

Epoch 00016: val_loss did not improve from 0.00015
Epoch 17/50

Epoch 00017: val_loss did not improve from 0.00015
Epoch 18/50

Epoch 


Epoch 00008: val_loss did not improve from 0.00008
Epoch 9/50

Epoch 00009: val_loss improved from 0.00008 to 0.00008, saving model to Metal_model.h5
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.00008
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.00008
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.00008
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.00008
Epoch 14/50

Epoch 00014: val_loss did not improve from 0.00008
Epoch 15/50

Epoch 00015: val_loss did not improve from 0.00008
Epoch 16/50

Epoch 00016: val_loss did not improve from 0.00008
Epoch 17/50

Epoch 00017: val_loss did not improve from 0.00008
Epoch 18/50

Epoch 00018: val_loss did not improve from 0.00008
Epoch 19/50

Epoch 00019: val_loss did not improve from 0.00008
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.00021, saving model to Machine_model.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.00021 to 0.00021, saving model to Machine_model.h5
Epoch 3/50



Epoch 00002: val_loss did not improve from 0.00006
Epoch 3/50

Epoch 00003: val_loss improved from 0.00006 to 0.00006, saving model to Construction_model.h5
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.00006
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.00006
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.00006
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.00006
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.00006
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.00006
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.00006
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.00006
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.00006
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.00006
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.00003, saving model to Transport_model.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.00003 to 0.00003, saving model to Transport_model.h5
Epoch 


Epoch 00015: val_loss did not improve from 0.00005
Epoch 16/50

Epoch 00016: val_loss did not improve from 0.00005
Epoch 17/50

Epoch 00017: val_loss did not improve from 0.00005
Epoch 18/50

Epoch 00018: val_loss did not improve from 0.00005
Epoch 19/50

Epoch 00019: val_loss did not improve from 0.00005
Epoch 20/50

Epoch 00020: val_loss did not improve from 0.00005
Epoch 21/50

Epoch 00021: val_loss improved from 0.00005 to 0.00005, saving model to Distribution_model.h5
Epoch 22/50

Epoch 00022: val_loss improved from 0.00005 to 0.00004, saving model to Distribution_model.h5
Epoch 23/50

Epoch 00023: val_loss did not improve from 0.00004
Epoch 24/50

Epoch 00024: val_loss did not improve from 0.00004
Epoch 25/50

Epoch 00025: val_loss did not improve from 0.00004
Epoch 26/50

Epoch 00026: val_loss did not improve from 0.00004
Epoch 27/50

Epoch 00027: val_loss did not improve from 0.00004
Epoch 28/50

Epoch 00028: val_loss did not improve from 0.00004
Epoch 29/50

Epoch 00029: val_


Epoch 00023: val_loss did not improve from 0.00013
Epoch 24/50

Epoch 00024: val_loss did not improve from 0.00013
Epoch 25/50

Epoch 00025: val_loss did not improve from 0.00013
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.00030, saving model to Tele_model.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.00030 to 0.00027, saving model to Tele_model.h5
Epoch 3/50

Epoch 00003: val_loss improved from 0.00027 to 0.00022, saving model to Tele_model.h5
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.00022
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.00022
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.00022
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.00022
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.00022
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.00022
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.00022
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.00022
Epoch 12/50

Epoch 00012


Epoch 00012: val_loss did not improve from 0.00032
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.00032
Epoch 14/50

Epoch 00014: val_loss did not improve from 0.00032
Epoch 15/50

Epoch 00015: val_loss did not improve from 0.00032
Epoch 16/50

Epoch 00016: val_loss did not improve from 0.00032
Epoch 17/50

Epoch 00017: val_loss did not improve from 0.00032
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.00011, saving model to Insurer_model.h5
Epoch 2/50

Epoch 00002: val_loss did not improve from 0.00011
Epoch 3/50

Epoch 00003: val_loss did not improve from 0.00011
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.00011
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.00011
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.00011
Epoch 7/50

Epoch 00007: val_loss improved from 0.00011 to 0.00011, saving model to Insurer_model.h5
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.00011
Epoch 9/50

Epoch 00009: val_loss did not improve fr


Epoch 00018: val_loss did not improve from 0.00017
Epoch 19/50

Epoch 00019: val_loss improved from 0.00017 to 0.00017, saving model to Service_model.h5
Epoch 20/50

Epoch 00020: val_loss did not improve from 0.00017
Epoch 21/50

Epoch 00021: val_loss did not improve from 0.00017
Epoch 22/50

Epoch 00022: val_loss did not improve from 0.00017
Epoch 23/50

Epoch 00023: val_loss did not improve from 0.00017
Epoch 24/50

Epoch 00024: val_loss did not improve from 0.00017
Epoch 25/50

Epoch 00025: val_loss did not improve from 0.00017
Epoch 26/50

Epoch 00026: val_loss did not improve from 0.00017
Epoch 27/50

Epoch 00027: val_loss did not improve from 0.00017
Epoch 28/50

Epoch 00028: val_loss did not improve from 0.00017
Epoch 29/50

Epoch 00029: val_loss did not improve from 0.00017
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.00024, saving model to Manufacturer_model.h5
Epoch 2/50

Epoch 00002: val_loss did not improve from 0.00024
Epoch 3/50

Epoch 00003: val_loss improve