In [49]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display
import seaborn as sns
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Render Korean text in figures with the "Malgun Gothic" font.
# NOTE(review): if the font is not picked up, matplotlib's font cache may need
# to be rebuilt — confirm the right procedure for the installed matplotlib version.
plt.rcParams['font.family'] = 'Malgun Gothic'
# Fixes the minus sign rendering as a broken glyph when a Korean font is in use.
plt.rc('axes', unicode_minus=False)
# seaborn's theme setup rewrites rcParams, so the font and minus-sign settings
# are passed to it again alongside the grid style.
sns.set(style='whitegrid', font="Malgun Gothic", rc={"axes.unicode_minus": False})

In [50]:
# Directory that holds the daily weather CSV files loaded in the weather-data cell.
data_root = "./data/daily"

## 1. Data 준비

### 1) Weather data

In [51]:
def _read_daily_csv(name):
    """Read ``<data_root>/<name>.csv`` as a DataFrame, decoding it as CP949."""
    return pd.read_csv(f'{data_root}/{name}.csv', encoding='cp949')


# One frame per weather measurement; NaNs in the precipitation table are
# replaced with 0, the other tables are left as read.
temp_df = _read_daily_csv('Temperatures')
precip_df = _read_daily_csv('Precipitation').fillna(0)
wind_df = _read_daily_csv('Wind')
humidity_df = _read_daily_csv('Humidity')
sunshine_df = _read_daily_csv('Sunshine')

# Fold the tables together one at a time; each merge uses pandas' defaults
# (inner join on whatever columns the two frames have in common).
weather_df = temp_df
for other_df in (precip_df, wind_df, humidity_df, sunshine_df):
    weather_df = weather_df.merge(other_df)

# Feature columns kept for modelling, in this order.
weather_columns = ["평균기온", "강수량mm", "평균풍속ms", "평균습도rh","일조합"]

# Keep the date column plus the selected features, and preview the first rows.
selected_weather_df = weather_df.loc[:, ['일시', *weather_columns]]
display(selected_weather_df.iloc[:3])

Unnamed: 0,일시,평균기온,강수량mm,평균풍속ms,평균습도rh,일조합
0,2012-01-01,-3.0,0.0,2.9,64.5,4.4
1,2012-01-02,-4.8,0.0,2.3,66.5,5.9
2,2012-01-03,-4.5,0.4,2.5,68.3,2.7


### 2) Stock data

In [52]:
# The stock CSV is named after the company it belongs to.
company_name = "Samsung Electronics Co"
stock_price_df = pd.read_csv(f'./data/stock/{company_name}.csv')

# Price columns kept for modelling.
stock_columns = ['Close']

# Keep the date column plus the selected price columns, and preview the first rows.
selected_stock_df = stock_price_df.loc[:, ['Date', *stock_columns]]
display(selected_stock_df.iloc[:3])

Unnamed: 0,Date,Close
0,2000-01-04,6110.0
1,2000-01-05,5580.0
2,2000-01-06,5620.0


### 3) Merge

In [53]:
# Rename the weather table's date column so it matches the stock table, then
# join the two frames with pandas' defaults (inner join on the shared columns).
stock_weather_df = pd.merge(
    selected_weather_df.rename(columns={"일시": "Date"}),
    selected_stock_df,
)
display(stock_weather_df)

Unnamed: 0,Date,평균기온,강수량mm,평균풍속ms,평균습도rh,일조합,Close
0,2012-01-02,-4.8,0.0,2.3,66.5,5.9,21600.0
1,2012-01-03,-4.5,0.4,2.5,68.3,2.7,22100.0
2,2012-01-04,-7.4,0.0,3.1,55.4,8.1,21600.0
3,2012-01-05,-5.7,0.0,1.5,49.8,9.0,21100.0
4,2012-01-06,-2.8,0.0,2.5,42.9,8.4,20800.0
...,...,...,...,...,...,...,...
2553,2022-05-20,20.6,0.0,2.3,52.8,10.8,68000.0
2554,2022-05-23,23.0,0.0,2.3,63.5,10.4,67900.0
2555,2022-05-24,22.9,0.0,2.1,56.8,12.3,66500.0
2556,2022-05-25,21.0,3.5,2.6,66.6,8.1,66400.0


## 2. Training
### 1) RNN

In [54]:
# cfg
batch_size = 4
t_dim = 4
input_dim = len(weather_columns) + len(stock_columns)

In [55]:
def build_model(input_dim: int, t_dim: int) -> tf.keras.Sequential:
    """Build and compile a small recurrent regression model.

    Architecture: ``GRU(32, return_sequences=True) -> SimpleRNN(32) -> Dense(1)``.
    Each sample is a window of shape ``(t_dim, input_dim)`` and the model emits
    a single scalar per window.

    Args:
        input_dim: Number of features per timestep.
        t_dim: Number of timesteps in each input window.

    Returns:
        A compiled ``tf.keras.Sequential`` model using the Adam optimizer,
        mean-squared-error loss, and mean-absolute-error as the reported metric.
    """
    model = tf.keras.Sequential()
    # `tf.keras.Input(shape=...)` is accepted by Sequential.add in both the
    # TF2-bundled Keras and Keras 3, unlike `InputLayer(input_shape=...)`,
    # whose keyword is deprecated in Keras 3.
    model.add(tf.keras.Input(shape=(t_dim, input_dim)))
    # The GRU returns the full sequence so the following RNN layer still
    # receives a (timesteps, features) input.
    model.add(layers.GRU(32, return_sequences=True))
    model.add(layers.SimpleRNN(32))
    model.add(layers.Dense(1))

    model.compile(
        optimizer='adam',
        # `tf.keras.losses.MSE` is the functional form (y_true, y_pred);
        # calling it with no arguments raises TypeError. Use the loss class.
        loss=tf.keras.losses.MeanSquaredError(),
        # Accuracy is a classification metric and carries no meaning for a
        # continuous target; report mean absolute error instead.
        metrics=[tf.keras.metrics.MeanAbsoluteError()],
    )
    return model
model = build_model(input_dim, t_dim)

### 2) Dataloader

In [62]:
# ds = tf.data.Dataset.from_tensor_slices(stock_weather_df.drop(columns='Date'))
# ds = ds.window(size=t_dim+1, shift=1, drop_remainder=True)
# ds = ds.flat_map(lambda x:x.batch(t_dim+1))