In [109]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display
import seaborn as sns
import numpy as np
from typing import List, Callable
# import tensorflow as tf
# from tensorflow.keras import layers


# matplotlib.font_manager._rebuild()
plt.rc('font', family='Malgun Gothic')
plt.rcParams['axes.unicode_minus'] = False #한글 폰트 사용시 마이너스 폰트 깨짐 해결
sns.set(font="Malgun Gothic",rc={"axes.unicode_minus":False}, style='whitegrid')

In [110]:
data_root = "./data/daily"

## 1. Data 준비

### 1) Weather data

In [111]:
# load each data
temp_df = pd.read_csv(f'{data_root}/Temperatures.csv', encoding='cp949')
precip_df = pd.read_csv(f'{data_root}/Precipitation.csv', encoding='cp949').fillna(0)
wind_df = pd.read_csv(f'{data_root}/Wind.csv', encoding='cp949')
humidity_df = pd.read_csv(f'{data_root}/Humidity.csv', encoding='cp949')
sunshine_df = pd.read_csv(f'{data_root}/Sunshine.csv', encoding='cp949')

# merge
weather_df = temp_df.merge(precip_df).merge(wind_df).merge(humidity_df).merge(sunshine_df)
weather_columns = ["평균기온", "강수량mm", "평균풍속ms", "평균습도rh","일조합"]

selected_weather_df = weather_df[['일시']+weather_columns]
display(selected_weather_df.head(3))

Unnamed: 0,일시,평균기온,강수량mm,평균풍속ms,평균습도rh,일조합
0,2012-01-01,-3.0,0.0,2.9,64.5,4.4
1,2012-01-02,-4.8,0.0,2.3,66.5,5.9
2,2012-01-03,-4.5,0.4,2.5,68.3,2.7


### 2) Stock data

In [112]:
company_name = "Samsung Electronics Co"
stock_price_df = pd.read_csv(f'./data/stock/{company_name}.csv')
stock_columns = ['Close']
selected_stock_df = stock_price_df[['Date'] + stock_columns]
display(selected_stock_df.head(3))

Unnamed: 0,Date,Close
0,2000-01-04,6110.0
1,2000-01-05,5580.0
2,2000-01-06,5620.0


### 3) Merge

In [113]:
stock_weather_df = selected_weather_df.rename(columns={"일시":"Date"}).merge(selected_stock_df)
display(stock_weather_df)

Unnamed: 0,Date,평균기온,강수량mm,평균풍속ms,평균습도rh,일조합,Close
0,2012-01-02,-4.8,0.0,2.3,66.5,5.9,21600.0
1,2012-01-03,-4.5,0.4,2.5,68.3,2.7,22100.0
2,2012-01-04,-7.4,0.0,3.1,55.4,8.1,21600.0
3,2012-01-05,-5.7,0.0,1.5,49.8,9.0,21100.0
4,2012-01-06,-2.8,0.0,2.5,42.9,8.4,20800.0
...,...,...,...,...,...,...,...
2553,2022-05-20,20.6,0.0,2.3,52.8,10.8,68000.0
2554,2022-05-23,23.0,0.0,2.3,63.5,10.4,67900.0
2555,2022-05-24,22.9,0.0,2.1,56.8,12.3,66500.0
2556,2022-05-25,21.0,3.5,2.6,66.6,8.1,66400.0


## 2. Training
### 1) RNN

In [114]:
# cfg
batch_size = 5
t_dim = 24  # todo: 변수명 변경 필요..
input_dim = len(weather_columns) + len(stock_columns)
epochs = 10

In [115]:
import torch
import torch.nn as nn

class StockModel(nn.Module):
    def __init__(self, input_dim, t_dim, h_c=32, num_layers=2):
        super(StockModel, self).__init__()
        self.input_dim = input_dim
        self.t_dim = t_dim

        self.rnn = nn.LSTM(input_size=input_dim, hidden_size=h_c, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(in_features=h_c, out_features=1)

    def forward(self, x):
        output, hn = self.rnn(x)
        return self.fc(output[:, -1, :]) # batch, last_cell, out_dim

model = StockModel(input_dim, t_dim)

### 2) Dataloader

In [116]:
from torch.utils.data import DataLoader, Dataset

class StockDataset(Dataset):
    def __init__(self, df, t_dim, label:List = None,):
        super(StockDataset, self).__init__()
        self.df = df
        self.t_dim = t_dim
        if label is None:
            label = df.columns
        self.label = label
        self.total_length = len(df) - (t_dim + 1)

    def __len__(self):
        return self.total_length

    def __getitem__(self, idx):
        x = self.df.loc[idx: idx+self.t_dim-1].values
        y = self.df.loc[idx+self.t_dim][self.label].values
        return x, y

ds = StockDataset(stock_weather_df.drop(columns='Date'), t_dim=t_dim, label=['Close'])
dataloader = DataLoader(ds, batch_size=4, shuffle=True)

In [117]:
def train(model, train_loader, criterion, optimizer):
    for epoch in range(epochs):
        running_loss = 0
        for step, (x, y) in enumerate(train_loader):
            x, y = x.float(), y.float()
            optimizer.zero_grad()

            pred = model(x)
            loss =criterion(pred, y)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (step+1) % 100 == 0:
                print(f"{epoch+1} epoch : {step+1 :5d} loss={running_loss/(step+1)}")

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train(model, dataloader, criterion, optimizer)

1 epoch :   100 loss=nan
1 epoch :   200 loss=nan
1 epoch :   300 loss=nan
1 epoch :   400 loss=nan
1 epoch :   500 loss=nan
1 epoch :   600 loss=nan
2 epoch :   100 loss=nan
2 epoch :   200 loss=nan
2 epoch :   300 loss=nan
2 epoch :   400 loss=nan
2 epoch :   500 loss=nan
2 epoch :   600 loss=nan
3 epoch :   100 loss=nan
3 epoch :   200 loss=nan
3 epoch :   300 loss=nan
3 epoch :   400 loss=nan
3 epoch :   500 loss=nan
3 epoch :   600 loss=nan
4 epoch :   100 loss=nan
4 epoch :   200 loss=nan
4 epoch :   300 loss=nan
4 epoch :   400 loss=nan
4 epoch :   500 loss=nan
4 epoch :   600 loss=nan
5 epoch :   100 loss=nan
5 epoch :   200 loss=nan
5 epoch :   300 loss=nan
5 epoch :   400 loss=nan
5 epoch :   500 loss=nan
5 epoch :   600 loss=nan
6 epoch :   100 loss=nan
6 epoch :   200 loss=nan
6 epoch :   300 loss=nan
6 epoch :   400 loss=nan
6 epoch :   500 loss=nan
6 epoch :   600 loss=nan
7 epoch :   100 loss=nan
7 epoch :   200 loss=nan
7 epoch :   300 loss=nan
7 epoch :   400 loss=nan
