## 데이터 로드

In [None]:
pip install -U finance-datareader

In [2]:
import FinanceDataReader as fdr

In [3]:
# Start date passed to every index download in this notebook.
standard_date = '2010-07-01'
# Fetch the 'DJI' series from standard_date onward and preview the first rows.
dow = fdr.DataReader('DJI', standard_date)
dow.head()

Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-07-01,9732.53,9773.27,9834.71,9596.04,262820000.0,-0.0042
2010-07-02,9686.48,9732.23,9798.19,9603.8,199570000.0,-0.0047
2010-07-06,9743.62,9689.21,9880.76,9648.26,216710000.0,0.0059
2010-07-07,10018.28,9736.85,10029.93,9716.38,219560000.0,0.0282
2010-07-08,10138.99,10019.26,10175.02,9987.02,192220000.0,0.012


In [4]:
# Fetch the 'KS11' series from the same start date and preview the first rows.
kospi = fdr.DataReader('KS11', standard_date)
kospi.head()

Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-07-01,1686.24,1687.0,1693.9,1664.54,327660000.0,-0.0071
2010-07-02,1671.82,1688.95,1693.75,1667.38,344940000.0,-0.0086
2010-07-05,1675.37,1675.43,1680.39,1664.22,356620000.0,0.0021
2010-07-06,1684.94,1664.87,1684.94,1650.3,337490000.0,0.0057
2010-07-07,1675.65,1685.77,1685.77,1668.61,345700000.0,-0.0055


## 데이터 전처리

In [5]:
import pandas as pd

In [6]:
# Put the two Close series side by side, aligned on their date index.
# A date present in only one series becomes NaN in the other column.
df = pd.DataFrame({'DOW' : dow['Close'], 'KOSPI' : kospi['Close']})
df

Unnamed: 0_level_0,DOW,KOSPI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-07-01,9732.53,1686.24
2010-07-02,9686.48,1671.82
2010-07-05,,1675.37
2010-07-06,9743.62,1684.94
2010-07-07,10018.28,1675.65
...,...,...
2021-07-19,33963.29,3244.04
2021-07-20,34511.86,3232.70
2021-07-21,34797.74,3215.91
2021-07-22,34822.75,3250.21


In [7]:
# Count the missing values in each column.
df.isnull().sum()

DOW       85
KOSPI    137
dtype: int64

In [8]:
# Fill the gaps left by aligning the two series: back-fill first (each NaN takes
# the next available value), then forward-fill whatever NaN remains at the tail.
# DataFrame.bfill()/ffill() replace the deprecated fillna(method=...) spelling
# and produce the same result.
df = df.bfill().ffill()
# Confirm that no missing values remain.
df.isnull().sum()

DOW      0
KOSPI    0
dtype: int64

# 선형 회귀 분석

## 사이파이를 이용한 선형 회귀

In [9]:
!pip install scipy



In [10]:
from scipy import stats

* model = stats.linregress(독립 변수 x, 종속 변수 y)  
* model.slope : 기울기  
* model.intercept : y절편

In [11]:
# Least-squares line with DOW as the independent variable (x) and KOSPI as the
# dependent variable (y); the result object is displayed as the cell output.
regr = stats.linregress(x=df['DOW'], y=df['KOSPI'])
regr

LinregressResult(slope=0.037579050604805715, intercept=1391.4901953723474, rvalue=0.7663662748292898, pvalue=0.0, stderr=0.0005883066014326273)

In [12]:
# Slope of the fitted regression line.
regr.slope

0.037579050604805715

In [13]:
# Intercept of the fitted regression line.
regr.intercept

1391.4901953723474

In [14]:
# Render the fitted line as text: slope to 3 decimals, intercept to 2 decimals.
regr_line = f'Y = {regr.slope:.3f} * X + {regr.intercept:.2f}'
regr_line

'Y = 0.038 * X + 1391.49'

# 케라스를 이용한 선형 회귀 기초

In [22]:
from tensorflow.keras.layers     import Dense
from tensorflow.keras.models     import Sequential
from tensorflow.keras.callbacks  import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

- Adam은 SGD(확률적 경사하강법)를 보완한 옵티마이저로, 모멘텀과 파라미터별 적응형 학습률을 함께 사용함
- optimizer로 Adam을 쓰면 SGD를 썼을 때보다 수렴이 빠르고 성능이 더 좋을 수 있음

**딥러닝 실행 순서**
1. 전처리: 학습에 필요한 데이터 전처리를 수행  
2. 모델링(model): 모델을 정의  
3. 컴파일(compile): 옵티마이저와 손실 함수를 지정해 모델을 학습 가능한 상태로 생성  
4. 학습 (fit): 모델을 학습

**적절한 optimizer와 loss 선정하기**
- regression(회귀) 예측을 위해서는 loss=mse를 선택  
- optimizer는 여러 가지를 활용할 수 있으며, 단순 회귀에는 sgd로도 충분함 (아래 예제에서는 Adam을 사용)

In [16]:
# Convert the DataFrame columns to NumPy arrays: DOW is the model input and
# KOSPI is the target. The values are used as-is (no scaling is applied here).
X_train = np.array(df['DOW'])
Y_train = np.array(df['KOSPI'])

In [17]:
# Define the model: one Dense unit with a single input and a linear activation,
# i.e. y = w * x + b.
model = Sequential()

# The first argument of Dense is always the dimensionality of the output.
model.add(Dense(1, input_dim=1, activation='linear'))

# Use `learning_rate`; the `lr` keyword is deprecated and triggers a warning.
adam = Adam(learning_rate=0.05)

# Compile the model with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer=adam, loss='mse')

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
# Train (fit) the model on the DOW -> KOSPI arrays for 1200 epochs with progress
# output enabled; the object returned by fit() is kept in `result`.
result = model.fit(X_train, Y_train, epochs=1200, verbose=1)

In [19]:
# get_weights() on the Dense layer returns [kernel, bias]. With one input and
# one unit, each array holds a single value. ndarray.item() extracts it as a
# Python scalar; calling float() on an array with ndim > 0 is deprecated in
# recent NumPy releases.
kernel, bias_vector = model.layers[0].get_weights()
weight = kernel.item()
bias = bias_vector.item()

In [20]:
# Render the learned line in the same text format as the SciPy result:
# weight to 3 decimals, bias to 2 decimals.
keras_line = f'Y = {weight:.3f} * X + {bias:.2f}'
keras_line

'Y = 0.031 * X + 1362.57'