# 4주차

In [45]:
import pandas as pd
import os
import FinanceDataReader as fdr

import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'Malgun Gothic'

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from fbprophet import Prophet
from tqdm import tqdm

from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_absolute_error
import numpy as np
import random

from datetime import datetime as dt

import warnings
warnings.filterwarnings('ignore')

In [2]:
path = './open_week4/'
list_name = 'Stock_List.csv'
stock_list = pd.read_csv(os.path.join(path,list_name))
stock_list['종목코드'] = stock_list['종목코드'].apply(lambda x : str(x).zfill(6))
stock_list.head() # 대형/중형주? 

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,5930,KOSPI
1,SK하이닉스,660,KOSPI
2,NAVER,35420,KOSPI
3,카카오,35720,KOSPI
4,삼성바이오로직스,207940,KOSPI


In [3]:
submission_raw = pd.read_csv(path+ 'sample_submission_week4.csv')
target_day = pd.to_datetime(submission_raw.Day).tolist()

In [4]:
submission_raw.Day.tolist()

['2021-09-06',
 '2021-09-07',
 '2021-09-08',
 '2021-09-09',
 '2021-09-10',
 '2021-09-27',
 '2021-09-28',
 '2021-09-29',
 '2021-09-30',
 '2021-10-01']

In [14]:
start_date = '20210101'
end_date = '20210905'

submission = submission_raw.copy()

for code in tqdm(stock_list.종목코드):
    data_raw = fdr.DataReader(code, start = start_date, end = end_date).Close

    sc = StandardScaler()
    data_scaled = pd.DataFrame({'ds':data_raw.index,
                                'y':sc.fit_transform(data_raw.values.reshape(-1,1)).flatten()})
    m = Prophet(yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
               changepoint_prior_scale= 0.5,
               changepoint_range=0.95)
    m.fit(data_scaled)
    future = m.make_future_dataframe(periods=28)
    forecast = m.predict(future)

    result = forecast[['ds','yhat']]
    result.yhat = sc.inverse_transform(result.yhat.values.reshape(-1,1))

    submission[code]=result[result.ds.map(lambda x:x in target_day)].yhat.values

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [10:27<00:00,  1.67s/it]


In [15]:
submission.to_csv('./submit/submission_{}.csv'.format(dt.now().strftime('%m%d_%H%M')), index=False)

In [8]:
def NMAE(y_true, y_hat):
    #y_true, y_hat = y_true.values, y_hat.values
    return mean_absolute_error(y_true, y_hat) / np.mean(np.abs(y_true))

In [5]:
answer = submission_raw.copy().iloc[:5,:]

start_date = '20210906'
end_date = '20210910'

for code in tqdm(stock_list.종목코드):
    answer[code] = fdr.DataReader(code, start = start_date, end = end_date).Close.values

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [00:50<00:00,  7.40it/s]


In [6]:
answer.to_csv('answer.csv', index=False)

In [49]:
total_score = 0

for code in answer.columns[1:]:
    score = NMAE(answer[code], submission.iloc[:5,:][code])
    total_score += score

In [51]:
total_score / len(answer.columns[1:]) * 100

4.666481722638392

In [6]:
answer = pd.read_csv('answer.csv')

In [11]:
start_date = '20210101'
end_date = '20210905'

submission = submission_raw.copy()

for code in tqdm(stock_list.종목코드):
    data_raw = fdr.DataReader(code, start = start_date, end = end_date).Close

    sc = StandardScaler()
    data_scaled = pd.DataFrame({'ds':data_raw.index,
                                'y':sc.fit_transform(data_raw.values.reshape(-1,1)).flatten()})
    m = Prophet(seasonality_mode='multiplicative',
                yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
               changepoint_prior_scale= 0.5,
               changepoint_range=0.95)
    m.add_seasonality(name='seasonality_1',period=0.1,fourier_order=5)
    m.add_seasonality(name='seasonality_2',period=0.3,fourier_order=5)
    
    m.fit(data_scaled)
    future = m.make_future_dataframe(periods=28)
    forecast = m.predict(future)

    result = forecast[['ds','yhat']]
    result.yhat = sc.inverse_transform(result.yhat.values.reshape(-1,1))

    submission[code]=result[result.ds.map(lambda x:x in target_day)].yhat.values

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [19:56<00:00,  3.18s/it]


In [12]:
total_score = 0

for code in answer.columns[1:]:
    score = NMAE(answer[code], submission.iloc[:5,:][code])
    total_score += score
    
print(total_score / len(answer.columns[1:]) * 100)

5.429581211210798


In [14]:
params_grid = {'fourier_order' : [5,10],
               'period':[0.1,0.3,0.5,0.7,1]
               }
grid = ParameterGrid(params_grid)

code_list = stock_list.종목코드.tolist()

In [17]:
model_parameters = pd.DataFrame(columns = ['code',
                                           'NMAE',
                                           'fourier_order',
                                           'period'
                                           ])

random.seed(0)
samples = random.sample(code_list, 10)

start_date = '20210101'
end_date = '20210905'

for code in tqdm(samples, position=1):
    data_raw = fdr.DataReader(code, start = start_date, end = end_date).Close

    sc = StandardScaler()
    data_scaled = pd.DataFrame({'ds':data_raw.index,
                                'y':sc.fit_transform(data_raw.values.reshape(-1,1)).flatten()})
    
    train = data_scaled[data_scaled.ds <= pd.to_datetime('2021-08-06')] 
    val = data_scaled[data_scaled.ds > pd.to_datetime('2021-08-06')] 

    for p in tqdm(grid, position=0):
        prophet = Prophet(seasonality_mode='multiplicative', 
                          yearly_seasonality=False,
                          weekly_seasonality=False,
                          daily_seasonality=False,
                          changepoint_prior_scale=0.5,
                          changepoint_range=0.95,
                          )
        prophet.add_seasonality(name='seasonality_1',
                                period=p['period'],
                                fourier_order=p['fourier_order'])
        prophet.fit(train)

        # 21일치를 예측합니다.
        future_data = prophet.make_future_dataframe(periods=28)
        forecast_data = prophet.predict(future_data)

        pred_y = forecast_data[forecast_data.ds.map(lambda x: x in val.ds.tolist())].yhat
        pred_y = sc.inverse_transform(pred_y.values.reshape(-1,1))

        y_true = sc.inverse_transform(val.y.values.reshape(-1,1))

        error =  NMAE(y_true, pred_y) * 100
        model_parameters = model_parameters.append({'code':code,
                                                    'NMAE':error,
                                                    'fourier_order':p['fourier_order'],
                                                    'period':p['period'],
                                                    },
                                                   ignore_index=True)


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:06<00:00,  6.68s/it][A

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:30<00:00,  3.09s/it][A

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:33<00:00,  3.32s/it][A

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:37<00:00,  3.72s/it][A

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:30<00:00,  3.01s/it][A

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:33<00:00,  3.38s/it][A

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:25<00:00,  2.53s/it][A

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:29<00:00,  2.98s/it][A

100%|██

In [19]:
model_parameters.sort_values(by=['code','NMAE'], inplace=True)

In [21]:
param_pivot= pd.pivot_table(data=model_parameters,
                   values='NMAE', 
                   index=['fourier_order','period'],
                   columns=['code']).reset_index()

param_pivot['mean'] = np.nanmean(param_pivot.iloc[:,5:].values, axis=1)

In [24]:
param_pivot.sort_values(by='mean')

code,fourier_order,period,004490,007390,016380,034730,039200,078340,078600,120110,218410,253450,mean
2,5,0.5,12.040811,4.492638,12.552014,6.242918,8.590245,4.103621,6.412182,7.011385,7.107441,1.90452,5.91033
4,5,1.0,12.040811,4.487772,12.552014,6.242918,8.590245,4.103621,6.412182,7.011385,7.107441,1.90452,5.91033
3,5,0.7,2.491864,4.596832,12.085195,5.906444,9.220238,4.587453,6.299297,6.711129,6.023791,2.816631,5.937855
0,5,0.1,12.040811,4.47468,12.62852,6.289374,8.500014,4.412429,6.45813,7.157573,6.93699,1.927917,5.954632
5,10,0.1,11.79642,4.547084,12.521568,6.545878,8.561221,4.180338,6.461271,7.261533,6.814426,2.07338,5.985435
7,10,0.5,11.796444,4.588384,12.491462,6.511669,8.296429,4.175004,6.702599,7.150982,7.115886,2.030875,5.997635
9,10,1.0,11.796444,4.57949,12.447862,6.511669,8.53611,4.202773,6.597465,7.150982,7.115886,1.980614,6.013643
8,10,0.7,2.496568,4.57593,12.312048,6.241514,9.245928,4.45491,6.423334,6.826372,6.700782,2.280574,6.024774
1,5,0.3,2.640341,4.912184,12.17765,6.219673,9.477613,4.71042,6.094565,6.912395,6.849064,2.350117,6.087693
6,10,0.3,12.014657,4.57266,12.364704,6.370883,9.22128,4.557945,6.263548,7.011178,7.109872,2.231262,6.109424


In [25]:
start_date = '20210101'
end_date = '20210905'

submission = submission_raw.copy()

for code in tqdm(stock_list.종목코드):
    data_raw = fdr.DataReader(code, start = start_date, end = end_date).Close

    sc = StandardScaler()
    data_scaled = pd.DataFrame({'ds':data_raw.index,
                                'y':sc.fit_transform(data_raw.values.reshape(-1,1)).flatten()})
    m = Prophet(seasonality_mode='multiplicative',
                yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
               changepoint_prior_scale= 0.5,
               changepoint_range=0.95)
    m.add_seasonality(name='seasonality_1',period=0.1,fourier_order=5)
    #m.add_seasonality(name='seasonality_2',period=0.3,fourier_order=5)
    
    m.fit(data_scaled)
    future = m.make_future_dataframe(periods=28)
    forecast = m.predict(future)

    result = forecast[['ds','yhat']]
    result.yhat = sc.inverse_transform(result.yhat.values.reshape(-1,1))

    submission[code]=result[result.ds.map(lambda x:x in target_day)].yhat.values

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [20:01<00:00,  3.19s/it]


In [26]:
total_score = 0

for code in answer.columns[1:]:
    score = NMAE(answer[code], submission.iloc[:5,:][code])
    total_score += score
    
print(total_score / len(answer.columns[1:]) * 100)

5.400702347582808


# without scaling

In [42]:
start_date = '20210101'
end_date = '20210905'

submission = submission_raw.copy()

for code in tqdm(stock_list.종목코드):
    data_raw = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    data_raw = data_raw.rename({'Date':'ds', 'Close':'y'}, axis=1)

    m = Prophet(seasonality_mode='multiplicative',
                yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
               changepoint_prior_scale= 0.5,
               changepoint_range=0.95)
    #m.add_seasonality(name='seasonality_1',period=0.1,fourier_order=5)
    #m.add_seasonality(name='seasonality_2',period=0.3,fourier_order=5)
    
    m.fit(data_raw)
    future = m.make_future_dataframe(periods=28)
    forecast = m.predict(future)

    result = forecast[['ds','yhat']]

    submission[code]=result[result.ds.map(lambda x:x in target_day)].yhat.values

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [11:52<00:00,  1.89s/it]


In [43]:
total_score = 0

for code in answer.columns[1:]:
    score = NMAE(answer[code], submission.iloc[:5,:][code])
    total_score += score
    
print(total_score / len(answer.columns[1:]) * 100)

4.938432293432185


# MinMaxScaling

In [46]:
start_date = '20210101'
end_date = '20210905'

submission = submission_raw.copy()

for code in tqdm(stock_list.종목코드):
    data_raw = fdr.DataReader(code, start = start_date, end = end_date).Close

    sc = MinMaxScaler()
    data_scaled = pd.DataFrame({'ds':data_raw.index,
                                'y':sc.fit_transform(data_raw.values.reshape(-1,1)).flatten()})
    m = Prophet(yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
               changepoint_prior_scale= 0.5,
               changepoint_range=0.95)
    m.fit(data_scaled)
    future = m.make_future_dataframe(periods=28)
    forecast = m.predict(future)

    result = forecast[['ds','yhat']]
    result.yhat = sc.inverse_transform(result.yhat.values.reshape(-1,1))

    submission[code]=result[result.ds.map(lambda x:x in target_day)].yhat.values
    
total_score = 0

for code in answer.columns[1:]:
    score = NMAE(answer[code], submission.iloc[:5,:][code])
    total_score += score
    
print(total_score / len(answer.columns[1:]) * 100)

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [09:48<00:00,  1.56s/it]


4.4981115531623335


In [47]:
submission.to_csv('./submit/submission_{}.csv'.format(dt.now().strftime('%m%d_%H%M')), index=False)

# 휴일/주말 추가

In [57]:
holiday = pd.DataFrame({
    'holiday': 'holiday',
    'ds': pd.concat([
        pd.Series(pd.to_datetime('2021-01-01')),
        pd.Series(pd.date_range('2021-02-11', '2021-02-13', freq='D')),
        pd.Series(pd.to_datetime('2021-03-01')),
        pd.Series(pd.to_datetime('2021-05-05')),
        pd.Series(pd.to_datetime('2021-05-19')),
        pd.Series(pd.to_datetime('2021-06-06')),
        pd.Series(pd.to_datetime('2021-08-15')),
        pd.Series(pd.date_range('2021-09-20', '2021-09-22', freq='D'))
    ]),
    'lower_window': -1,
    'upper_window': 1})

In [58]:
holiday

Unnamed: 0,holiday,ds,lower_window,upper_window
0,holiday,2021-01-01,-1,1
0,holiday,2021-02-11,-1,1
1,holiday,2021-02-12,-1,1
2,holiday,2021-02-13,-1,1
0,holiday,2021-03-01,-1,1
0,holiday,2021-05-05,-1,1
0,holiday,2021-05-19,-1,1
0,holiday,2021-06-06,-1,1
0,holiday,2021-08-15,-1,1
0,holiday,2021-09-20,-1,1


In [59]:
start_date = '20210101'
end_date = '20210905'

submission = submission_raw.copy()

for code in tqdm(stock_list.종목코드):
    data_raw = fdr.DataReader(code, start = start_date, end = end_date).Close

    sc = MinMaxScaler()
    data_scaled = pd.DataFrame({'ds':data_raw.index,
                                'y':sc.fit_transform(data_raw.values.reshape(-1,1)).flatten()})
    
    m = Prophet(yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
                changepoint_prior_scale= 0.5,
                changepoint_range=0.95,
                holidays=holiday,
                holidays_prior_scale=5)
    m.fit(data_scaled)
    
    future = m.make_future_dataframe(periods=28)
    forecast = m.predict(future)

    result = forecast[['ds','yhat']]
    result.yhat = sc.inverse_transform(result.yhat.values.reshape(-1,1))

    submission[code]=result[result.ds.map(lambda x:x in target_day)].yhat.values
    
total_score = 0

for code in answer.columns[1:]:
    score = NMAE(answer[code], submission.iloc[:5,:][code])
    total_score += score
    
print(total_score / len(answer.columns[1:]) * 100)

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [10:31<00:00,  1.68s/it]


4.510359237787665
