In [1]:
# import

import pandas as pd
import numpy as np
import os
import FinanceDataReader as fdr

from sklearn.linear_model import LinearRegression
from tqdm import tqdm

In [2]:
# Get Stock List
# Location of the competition files and the two file names used by this notebook.
path = './open'
list_name = 'stock_list.csv'
sample_name = 'sample_submission.csv'

stock_list = pd.read_csv(os.path.join(path, list_name))
# Store every stock code as a string left-padded with zeros to six characters.
stock_list['종목코드'] = [str(code).zfill(6) for code in stock_list['종목코드']]
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
365,맘스터치,220630,KOSDAQ
366,다날,064260,KOSDAQ
367,제이시스메디칼,287410,KOSDAQ
368,크리스에프앤씨,110790,KOSDAQ


In [3]:
# Get Data & Modeling
# Inclusive bounds of the price history downloaded for every stock.
start_date = '20210104'
end_date = '20211105'

# Weekday number of the first day and ISO week number ('%V') of the last day.
start_weekday = pd.Timestamp(start_date).weekday()
max_weeknum = pd.Timestamp(end_date).strftime('%V')
# One row per business day (freq='B') between the two bounds.
Business_days = pd.date_range(start_date, end_date, freq='B').to_frame(index=False, name='Date')

print(
    f'WEEKDAY of "start_date" : {start_weekday}',
    f'NUM of WEEKS to "end_date" : {max_weeknum}',
    f'HOW MANY "Business_days" : {Business_days.shape}',
    sep='\n',
)
display(Business_days.head())

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 44
HOW MANY "Business_days" : (220, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08


In [4]:
# Baseline
# Build a (week x weekday) table of closing prices for the first stock in the list.
sample_code = stock_list.loc[0, '종목코드']

sample = fdr.DataReader(sample_code, start=start_date, end=end_date)[['Close']].reset_index()
# Join onto the business-day calendar so that days without a quote still get a row.
sample = pd.merge(Business_days, sample, on='Date', how='outer')
# Vectorized datetime accessors instead of a Python-level lambda per row.
sample['weekday'] = sample['Date'].dt.weekday
sample['weeknum'] = sample['Date'].dt.strftime('%V')
# Days without a quote carry the previous close forward.
sample['Close'] = sample['Close'].ffill()
# Rows: ISO week number; columns: weekday number; values: closing price.
sample = pd.pivot_table(data=sample, values='Close', columns='weekday', index='weeknum')
sample.head()

weekday,0,1,2,3,4
weeknum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,83000.0,83900.0,82200.0,82900.0,88800.0
2,91000.0,90600.0,89700.0,89700.0,88000.0
3,85000.0,87000.0,87200.0,88100.0,86800.0
4,89400.0,86700.0,85600.0,83700.0,82000.0
5,83000.0,84400.0,84600.0,82500.0,83500.0


In [5]:
# Ordinary least-squares regressor; refit for each weekday target in the cells below.
model = LinearRegression()

In [6]:
# Training features: every weekly row except the last two.
x = sample.iloc[:-2].to_numpy()
x.shape

(42, 5)

In [7]:
# Training targets: the weekly rows shifted one position later than the feature
# rows (second row through second-to-last row).
y = sample.iloc[1:-1].to_numpy()
# One target vector per weekday column (first five columns).
y_0, y_1, y_2, y_3, y_4 = (y[:, day] for day in range(5))

y_values = [y_0, y_1, y_2, y_3, y_4]

In [8]:
# Second-to-last weekly row: the feature vector passed to model.predict in the next cell.
x_public = sample.iloc[-2].to_numpy()

In [9]:
# Predict
predictions = []
for y_value in y_values:
    # Refit the shared model on this weekday's target, then predict from the
    # single feature row reshaped to (1, n_features).
    model.fit(x, y_value)
    prediction = model.predict(x_public.reshape(1, -1))
    predictions.append(prediction[0])
predictions

[70206.67660106532,
 69631.42785252718,
 69062.32129096359,
 69258.21096883612,
 68846.00977524316]

In [10]:
# Actual public values: the last weekly row of the table, for comparison with the predictions.
sample.iloc[-1].values

array([69900., 71500., 70400., 70600., 70200.])

In [11]:
####

In [12]:
###

In [13]:
# Full modeling: repeat the baseline procedure for every stock in the list

In [14]:
# Load the sample submission file; the per-stock predictions are written into this frame.
sample_name = 'sample_submission.csv'
submission_path = os.path.join(path, sample_name)
sample_submission = pd.read_csv(submission_path)

In [15]:
def _weekly_close_table(code, business_days, start, end):
    """Build a (week x weekday) table of closing prices for one stock.

    Parameters
    ----------
    code : str
        Stock code passed to ``fdr.DataReader``.
    business_days : pandas.DataFrame
        Frame with a ``Date`` column listing the business days of the window.
    start, end : str
        Date bounds forwarded to ``fdr.DataReader``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ISO week number (``'%V'`` string) with one column per
        weekday number, holding the forward-filled closing price.
    """
    data = fdr.DataReader(code, start=start, end=end)[['Close']].reset_index()
    # Join onto the business-day calendar so that days without a quote still get a row.
    data = pd.merge(business_days, data, on='Date', how='outer')
    data['weekday'] = data['Date'].dt.weekday
    data['weeknum'] = data['Date'].dt.strftime('%V')
    # Days without a quote carry the previous close forward.
    data['Close'] = data['Close'].ffill()
    return pd.pivot_table(data=data, values='Close', columns='weekday', index='weeknum')


def _predict_next_week(model, weekly):
    """Fit ``model`` on consecutive-week pairs and predict from the second-to-last week.

    Every training sample maps one weekly row to the first five columns of the
    following weekly row. The last row of ``weekly`` is never used for fitting;
    the second-to-last row is the feature vector for the prediction.

    Parameters
    ----------
    model : estimator with ``fit``/``predict`` that accepts a 2-D target
        Refit in place on every call.
    weekly : pandas.DataFrame
        Table produced by ``_weekly_close_table``.

    Returns
    -------
    numpy.ndarray
        One predicted value per target column, in column order.
    """
    x = weekly.iloc[:-2].to_numpy()
    y = weekly.iloc[1:-1].to_numpy()[:, :5]
    x_public = weekly.iloc[-2].to_numpy()

    # LinearRegression raises on NaN input, so week pairs with a missing close
    # (no earlier price to forward-fill from) are left out of the fit.
    # This mask keeps every row when the table is complete.
    complete = ~(np.isnan(x).any(axis=1) | np.isnan(y).any(axis=1))

    # A single multi-target fit solves the same least-squares problem for each
    # weekday column as five separate single-target fits would.
    model.fit(x[complete], y[complete])
    return model.predict(x_public.reshape(1, -1))[0]


model = LinearRegression()
for code in tqdm(stock_list['종목코드'].values):
    # With the dates configured in this notebook: the weeks of 2021-01-04 to
    # 2021-10-22 are the features, the weeks of 2021-01-11 to 2021-10-29 are
    # the targets, and the forecast is for 2021-11-01 to 2021-11-05.
    weekly = _weekly_close_table(code, Business_days, start_date, end_date)
    predictions = _predict_next_week(model, weekly)

    # The same forecast is repeated for every block of len(predictions) rows in
    # the submission (two blocks for a ten-row file).
    n_blocks = len(sample_submission) // len(predictions)
    # Plain column assignment replaces the whole column with the new values.
    sample_submission[code] = np.tile(predictions, n_blocks)
sample_submission.isna().sum().sum()

100%|██████████| 370/370 [01:29<00:00,  4.15it/s]


0

In [16]:
# Inspect the column labels of the submission frame after the predictions were written.
sample_submission.columns

Index(['Day', '000060', '000080', '000100', '000120', '000150', '000240',
       '000250', '000270', '000660',
       ...
       '330860', '336260', '336370', '347860', '348150', '348210', '352820',
       '357780', '363280', '950130'],
      dtype='object', length=371)

In [17]:
# Rebuild the header: 'Day' first, then every remaining label as a string
# left-padded with zeros to six characters.
columns = ['Day']
columns.extend(str(x).zfill(6) for x in sample_submission.columns[1:])

sample_submission.columns = columns

In [18]:
# Write the submission to CSV without the row index.
sample_submission.to_csv('BASELINE_Linear.csv',index=False)

In [19]:
# Display the final submission frame.
sample_submission

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,...,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-11-01,27919.530611,34687.673458,58163.382835,142621.815394,104901.698658,16669.447967,47219.595113,85236.83317,103490.352393,...,49749.405974,51984.322942,81326.862561,36846.592704,25721.026664,53328.350326,336697.743579,262257.538308,27176.08509,17382.219194
1,2021-11-02,28750.750484,35032.651375,57721.67928,143012.927861,107216.342323,17001.594758,46672.351191,85360.327648,102788.687368,...,48923.253693,51539.56413,81668.51151,35258.227509,25330.52806,53645.466661,335662.149461,264562.230652,27415.549191,17338.522537
2,2021-11-03,28858.095631,34995.888574,57654.139411,145626.792237,111192.733424,17665.577952,45757.683516,85665.326378,102943.419081,...,48834.428543,49341.153729,82354.078682,35320.479243,25623.131825,55136.12148,329167.718872,264623.119599,27466.446666,17286.577226
3,2021-11-04,28901.301911,34866.098057,57137.803907,145351.536595,109530.741544,17846.022961,46380.15235,85494.864447,99958.476851,...,48954.93564,48626.256108,81749.886637,34343.016848,25878.11514,55964.135511,329482.609718,261821.140588,27594.351745,17247.876124
4,2021-11-05,28152.93854,34873.93908,57392.72038,143505.827198,108761.777883,18078.266972,46975.701291,84943.135732,100294.829339,...,49427.019462,47063.105078,83266.468952,34062.808374,26472.657621,55323.587424,321108.356663,264131.897754,27408.36665,17492.773824
5,2021-11-29,27919.530611,34687.673458,58163.382835,142621.815394,104901.698658,16669.447967,47219.595113,85236.83317,103490.352393,...,49749.405974,51984.322942,81326.862561,36846.592704,25721.026664,53328.350326,336697.743579,262257.538308,27176.08509,17382.219194
6,2021-11-30,28750.750484,35032.651375,57721.67928,143012.927861,107216.342323,17001.594758,46672.351191,85360.327648,102788.687368,...,48923.253693,51539.56413,81668.51151,35258.227509,25330.52806,53645.466661,335662.149461,264562.230652,27415.549191,17338.522537
7,2021-12-01,28858.095631,34995.888574,57654.139411,145626.792237,111192.733424,17665.577952,45757.683516,85665.326378,102943.419081,...,48834.428543,49341.153729,82354.078682,35320.479243,25623.131825,55136.12148,329167.718872,264623.119599,27466.446666,17286.577226
8,2021-12-02,28901.301911,34866.098057,57137.803907,145351.536595,109530.741544,17846.022961,46380.15235,85494.864447,99958.476851,...,48954.93564,48626.256108,81749.886637,34343.016848,25878.11514,55964.135511,329482.609718,261821.140588,27594.351745,17247.876124
9,2021-12-03,28152.93854,34873.93908,57392.72038,143505.827198,108761.777883,18078.266972,46975.701291,84943.135732,100294.829339,...,49427.019462,47063.105078,83266.468952,34062.808374,26472.657621,55323.587424,321108.356663,264131.897754,27408.36665,17492.773824
