In [None]:
# Mount Google Drive at /content/drive so the files under `path` can be read
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install finance-datareader

Collecting finance-datareader
  Downloading finance_datareader-0.9.31-py3-none-any.whl (17 kB)
Collecting requests-file
  Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.31 requests-file-1.5.1


In [None]:
import pandas as pd
import numpy as np
import os
import FinanceDataReader as fdr

from sklearn.linear_model import LinearRegression
from tqdm import tqdm

## Get Stock List

In [None]:
# Directory holding the competition files, and the file names used in this notebook
path = '/content/drive/MyDrive/Colab Notebooks/k-digital 3차/dacon_stock'
list_name = 'Stock_List.csv'
sample_name = 'sample_submission_week4.csv'

# Load the stock list from CSV
stock_list = pd.read_csv(os.path.join(path, list_name))

# Normalise every stock code to a 6-character string, left-padded with zeros
stock_list['종목코드'] = [str(code).zfill(6) for code in stock_list['종목코드']]
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
371,더네이쳐홀딩스,298540,KOSDAQ
372,코엔텍,029960,KOSDAQ
373,원익홀딩스,030530,KOSDAQ
374,웹케시,053580,KOSDAQ


## Get Data & Modeling

In [None]:
# Date window for the price history (YYYYMMDD strings)
start_date = '20210104'
end_date = '20210910'

# Weekday number of start_date (Monday is 0)
start_weekday = pd.to_datetime(start_date).weekday()

# ISO 8601 week number of end_date (weeks start on Monday) as a decimal string
max_weeknum = pd.to_datetime(end_date).strftime('%V')

# All business days from start_date through end_date, in a single 'Date' column
Business_days = pd.DataFrame({'Date': pd.date_range(start_date, end_date, freq='B')})

print(f'WEEKDAY of "start_date" : {start_weekday}')
print(f'NUM of WEEKS to "end_date" : {max_weeknum}')
print(f'HOW MANY "Business_days" : {Business_days.shape}')
display(Business_days.head())

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 36
HOW MANY "Business_days" : (180, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08


## Baseline 모델의 구성 소개 ( Sample )

- X : (월 ~ 금) * 34주간
- y : (다음주 월 ~ 금) * 34주간
    - y_0 : 다음주 월요일
    - y_1 : 다음주 화요일
    - y_2 : 다음주 수요일
    - y_3 : 다음주 목요일
    - y_4 : 다음주 금요일


- 이번주 월~금요일의 패턴을 학습해 다음주 월요일 ~ 금요일을 각각 예측하는 모델을 생성
    
- 이 과정을 모든 종목(376개)에 적용

In [None]:
# The first stock code in the list serves as the worked example
sample_code = stock_list.at[0, '종목코드']

# Download the period's prices and keep only the closing price ('Close'),
# with the date moved from the index into a regular column
sample = (
    fdr.DataReader(sample_code, start=start_date, end=end_date)[['Close']]
    .reset_index()
)

# Outer-join with the business-day calendar: business days absent from the
# download are added with an empty Close
sample = Business_days.merge(sample, how='outer')

# Tag every date with its weekday number and its week number
sample = sample.assign(
    weekday=sample['Date'].map(lambda day: day.weekday()),
    weeknum=sample['Date'].map(lambda day: day.strftime('%V')),
)

# Fill missing closes with the value from the preceding row
sample['Close'] = sample['Close'].ffill()

# Reshape to one row per week number and one column per weekday
sample = sample.pivot_table(values='Close', index='weeknum', columns='weekday')
sample.head()

weekday,0,1,2,3,4
weeknum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,83000.0,83900.0,82200.0,82900.0,88800.0
2,91000.0,90600.0,89700.0,89700.0,88000.0
3,85000.0,87000.0,87200.0,88100.0,86800.0
4,89400.0,86700.0,85600.0,83700.0,82000.0
5,83000.0,84400.0,84600.0,82500.0,83500.0


In [None]:
# Create the (not yet fitted) linear regression estimator
model = LinearRegression()

In [None]:
# Feature matrix: every weekly row except the two most recent ones, as an ndarray
x = sample.iloc[:-2].to_numpy()

# Check the shape (34 weekly rows are expected for this date range)
x.shape

(34, 5)

In [None]:
# Targets: drop the first and the last weekly row, so that row i of y is the
# week that follows row i of x
y = sample.iloc[1:-1].to_numpy()

# One target vector per weekday column (column 0 .. column 4)
y_0, y_1, y_2, y_3, y_4 = (y[:, day] for day in range(5))

# Keep the per-weekday targets together in a list
y_values = [y_0, y_1, y_2, y_3, y_4]

- 8월 마지막 주(8/30 ~ 9/3) 데이터를 입력으로 그 다음 주(9/6 ~ 9/10) 예측

In [None]:
# Store the second-to-last weekly row in x_public; it is the input for the prediction
x_public = sample.iloc[-2].to_numpy()

array([74600., 76700., 76800., 76000., 76600.])

- 예측

In [None]:
# Collect one forecast per weekday
predictions = []
for y_value in y_values:
    # Refit the shared estimator on this weekday's targets
    fitted = model.fit(x, y_value)
    # predict() needs a 2-D input, so give x_public a leading axis of length 1
    prediction = fitted.predict(x_public[np.newaxis, :])
    predictions.append(prediction[0])
predictions

[77302.1323626224,
 77260.59420262497,
 76717.45444247499,
 76882.61533987035,
 76437.26776807781]

- 실제 Public 값

In [None]:
# Show the last weekly row (the actual closes) to see how far off the predictions are
sample.iloc[-1].values

array([77300., 76100., 76300., 75300., 75300.])

# 전체 모델링

In [None]:
# Read the submission template CSV
sample_name = 'sample_submission_week4.csv'
submission_path = os.path.join(path, sample_name)
sample_submission = pd.read_csv(submission_path)

In [None]:
# Run the pipeline demonstrated on the sample stock for every stock code in
# stock_list, and write each forecast into the matching submission column.
model = LinearRegression()
for code in tqdm(stock_list['종목코드'].values):
    # Closing prices for this stock, outer-joined with the business-day calendar
    data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    data = pd.merge(Business_days, data, how = 'outer')
    data['weekday'] = data.Date.apply(lambda x : x.weekday())
    data['weeknum'] = data.Date.apply(lambda x : x.strftime('%V'))
    # Business days without a downloaded close take the preceding row's close
    data.Close = data.Close.ffill()
    # One row per week number, one column per weekday
    data = pd.pivot_table(data = data, values = 'Close', columns = 'weekday', index = 'weeknum')

    x = data.iloc[0:-2].to_numpy()       # features: every week except the last two
    y = data.iloc[1:-1].to_numpy()       # targets: the week following each feature row
    x_public = data.iloc[-2].to_numpy()  # second-to-last week -> forecast for the last week

    # LinearRegression accepts a 2-D target and fits one set of coefficients per
    # target column, so a single fit replaces refitting once per weekday.
    model.fit(x, y)
    predictions = model.predict(np.expand_dims(x_public, 0))[0]

    # The submission template can hold several weeks of rows; repeat the one-week
    # forecast to fill all of them (the original hard-coded exactly two repeats).
    n_repeats = len(sample_submission) // len(predictions)
    # Plain column assignment replaces the column outright, whatever dtype the
    # template column had, instead of writing the floats into it in place.
    sample_submission[code] = np.tile(predictions, n_repeats)

# Total number of missing cells left in the submission frame
sample_submission.isna().sum().sum()

100%|██████████| 376/376 [07:04<00:00,  1.13s/it]


0

In [None]:
# Inspect the column labels of the submission frame
sample_submission.columns

Index(['Day', '000060', '000080', '000100', '000120', '000150', '000240',
       '000250', '000270', '000660',
       ...
       '330860', '336260', '336370', '347860', '348150', '348210', '352820',
       '357780', '363280', '950130'],
      dtype='object', length=377)

In [None]:
# Every column label except the first one, as a plain list
columns = sample_submission.columns[1:].tolist()

In [None]:
# Rebuild the header: 'Day' first, then each label as a zero-padded 6-character string
columns = ['Day', *(str(label).zfill(6) for label in columns)]

In [None]:
# Replace the submission frame's column labels with the rebuilt list
sample_submission.columns = columns

In [None]:
# Write the submission frame to CSV in the working directory, without the index column
sample_submission.to_csv('BASELINE_Linear_Week4.csv',index=False)

In [None]:
# Preview the submission frame before uploading it
sample_submission

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,000670,000720,000810,000880,000990,001230,001440,001450,001740,002380,002790,003000,003090,003380,003410,003490,003670,003800,004000,004020,004170,004370,004490,004800,004990,005250,005290,005300,005380,005385,...,273130,278280,278530,282330,285130,287410,290510,290650,292150,293490,293780,294090,294870,298000,298020,298050,298380,298540,299030,299660,299900,307950,314130,316140,319400,319660,321550,323990,326030,330590,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-09-06,29187.850221,34252.993031,64981.783244,171597.990771,100563.147504,17133.635102,54031.024723,85481.464728,106677.498961,749479.176452,55440.879612,226979.543793,34629.096551,61837.205458,19737.477513,2666.253093,25497.98208,5475.176415,387456.605658,56822.63561,22387.29753,35914.9756,10080.270524,8273.295209,31467.866773,151417.604338,48780.649251,70639.540173,51009.186362,274406.914061,301245.269129,85297.347807,116846.599144,35802.048218,34778.971611,26256.404279,147198.652231,216016.527618,102027.426766,...,109797.02118,228979.481928,13748.167829,179686.736033,277892.083207,8060.876896,8264.298349,37534.161314,14464.053553,78475.631556,60804.039722,52318.338364,29589.63064,374797.142097,730488.018818,593692.828078,21418.411907,33542.270262,64281.224318,115834.272479,16445.004504,119752.879474,53524.58801,11203.700232,4558.87002,38537.848558,20171.68468,83399.580906,124433.584627,5638.023798,48854.075617,51846.172465,64053.935652,36437.294171,35337.71654,59233.126897,281163.482571,288124.234876,29824.046596,17783.411804
1,2021-09-07,29629.84835,34484.845743,64380.113311,172395.363944,96823.440614,17260.69308,53839.948372,84725.148644,106377.6053,753143.80383,55613.535215,227962.973536,35156.46712,62939.071757,20008.241626,2684.182837,25596.392616,5515.620859,393762.213866,57260.883101,22330.941219,35902.288409,10046.387002,8322.23235,31256.264942,150416.156474,52127.086296,70154.596723,51288.556817,277831.577577,303916.833149,86574.449768,117197.207971,36104.915823,34653.58885,26649.354803,144884.928749,215549.246801,102913.604473,...,109791.760621,224253.278854,13705.635575,178611.396081,282628.787418,8132.719803,8268.794947,37245.411179,14366.256199,79386.124303,58421.190814,51768.57574,29688.540785,383191.647393,732098.929988,600915.926572,21101.242648,32998.24813,62855.550095,115442.773197,16272.264953,120178.441193,53357.402132,11234.203629,4609.412408,38698.80377,20196.390372,82337.832869,125743.18703,5641.338021,48051.383808,50802.240768,62336.802895,34785.064114,35590.187437,60396.671058,281671.428372,294636.044506,30030.835535,17505.425336
2,2021-09-08,31405.583931,34432.908718,64122.802956,172742.767911,94991.568639,17627.73734,52907.687057,83638.038539,106303.305964,764023.649796,54957.736402,228774.910103,34878.424745,61718.027586,19969.71789,2604.708178,25601.835404,5541.336785,401499.728235,58745.820674,22090.731481,36735.370387,10153.157377,8313.090185,31275.611231,151919.160449,49994.491693,70224.571634,51143.149807,277714.735799,301621.647685,87276.197823,117661.561082,36050.39729,34494.248933,26520.569379,144075.538458,214662.043286,101367.039084,...,109803.73496,224188.457959,13654.652569,175898.216661,283144.952503,8029.616408,8452.955415,37707.655002,14265.858698,78796.95238,59454.534424,52195.658821,29579.108551,385876.385914,737516.942648,601312.200269,21029.255359,33035.194619,62857.498536,114170.42168,16241.283091,119891.717176,54221.084974,11290.168889,4765.234345,38704.362582,19846.264466,81226.823496,121599.566383,5630.846628,48325.9974,50958.778199,63013.012155,34586.802542,34753.248618,62229.232016,281198.786904,293994.542172,30060.148238,17568.946859
3,2021-09-09,31202.759683,34525.879984,63158.73528,173451.818542,93086.75093,17678.43332,53195.52303,84168.136,105450.566574,776148.047774,55592.922796,229138.381286,35282.834013,62864.21397,20094.777794,2586.095485,25622.58585,5542.83347,404606.783768,58453.528334,22000.802418,37184.719132,10147.042911,8297.081806,30959.493282,151001.961929,49000.521559,70127.061771,50667.252059,277404.868175,300894.904626,86128.022546,118332.679608,35966.54787,34126.357577,26893.535086,142934.664417,213625.908976,102694.894328,...,109814.929664,223558.985321,13730.502625,175948.928884,279401.113011,7994.076453,8478.288572,36485.066848,14297.445788,80808.300024,61326.944677,53233.979955,29558.20957,394836.002128,746071.733667,599471.513414,20366.078028,33492.888138,61998.667298,117266.953688,16093.196708,119832.516822,54980.249212,11333.468081,4700.121398,38713.752969,19188.166573,82628.396619,122050.420308,5649.580274,47216.723714,50135.494528,60875.855528,34365.83879,34815.121099,61911.733934,283467.768846,294361.564341,29896.565856,17553.054233
4,2021-09-10,30482.15075,34506.591163,63510.171548,172091.696176,90563.903718,17742.069834,53542.497441,84177.636966,105347.059213,769935.60247,54681.894318,229332.552099,34248.213597,61461.134901,20177.547922,2650.422143,25682.355578,5560.919412,402528.787585,58416.914396,22018.004563,36956.798481,10079.021594,8312.236564,30516.032458,149450.617764,48575.585729,70503.537054,50583.815301,275037.562147,301237.825626,86247.201558,117251.075458,35909.899728,33574.69359,27008.462492,141287.997992,215674.858611,103129.630006,...,109748.410505,220529.447641,13705.797551,176132.756909,276597.093013,7943.493062,8536.114422,36304.516506,14335.068236,81024.470373,63890.640383,53899.567534,29439.515283,388937.909172,748611.530754,598494.102877,20137.918871,33513.659112,61640.892087,116800.645421,15877.104747,119658.542853,55130.5931,11307.482986,4701.346792,38778.672309,19472.663331,82281.070869,121402.224587,5664.813122,47786.298398,47963.756974,60958.561892,33703.122935,35015.07259,60533.768231,278968.222703,297697.390484,29829.829098,17461.664762
5,2021-09-27,29187.850221,34252.993031,64981.783244,171597.990771,100563.147504,17133.635102,54031.024723,85481.464728,106677.498961,749479.176452,55440.879612,226979.543793,34629.096551,61837.205458,19737.477513,2666.253093,25497.98208,5475.176415,387456.605658,56822.63561,22387.29753,35914.9756,10080.270524,8273.295209,31467.866773,151417.604338,48780.649251,70639.540173,51009.186362,274406.914061,301245.269129,85297.347807,116846.599144,35802.048218,34778.971611,26256.404279,147198.652231,216016.527618,102027.426766,...,109797.02118,228979.481928,13748.167829,179686.736033,277892.083207,8060.876896,8264.298349,37534.161314,14464.053553,78475.631556,60804.039722,52318.338364,29589.63064,374797.142097,730488.018818,593692.828078,21418.411907,33542.270262,64281.224318,115834.272479,16445.004504,119752.879474,53524.58801,11203.700232,4558.87002,38537.848558,20171.68468,83399.580906,124433.584627,5638.023798,48854.075617,51846.172465,64053.935652,36437.294171,35337.71654,59233.126897,281163.482571,288124.234876,29824.046596,17783.411804
6,2021-09-28,29629.84835,34484.845743,64380.113311,172395.363944,96823.440614,17260.69308,53839.948372,84725.148644,106377.6053,753143.80383,55613.535215,227962.973536,35156.46712,62939.071757,20008.241626,2684.182837,25596.392616,5515.620859,393762.213866,57260.883101,22330.941219,35902.288409,10046.387002,8322.23235,31256.264942,150416.156474,52127.086296,70154.596723,51288.556817,277831.577577,303916.833149,86574.449768,117197.207971,36104.915823,34653.58885,26649.354803,144884.928749,215549.246801,102913.604473,...,109791.760621,224253.278854,13705.635575,178611.396081,282628.787418,8132.719803,8268.794947,37245.411179,14366.256199,79386.124303,58421.190814,51768.57574,29688.540785,383191.647393,732098.929988,600915.926572,21101.242648,32998.24813,62855.550095,115442.773197,16272.264953,120178.441193,53357.402132,11234.203629,4609.412408,38698.80377,20196.390372,82337.832869,125743.18703,5641.338021,48051.383808,50802.240768,62336.802895,34785.064114,35590.187437,60396.671058,281671.428372,294636.044506,30030.835535,17505.425336
7,2021-09-29,31405.583931,34432.908718,64122.802956,172742.767911,94991.568639,17627.73734,52907.687057,83638.038539,106303.305964,764023.649796,54957.736402,228774.910103,34878.424745,61718.027586,19969.71789,2604.708178,25601.835404,5541.336785,401499.728235,58745.820674,22090.731481,36735.370387,10153.157377,8313.090185,31275.611231,151919.160449,49994.491693,70224.571634,51143.149807,277714.735799,301621.647685,87276.197823,117661.561082,36050.39729,34494.248933,26520.569379,144075.538458,214662.043286,101367.039084,...,109803.73496,224188.457959,13654.652569,175898.216661,283144.952503,8029.616408,8452.955415,37707.655002,14265.858698,78796.95238,59454.534424,52195.658821,29579.108551,385876.385914,737516.942648,601312.200269,21029.255359,33035.194619,62857.498536,114170.42168,16241.283091,119891.717176,54221.084974,11290.168889,4765.234345,38704.362582,19846.264466,81226.823496,121599.566383,5630.846628,48325.9974,50958.778199,63013.012155,34586.802542,34753.248618,62229.232016,281198.786904,293994.542172,30060.148238,17568.946859
8,2021-09-30,31202.759683,34525.879984,63158.73528,173451.818542,93086.75093,17678.43332,53195.52303,84168.136,105450.566574,776148.047774,55592.922796,229138.381286,35282.834013,62864.21397,20094.777794,2586.095485,25622.58585,5542.83347,404606.783768,58453.528334,22000.802418,37184.719132,10147.042911,8297.081806,30959.493282,151001.961929,49000.521559,70127.061771,50667.252059,277404.868175,300894.904626,86128.022546,118332.679608,35966.54787,34126.357577,26893.535086,142934.664417,213625.908976,102694.894328,...,109814.929664,223558.985321,13730.502625,175948.928884,279401.113011,7994.076453,8478.288572,36485.066848,14297.445788,80808.300024,61326.944677,53233.979955,29558.20957,394836.002128,746071.733667,599471.513414,20366.078028,33492.888138,61998.667298,117266.953688,16093.196708,119832.516822,54980.249212,11333.468081,4700.121398,38713.752969,19188.166573,82628.396619,122050.420308,5649.580274,47216.723714,50135.494528,60875.855528,34365.83879,34815.121099,61911.733934,283467.768846,294361.564341,29896.565856,17553.054233
9,2021-10-01,30482.15075,34506.591163,63510.171548,172091.696176,90563.903718,17742.069834,53542.497441,84177.636966,105347.059213,769935.60247,54681.894318,229332.552099,34248.213597,61461.134901,20177.547922,2650.422143,25682.355578,5560.919412,402528.787585,58416.914396,22018.004563,36956.798481,10079.021594,8312.236564,30516.032458,149450.617764,48575.585729,70503.537054,50583.815301,275037.562147,301237.825626,86247.201558,117251.075458,35909.899728,33574.69359,27008.462492,141287.997992,215674.858611,103129.630006,...,109748.410505,220529.447641,13705.797551,176132.756909,276597.093013,7943.493062,8536.114422,36304.516506,14335.068236,81024.470373,63890.640383,53899.567534,29439.515283,388937.909172,748611.530754,598494.102877,20137.918871,33513.659112,61640.892087,116800.645421,15877.104747,119658.542853,55130.5931,11307.482986,4701.346792,38778.672309,19472.663331,82281.070869,121402.224587,5664.813122,47786.298398,47963.756974,60958.561892,33703.122935,35015.07259,60533.768231,278968.222703,297697.390484,29829.829098,17461.664762


!pip install dacon_submit_api-0.0.4-py3-none-any.whl

In [None]:
from dacon_submit_api import dacon_submit_api

# Must be the CSV written by `sample_submission.to_csv(...)` in this notebook.
# The original pointed at 'BASELINE_Linear_Week3.csv', which this notebook never writes.
파일경로 =  './BASELINE_Linear_Week4.csv'
# Personal token (issued at https://dacon.io/account). Read from the DACON_TOKEN
# environment variable so it is not stored in the notebook; empty if unset.
개인_Token = os.environ.get('DACON_TOKEN', '')
대회ID = '235800'                    # competition ID of the stock closing-price prediction contest
팀이름 = ''                          # team name in this competition (see https://dacon.io/competitions/official/235800/team)
submission_메모_내용 = 'sample_submission'

# Upload the file; the API's response is kept in `result`
result = dacon_submit_api.post_submission_file(
    파일경로,
    개인_Token,
    대회ID,
    팀이름,
    submission_메모_내용,
)