In [2]:
pwd

'C:\\Users\\user\\Downloads'

In [None]:
# data preparation

In [None]:
!pip install pandas_datareader
!pip install yfinance

In [None]:
import pandas as pd
import yfinance as yf
import os
from pandas_datareader import data as pdr
from datetime import datetime, timedelta

In [None]:
# Create the output directories. exist_ok=True makes this cell safe to re-run:
# os.mkdir would raise FileExistsError the second time.
os.makedirs("KOSPI_200", exist_ok=True)
os.makedirs("KOSPI_200_OHLCV", exist_ok=True)

In [None]:
# Look-back window: 52 weeks before the moment this cell runs.
start_dt = datetime.now() - timedelta(weeks=52)

# Trading dates of one reference ticker over that window, as a 'Date' Series.
table = (
    pdr.get_data_yahoo('035720' + '.KS', start=start_dt)
    .reset_index()['Date']
)

# Date stamp (YYYYMMDD) used in the names of the CSV files written later.
todayYYMMDD = datetime.today().strftime("%Y%m%d")

# Last available date of the reference ticker, kept as a one-element Series.
table_today = table.iloc[-1:]

In [None]:
import multiprocessing
from functools import partial
from contextlib import contextmanager

In [None]:
def init_load_ohlcv(kospi_value, f_data):
    """Write one ticker's OHLCV history to its own CSV file.

    Parameters
    ----------
    kospi_value : str
        Stock code without the exchange suffix; ``kospi_value + '.KS'`` is
        used as the top-level column key into ``f_data``.
    f_data : pandas.DataFrame
        Frame whose ``[ticker]`` selection yields that ticker's price columns
        indexed by date (the caller builds it with
        ``yf.download(..., group_by='ticker')``).

    Side effects
    ------------
    Writes ``./KOSPI_200_OHLCV/<code>_OHLCV_<todayYYMMDD>.csv`` with columns
    ``Date, Open, High, Low, Close, Volume, Code``. ``todayYYMMDD`` is read
    from module scope.
    """
    # Work on a copy so the shared download frame is never written to.
    ohlcv = f_data[kospi_value + '.KS'].copy()

    # 'Adj Close' is dropped when present; tolerate its absence instead of
    # raising KeyError.
    ohlcv = ohlcv.drop(columns=['Adj Close'], errors='ignore')

    # Select by name rather than relabelling by position, so a different
    # column order in the download cannot silently swap the price fields.
    ohlcv = ohlcv[['Open', 'High', 'Low', 'Close', 'Volume']]

    ohlcv.insert(0, 'Date', ohlcv.index)
    ohlcv['Code'] = kospi_value
    ohlcv.to_csv('./KOSPI_200_OHLCV/' + kospi_value + '_OHLCV_' + todayYYMMDD + '.csv', index=False)

In [None]:
if __name__ == '__main__':
    # Whether to refresh the KOSPI 200 constituent list (default 'N'). Per the
    # original author's note the index is rebalanced twice a year on dates that
    # are not fixed, so this flag has to be switched by hand.
    load_kospi_list_yn = 'N'  # KOSPI 200 list reloading flag
    load_kospi_ohlcv_yn_init = 'Y'  # re-download the OHLCV data from scratch

    if load_kospi_list_yn == 'Y':
        # Only needed on this refresh path, hence the local imports.
        import bs4
        from urllib.request import urlopen  # fetches the raw HTML of a URL

        # Company name -> stock code table from KIND (the KRX disclosure channel).
        stock_code = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download', header=0)[0]
        stock_code = stock_code[['회사명', '종목코드']]
        stock_code = stock_code.rename(columns={'회사명': 'company', '종목코드': 'code'})
        # Stock codes are six digits wide; left-pad shorter numbers with zeros.
        stock_code['code'] = stock_code['code'].map('{:06d}'.format)

        # Collect the constituent names from the 20 result pages. The names are
        # the text of the <a target="_parent"> links on each page.
        company_name = []
        for page in range(1, 21):
            url = 'https://finance.naver.com/sise/entryJongmok.nhn?&page={page}'.format(page=page)
            # Close the HTTP response deterministically instead of leaking it.
            with urlopen(url) as response:
                source = bs4.BeautifulSoup(response.read(), 'lxml')
            company_name.extend(link.text for link in source.find_all('a', target='_parent'))

        # Map each scraped name to the code of its first match in the KIND
        # table (same result as the former nested scan, without the O(n*m) loop).
        first_code_by_company = {}
        for company, company_code in zip(stock_code['company'], stock_code['code']):
            first_code_by_company.setdefault(company, company_code)
        code = [
            [first_code_by_company[name], name]
            for name in company_name
            if name in first_code_by_company
        ]

        # Save the list as CSV. Passing columns= up front also works when no
        # name matched (assigning .columns afterwards fails on an empty frame).
        hydf = pd.DataFrame(code, columns=['code', 'company'])
        hydf.to_csv('./KOSPI_200/kospi200_' + todayYYMMDD + '.csv', index=False)

    # Load the most recent list file. File names end in YYYYMMDD, so the
    # lexicographically largest CSV name is the newest one.
    kospi_list_dir = './KOSPI_200'
    kospi_list_files = sorted(
        name for name in os.listdir(kospi_list_dir) if name.endswith('.csv')
    )
    if not kospi_list_files:
        # Previously this surfaced as an opaque "max() arg is an empty sequence".
        raise FileNotFoundError(
            "No KOSPI 200 list CSV found in " + kospi_list_dir
            + "; set load_kospi_list_yn = 'Y' to download one first."
        )
    # Keep only the first 100 rows; copy so the column assignments below act on
    # an independent frame rather than a slice.
    KOSPI_200_df = pd.read_csv(kospi_list_dir + '/' + kospi_list_files[-1]).iloc[:100, :].copy()

    start_dt = datetime.now() + timedelta(weeks=-52 * 1)

    # Settings for fetching OHLCV data for the listed codes.
    paralled_jobs = 6
    yf.pdr_override()

    # read_csv parses the codes as integers, so restore the six-digit form.
    KOSPI_200_df['code'] = KOSPI_200_df['code'].map('{:06d}'.format)
    KOSPI_200_df['code_ks'] = KOSPI_200_df['code'] + '.KS'
    kospi_200_list = KOSPI_200_df['code'].tolist()

    if load_kospi_ohlcv_yn_init == 'Y':
        import time

        start = time.perf_counter()
        # yfinance offers a threaded multi-ticker download, so use that for the
        # network part.
        kospi_200_list = KOSPI_200_df['code_ks'].tolist()
        data = yf.download(
            tickers=kospi_200_list,
            threads=True,
            group_by='ticker',
            start=start_dt
        )

        # Write one CSV per ticker in worker processes. The context manager
        # releases the pool even if a worker raises.
        # NOTE(review): worker processes must be able to import
        # init_load_ohlcv from __main__; with the "spawn" start method this may
        # not work for functions defined inside a notebook -- confirm, or fall
        # back to a plain loop calling init_load_ohlcv(code, f_data=data).
        partial_func = partial(init_load_ohlcv, f_data=data)
        with multiprocessing.Pool(processes=paralled_jobs) as pool:
            pool.map(partial_func, KOSPI_200_df['code'].tolist())

        finish = time.perf_counter()
        print(f'Finished in {round(finish - start, 2)} second(s)')

    # Disabled incremental-update branch, kept as an inert string literal. It
    # refers to names (e.g. load_ohlcv) that are not defined in the cells shown
    # here, so it cannot simply be re-enabled.
    '''
    else:
        dir = './KOSPI_200_OHLCV'
        file = max(os.listdir(dir))
        dir = dir + '/' + file
        KOSPI_200_OHLCV_df = pd.read_csv(dir)
        table = KOSPI_200_OHLCV_df[pd.to_datetime(KOSPI_200_OHLCV_df['Date'], format = '%Y-%m-%d') >= start_dt]
        table2 = pool.map(load_ohlcv, [i for i in KOSPI_200_df.values])
        pool.close()
        pool.join()
        for i in table2:
            table_today = pd.merge(table_today, i, how='left', on='Date')
        print(table_today)
        table_today['Date'] = table_today['Date'].apply(lambda x : datetime.strftime(x,"%Y-%m-%d"))
        table = table[table['Date'] < datetime.today().strftime("%Y-%m-%d")]
        result_Table = pd.concat([table,table_today], ignore_index=True)
        pd.DataFrame(result_Table).to_csv('./KOSPI_200_OHLCV/kospi200_OHLCV' + todayYYMMDD + '.csv', index=False)
     '''

In [None]:
# Merge the per-ticker OHLCV CSV files into one dataset

import pandas as pd
import glob
import os


def merge_ohlcv_csvs(input_dir, output_path, pattern='*_OHLCV_*.csv'):
    """Concatenate the per-ticker CSV files in ``input_dir`` into ``output_path``.

    Only files matching ``pattern`` are read. The default matches the
    ``<code>_OHLCV_<date>.csv`` names written by the download step, so other
    CSV files kept in the same directory are left out. ``output_path`` itself
    is always excluded; otherwise a re-run would read the previous merge result
    back in and duplicate every row.

    Returns ``(paths, merged)``: the sorted list of files that were read and
    the combined frame. When nothing matches, nothing is written and an empty
    frame is returned.
    """
    output_key = os.path.normcase(os.path.abspath(output_path))
    paths = sorted(
        path
        for path in glob.glob(os.path.join(input_dir, pattern))
        if os.path.normcase(os.path.abspath(path)) != output_key
    )
    if not paths:
        return paths, pd.DataFrame()

    # axis=0 stacks the files vertically; ignore_index=True renumbers the rows
    # 0..n-1 instead of keeping each file's own index.
    merged = pd.concat((pd.read_csv(path) for path in paths), axis=0, ignore_index=True)
    merged.to_csv(output_path, index=False)
    return paths, merged


input_file = 'KOSPI_200_OHLCV'  # directory holding the per-ticker CSV files
output_file = 'KOSPI_200_OHLCV/untitled.csv'  # where the merged file is saved

allFile_list, dataCombine = merge_ohlcv_csvs(input_file, output_file)
print(allFile_list)
if not allFile_list:
    print('No per-ticker OHLCV CSV files found in ' + input_file + '; nothing merged.')

In [None]:
# trading 

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the trading dataset and turn its 'Date' strings (YYYY-MM-DD) into
# datetime values so they can be compared against the weekly calendar.
trd = pd.read_csv('KOSPI_200_OHLCV/trading_dataset.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d')
)

In [None]:
from datetime import date, timedelta

# Weekly trading calendar from the start of 2022 up to today:
# buy on each Monday, sell on the Friday of the same week.
today = date.today()
buy_date = pd.date_range(start='2022-01-01', end=pd.Timestamp(today), freq='W-MON')
sell_date = pd.date_range(start='2022-01-01', end=pd.Timestamp(today), freq='W-FRI')

# When this runs between Monday and Thursday the current week already has a
# Monday but no Friday yet. Drop that unfinished week so buy_date[i] and
# sell_date[i] always refer to the same week and indexing cannot run past the
# shorter of the two.
n_weeks = min(len(buy_date), len(sell_date))
buy_date = buy_date[:n_weeks]
sell_date = sell_date[:n_weeks]

In [None]:
def stock_pick(c):
    """Pick the stock code to trade in week ``c`` from the previous week's data.

    The window is the week before ``buy_date[c]`` .. ``sell_date[c]``. For a
    window with ``days`` trading days (4 or 5), Close and Volume are
    differenced ``days - 1`` times within each code, which leaves one value per
    code on its last row. Among the 10 rows with the largest differenced Close,
    the one with the largest differenced Volume is chosen.

    Parameters
    ----------
    c : int
        Index into the module-level ``buy_date`` / ``sell_date`` calendars.

    Returns
    -------
    int or None
        The chosen code, or ``None`` when the window has no rows or does not
        contain 4 or 5 trading days.

    Notes
    -----
    Reads the module-level frame ``trd`` (columns ``Date``, ``Close``,
    ``Volume``, ``Code``). Assumes each code's rows appear in ascending date
    order -- TODO confirm against how ``trd`` is built.
    """
    # Look at the week before the trade week.
    start_date = buy_date[c] - timedelta(weeks=1)
    end_date = sell_date[c] - timedelta(weeks=1)
    window = trd[(trd['Date'] >= start_date) & (trd['Date'] <= end_date)]
    if window.empty:
        # No data for that week (previously an IndexError from .iloc[0]).
        return None

    # Number of trading days in the window, taken from the first code's rows.
    days = int(window.groupby('Code')['Date'].count().iloc[0])
    if days not in (4, 5):
        return None

    # Difference days-1 times (4 for a five-day week, 3 for a four-day week).
    # Every pass is grouped by code, so values never mix across codes even if
    # the rows of different codes are interleaved.
    codes = window['Code']
    diffs = window[['Close', 'Volume']]
    for _ in range(days - 1):
        diffs = diffs.groupby(codes).diff()
    diffs = diffs.rename(columns={'Close': 'diff_Close', 'Volume': 'diff_Volume'})

    ranked = pd.concat([window, diffs], axis=1)
    top_by_close = ranked.sort_values(by='diff_Close', ascending=False).head(10)
    best = top_by_close.sort_values(by='diff_Volume', ascending=False).head(1)
    # .iloc[0] extracts the scalar explicitly; int() on a Series is deprecated.
    return int(best['Code'].iloc[0])

In [None]:
def trade(z, bank=10000000):
    """Simulate one week's trade: buy at the first open, sell at the last close.

    Parameters
    ----------
    z : int
        Index into the module-level ``buy_date`` / ``sell_date`` calendars.
    bank : number, optional
        Cash available for the purchase (default 10,000,000).

    Returns
    -------
    dict
        Keys ``stock_code``, ``buy_stock_price``, ``hold_stock``,
        ``sell_stock_price`` and ``profit``. When ``stock_pick(z)`` returns
        ``None``, or the picked code has no rows in the trade week, all values
        are ``None`` except ``profit``, which is 0.
    """
    no_trade = {'stock_code': None,
                'buy_stock_price': None,
                'hold_stock': None,
                'sell_stock_price': None,
                'profit': 0}

    # Run the selection once; it used to be re-evaluated three times per call.
    stock_code = stock_pick(z)
    if stock_code is None:
        return no_trade

    in_week = (trd['Date'] >= buy_date[z]) & (trd['Date'] <= sell_date[z])
    trd_week = trd[in_week & (trd['Code'] == stock_code)].sort_values('Date')
    if trd_week.empty:
        # The picked code did not trade this week.
        return no_trade

    buy_stock_price = trd_week.iloc[0]['Open']
    hold_stock = bank // buy_stock_price  # whole shares only
    # Sell at the picked stock's own last close of the week. Indexing by the
    # row count of some other code could select the wrong day or run past the
    # end of the frame.
    sell_stock_price = trd_week.iloc[-1]['Close']
    profit = (sell_stock_price - buy_stock_price) * hold_stock
    return {'stock_code': stock_code,
            'buy_stock_price': buy_stock_price,
            'hold_stock': hold_stock,
            'sell_stock_price': sell_stock_price,
            'profit': profit}

In [None]:
def summary(i):
    """Print a short report of the trade for week ``i``.

    Prints the week number followed by either the trade details (code, buy
    price, shares held, sell price, profit) or ``No trade``, then a line
    holding a single space as a separator.
    """
    # Evaluate the week once and reuse the result for every printed line;
    # each trade() call repeats the whole stock selection.
    result = trade(i)

    print('week : ' + str(i))
    if result['stock_code'] is not None:
        print('stock_code : %06d' % result['stock_code'])
        print('buy_stock_price : %.f' % result['buy_stock_price'])
        print('hold_stock :', result['hold_stock'])
        print('sell_stock_price :', result['sell_stock_price'])
        print('profit :', result['profit'])
    else:
        print('No trade')
    print(' ')

In [None]:
def play(weeks=32):
    """Run the weekly strategy for ``weeks`` consecutive weeks and print the total.

    For each week index ``0 .. weeks - 1`` the weekly summary is printed and
    that week's profit is added up; the accumulated profit is printed at the
    end.

    Parameters
    ----------
    weeks : int, optional
        Number of weeks to simulate, starting at week 0 (default 32). Each
        index must exist in the module-level weekly calendars used by
        ``trade``.
    """
    total_profit = 0
    for i in range(weeks):
        summary(i)
        # One trade() call per week for the running total; the extra call whose
        # result was discarded has been dropped.
        total_profit += trade(i)['profit']
    print(total_profit)