# Zipline Custom Bundle 생성
기존 Zipline Bundle은 미국 주식 데이터로만 구성되어 있다. 국내 주식을 이용하기 위해 한국 주식으로 구성된 custom bundle을 생성해서 사용해 보자.

## Imports & Settings

In [1]:
from pathlib import Path 
import warnings
import pandas as pd 

warnings.filterwarnings('ignore')

DATA_DIR = Path('..', '..', 'data')
idx = pd.IndexSlice

In [2]:
def create_split_table():
    """Store a one-row splits table under ``kr/splits``.

    The table is written in table format to ``finance_datareader.h5`` in the
    current working directory. Its only row is sid 1, effective date
    2010-01-01, ratio 1.0 (presumably a placeholder so that the splits table
    exists -- confirm against the bundle ingest script).
    """
    split_columns = ['sid', 'effective_date', 'ratio']
    split_rows = [[1, pd.to_datetime('2010-01-01'), 1.0]]
    splits = pd.DataFrame(data=split_rows, columns=split_columns)

    with pd.HDFStore('finance_datareader.h5') as store:
        store.put('kr/splits', splits, format='t')

def load_prices():
    """Load 2010-2020 adjusted OHLCV prices indexed by (ticker, date).

    Reads ``finance_datareader/prices`` from ``DATA_DIR / 'assets.h5'``, keeps
    the five ``adj_*`` columns with the ``adj_`` prefix removed, localizes the
    dates to UTC, forward-fills gaps of up to 5 consecutive rows, and drops
    every (field, ticker) column that still contains a missing value.

    Returns:
        DataFrame with columns open/high/low/close/volume.
    """
    adjusted_columns = ['adj_open', 'adj_high', 'adj_low', 'adj_close', 'adj_volume']
    plain_names = {name: name.replace('adj_', '') for name in adjusted_columns}

    raw = pd.read_hdf(DATA_DIR / 'assets.h5', 'finance_datareader/prices')

    # Restrict the first index level to 2010-2020 and keep only adjusted fields.
    window = raw.loc[idx['2010': '2020', :], :]
    window = window.reindex(columns=adjusted_columns).rename(columns=plain_names)

    # Pivot tickers into columns so gaps can be filled along the date axis.
    wide = window.unstack('ticker').sort_index().tz_localize('UTC')
    wide = wide.ffill(limit=5).dropna(axis=1)

    # Back to long format, with ticker as the outer index level.
    return wide.stack('ticker').swaplevel()

def load_symbols(tickers):
    """Return stock metadata for ``tickers`` with a sequential ``sid`` column.

    Reads ``finance_datareader/stocks`` from ``DATA_DIR / 'assets.h5'``, turns
    its index into a ``ticker`` column, keeps only rows whose ticker is in
    ``tickers``, and numbers the remaining rows from 0 in a new ``sid`` column.

    Args:
        tickers: Collection of ticker codes to keep.

    Returns:
        DataFrame whose first column is ``sid``, followed by ``ticker`` and
        the remaining columns of the stored table.
    """
    stocks = pd.read_hdf(DATA_DIR / 'assets.h5', 'finance_datareader/stocks')
    stocks.index.names = ['ticker']
    stocks = stocks.reset_index()

    selected = stocks[stocks.ticker.isin(tickers)]

    # A fresh 0..n-1 index is promoted to a column and becomes the sid.
    numbered = selected.reset_index(drop=True).reset_index()
    return numbered.rename(columns={'index': 'sid'})



In [3]:
# Build the price frame (open/high/low/close/volume) via load_prices().
prices = load_prices()

In [4]:
# `show_counts` replaces the `null_counts` keyword, which was deprecated in
# pandas 1.2 and removed in 2.0. info() prints its own report and returns
# None, which is why a trailing "None" appears in the cell output.
print(prices.info(show_counts=True))
# Unique ticker codes that survived the cleaning in load_prices().
tickers = prices.index.unique('ticker')

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1082886 entries, ('000020', Timestamp('2010-01-04 00:00:00+0000', tz='UTC')) to ('114090', Timestamp('2020-12-30 00:00:00+0000', tz='UTC'))
Data columns (total 5 columns):
 #   Column  Non-Null Count    Dtype  
---  ------  --------------    -----  
 0   open    1082886 non-null  float64
 1   high    1082886 non-null  float64
 2   low     1082886 non-null  float64
 3   close   1082886 non-null  float64
 4   volume  1082886 non-null  float64
dtypes: float64(5)
memory usage: 45.6+ MB
None


In [5]:
# Metadata (sid, ticker, ...) for the tickers present in the price data.
symbols = load_symbols(tickers)
# `show_counts` replaces the `null_counts` keyword, which was deprecated in
# pandas 1.2 and removed in 2.0. info() returns None, hence the trailing
# "None" in the cell output.
print(symbols.info(show_counts=True))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   sid     398 non-null    int64 
 1   ticker  398 non-null    object
 2   name    398 non-null    object
dtypes: int64(1), object(2)
memory usage: 9.5+ KB
None


In [6]:
# Save the ticker/sid table under 'kr/equities' in table format.
# `key` is passed by keyword: positional use is deprecated in recent pandas.
symbols.to_hdf('finance_datareader.h5', key='kr/equities', format='t')

In [7]:
# Distinct dates in the price index and the first/last of them.
dates = prices.index.unique(level='date')
start_date, end_date = dates.min(), dates.max()


In [8]:
# Write each ticker's price history to 'kr/<sid>' in table format. The store
# is opened once for the whole loop rather than once per ticker (as a
# DataFrame.to_hdf call inside the loop would do).
with pd.HDFStore('finance_datareader.h5') as store:
    for sid, symbol in symbols.set_index('sid').ticker.items():
        store.put('kr/{}'.format(sid), prices.loc[symbol], format='t')
    print(store.info())

# Run after the store above is closed: create_split_table() opens the same
# file on its own.
create_split_table()

<class 'pandas.io.pytables.HDFStore'>
File path: finance_datareader.h5
/kr/0                   frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/1                   frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/10                  frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/100                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/101                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/102                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/103                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/104                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/105                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/106             

In [9]:
# Display the price frame to check the data written to the store.
prices

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000020,2010-01-04 00:00:00+00:00,7540.000000,7820.000,7480.000000,7520.0,1.771970e+05
000100,2010-01-04 00:00:00+00:00,27279.531250,27356.375,26895.312500,27049.0,1.475136e+05
000120,2010-01-04 00:00:00+00:00,54900.000000,55300.000,53000.000000,54500.0,9.427300e+04
000140,2010-01-04 00:00:00+00:00,28450.000000,29400.000,28450.000000,28850.0,1.942600e+04
000210,2010-01-04 00:00:00+00:00,74999.695193,77078.000,74999.695193,77078.0,1.692598e+05
...,...,...,...,...,...,...
105560,2020-12-30 00:00:00+00:00,43550.000000,43600.000,42750.000000,43400.0,2.027550e+06
105630,2020-12-30 00:00:00+00:00,17800.000000,17950.000,17250.000000,17500.0,2.846580e+05
108670,2020-12-30 00:00:00+00:00,76800.000000,78700.000,75800.000000,78000.0,9.450300e+04
111770,2020-12-30 00:00:00+00:00,31450.000000,31900.000,31250.000000,31700.0,1.333400e+05


## 설정법
1. finance_datareader.h5가 잘 설정되었는지 확인한다.

## Zipline Root 폴더 찾기
- 컴퓨터마다 다르지만 제 컴퓨터에서는 C:\Users\PC\.zipline 이었다.

## 파일 이동
1. .zipline 폴더에 있는 데이터들을 복사해서 백업해 둔다.
2. 이 폴더에 있는 finance_datareader_kr_stocks.py와 extension.py를 .zipline 폴더로 이동한다.
3. .zipline 폴더 안에 custom_data라는 폴더를 만든다.
4. finance_datareader.h5 데이터를 custom_data 폴더 안으로 넣는다.

## 최종 구조 
.zipline 폴더 위치


    |-extension.py
    |-finance_datareader_kr_stocks.py
    |-custom_data
        |-finance_datareader.h5