# Zipline Custom Bundle 생성
기존 Zipline Bundle은 미국 주식으로만 구성되어 있다. 국내 주식을 이용하기 위해서 한국 주식으로 구성된 custom bundle을 생성해서 사용해 보도록 하자.

## Imports & Settings

In [2]:
from pathlib import Path 
import warnings
import pandas as pd 

warnings.filterwarnings('ignore')

DATA_DIR = Path('..', '..', 'data')
idx = pd.IndexSlice

In [32]:
def create_split_table():
    """Write a single-row splits table to ``kr/splits``.

    The table has the columns ``sid``, ``effective_date`` and ``ratio`` and
    contains one row: sid 1, 2000-01-01, ratio 1.0. It is stored in table
    format in ``finance_datareader.h5``, resolved relative to the current
    working directory.

    NOTE(review): the row looks like a placeholder so the splits table is not
    empty -- confirm against the bundle ingest script.
    """
    only_row = [1, pd.to_datetime('2000-01-01'), 1.0]
    splits = pd.DataFrame(data=[only_row],
                          columns=['sid', 'effective_date', 'ratio'])

    with pd.HDFStore('finance_datareader.h5') as store:
        store.put('kr/splits', splits, format='t')

def load_prices():
    """Load 2010-2020 adjusted prices indexed by (ticker, date).

    Reads ``finance_datareader/prices`` from ``assets.h5`` under ``DATA_DIR``,
    keeps only the ``adj_*`` columns and strips the ``adj_`` prefix, localizes
    the dates to UTC and fills every gap, so the result contains no NaN.

    Returns:
        DataFrame with a (ticker, date) MultiIndex and one column each for
        open, high, low, close and volume.

    NOTE(review): back-filling copies later observations into earlier dates
    (for example, before a ticker's first observation) -- confirm this is
    acceptable for the intended backtests.
    """
    df = pd.read_hdf(DATA_DIR / 'assets.h5', 'finance_datareader/prices')

    return (df.loc[idx['2010': '2020', :], :]
            .reindex(columns=['adj_open', 'adj_high', 'adj_low', 'adj_close', 'adj_volume'])
            .rename(columns=lambda x: x.replace('adj_', ''))
            # Wide layout (one column per field/ticker) so gaps are filled
            # along the date axis of each ticker.
            .unstack('ticker')
            .sort_index()
            .tz_localize('UTC')
            # `.bfill()` replaces the deprecated `fillna(method='bfill')`.
            .bfill()
            # Values with no later observation to copy from get 0.01. After
            # this step nothing is NaN, so no column-wise dropna is needed.
            .fillna(0.01)
            .stack('ticker')
            .swaplevel()
           )

def load_symbols(tickers):
    """Return the stock metadata for ``tickers`` with a sequential ``sid``.

    Reads ``finance_datareader/stocks`` from ``assets.h5`` under ``DATA_DIR``,
    exposes its index as a ``ticker`` column and keeps only the rows whose
    ticker is contained in ``tickers``.

    Args:
        tickers: collection of ticker codes to keep.

    Returns:
        DataFrame whose first column ``sid`` numbers the surviving rows from
        0 upwards, followed by ``ticker`` and the remaining stock columns.
    """
    stocks = pd.read_hdf(DATA_DIR / 'assets.h5', 'finance_datareader/stocks')
    stocks.index.names = ['ticker']
    stocks = stocks.reset_index()

    # Renumber the kept rows 0..n-1; that fresh index becomes the sid column.
    selected = stocks[stocks.ticker.isin(tickers)].reset_index(drop=True)
    return selected.reset_index().rename(columns={'index': 'sid'})



In [33]:
# Build the gap-filled price frame indexed by (ticker, date).
prices = load_prices()

In [34]:
# Bare expression: the notebook renders the frame as the cell output.
prices

Unnamed: 0_level_0,Unnamed: 1_level_0,close,high,low,open,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000020,2010-01-04 00:00:00+00:00,7520.0,7820.000000,7480.000000,7540.000000,1.771970e+05
000040,2010-01-04 00:00:00+00:00,3635.0,3726.638655,3421.176471,3421.176471,1.839257e+05
000050,2010-01-04 00:00:00+00:00,9921.0,10187.080460,9768.954023,9882.988506,9.483973e+03
000060,2010-01-04 00:00:00+00:00,8696.0,8696.000000,8297.433333,8333.666667,1.110800e+06
000070,2010-01-04 00:00:00+00:00,46517.0,46517.000000,45790.171875,46309.334821,1.289576e+04
...,...,...,...,...,...,...
357120,2020-12-30 00:00:00+00:00,4805.0,4810.000000,4780.000000,4810.000000,1.236580e+05
357250,2020-12-30 00:00:00+00:00,4755.0,4765.000000,4720.000000,4765.000000,5.417800e+04
363280,2020-12-30 00:00:00+00:00,27700.0,28250.000000,25950.000000,26300.000000,8.664610e+05
365550,2020-12-30 00:00:00+00:00,5070.0,5090.000000,5040.000000,5080.000000,4.104520e+05


In [35]:
# Summarize the price frame and collect its distinct tickers.
# NOTE(review): DataFrame.info() prints its report itself and returns None, so
# the surrounding print() only adds the trailing "None" line. `null_counts` is
# the legacy spelling of `show_counts` in newer pandas -- check the pinned
# pandas version before changing either.
print(prices.info(null_counts=True))
tickers = prices.index.unique('ticker')

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 2154916 entries, ('000020', Timestamp('2010-01-04 00:00:00+0000', tz='UTC')) to ('900140', Timestamp('2020-12-30 00:00:00+0000', tz='UTC'))
Data columns (total 5 columns):
 #   Column  Non-Null Count    Dtype  
---  ------  --------------    -----  
 0   close   2154916 non-null  float64
 1   high    2154916 non-null  float64
 2   low     2154916 non-null  float64
 3   open    2154916 non-null  float64
 4   volume  2154916 non-null  float64
dtypes: float64(5)
memory usage: 90.5+ MB
None


In [36]:
# Stock metadata restricted to tickers that have price data, numbered by sid.
symbols = load_symbols(tickers)
# NOTE(review): info() returns None, hence the extra "None" line that print()
# adds; `null_counts` is the legacy spelling of `show_counts` in newer pandas.
print(symbols.info(null_counts=True))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 783 entries, 0 to 782
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   sid     783 non-null    int64 
 1   ticker  783 non-null    object
 2   name    783 non-null    object
dtypes: int64(1), object(2)
memory usage: 18.5+ KB
None


In [37]:
# Save the tickers (sid/ticker table) under kr/equities.
symbols.to_hdf('finance_datareader.h5', 'kr/equities', format='t')

In [38]:
# Distinct dates in the price data and the first/last of them.
dates = prices.index.unique('date')
start_date, end_date = dates.min(), dates.max()


In [39]:
# Store each asset's price history under kr/<sid>. The file is opened once for
# the whole loop; DataFrame.to_hdf would re-open and close it for every asset.
with pd.HDFStore('finance_datareader.h5') as store:
    for sid, symbol in symbols.set_index('sid').ticker.items():
        store.put(f'kr/{sid}', prices.loc[symbol], format='t')
    print(store.info())

# Called only after the store above is closed, because create_split_table()
# opens the same file itself.
create_split_table()

<class 'pandas.io.pytables.HDFStore'>
File path: finance_datareader.h5
/kr/0                   frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/1                   frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/10                  frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/100                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/101                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/102                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/103                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/104                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/105                 frame_table  (typ->appendable,nrows->2714,ncols->5,indexers->[index],dc->[])
/kr/106             

In [40]:
# Distinct tickers present in the price index (rendered as the cell output).
prices.index.unique('ticker')

Index(['000020', '000040', '000050', '000060', '000070', '000100', '000120',
       '000140', '000150', '000180',
       ...
       '344820', '348950', '350520', '352820', '353200', '357120', '357250',
       '363280', '365550', '900140'],
      dtype='object', name='ticker', length=794)

## 설정법
1. finance_datareader.h5가 잘 설정되었는지 확인한다.

## Zipline Root 폴더 찾기
- 컴퓨터마다 다르지만, 필자의 컴퓨터에서는 `C:\Users\PC\.zipline`이었다.

## 파일 이동
1. .zipline 폴더에 있는 데이터들을 복사해서 백업해 둔다.
2. 이 폴더에 있는 finance_datareader_kr_stocks.py와 extension.py를 .zipline 폴더로 이동한다.
3. .zipline 폴더 안에 custom_data라는 폴더를 만든다.
4. finance_datareader.h5 데이터를 custom_data 폴더 안으로 넣는다.

## 최종 구조 
.zipline 폴더 위치


    |-extension.py
    |-finance_datareader_kr_stocks.py
    |-custom_data
        |-finance_datareader.h5