# Toshare Coding Exercises

[Tushare API](https://tushare.pro/document/2?doc_id=27)
In this notebook you need create your own tushare account and create custom factors and filters for it. 

In [None]:
!pip install tushare -i https://pypi.tuna.tsinghua.edu.cn/simple

In [35]:
import tushare as ts
import pandas as pd
import numpy as np
import time

print(tushare.__version__)

1.2.89


## Initialized API

In [38]:
token = ''
ts.set_token(token)
pro = ts.pro_api()

## Load Data

In [93]:
# got calendar from date range
start_date = '20190101'
end_date = '20200101'
calendar = pro.trade_cal(exchange='SSE', is_open='1', 
                            start_date=start_date, 
                            end_date=end_date, 
                            fields='cal_date')
# check all stocks exist in market today
stocks = pro.query('stock_basic', exchange='', list_status='L', market = '主板') # 主板/创业板/科创板/CDR/北交所

ts_code_list = ','.join(stocks.ts_code.values)
print(calendar.shape, stocks.shape)
stocks.head()

(244, 1) (2185, 7)


Unnamed: 0,ts_code,symbol,name,area,industry,market,list_date
0,000001.SZ,1,平安银行,深圳,银行,主板,19910403
1,000002.SZ,2,万科A,深圳,全国地产,主板,19910129
2,000004.SZ,4,ST国华,深圳,软件服务,主板,19910114
3,000005.SZ,5,ST星源,深圳,环境保护,主板,19901210
4,000006.SZ,6,深振业A,深圳,区域地产,主板,19920427


Data from Tushare had some constraints about 500 times request per min and 6000 line data per time.
So we create function to iterate calendar and reqest data from API

In [87]:
def _get_daily(ts_code='000001.SZ', start_date='', end_date=''):
    for _ in range(3):
        #try:
            df = pro.daily(ts_code=ts_code, start_date=start_date, end_date=end_date, adj='qfq')
            return df
        #except:
            time.sleep(1)

def get_Daily_All(ts_code_list, calendar):
    '''
    param: DateFrame 
            column:cal_date | index 1,2...n
    :return: DateFrame 
            comlumns: ts_code, trade_date, open, high, low, close, pre_close, change, pct_chg, vol, amount
            index: stock_code(600547.SH	)
    '''
    stocks_daily = pd.DataFrame()
    for date in calendar['cal_date'].values:
        stocks_daily = stocks_daily.append(_get_daily(ts_code='', start_date=date, end_date=date))
    return stocks_daily

In [88]:
# load total stock daily date for one year
all_stocks = get_Daily_All(ts_code_list, calendar)
print(all_stocks.shape)
all_stocks

(885088, 11)


Unnamed: 0,ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount
0,000673.SZ,20191231,5.22,5.32,5.05,5.10,5.16,-0.06,-1.1628,376809.74,194304.542
1,000685.SZ,20191231,8.25,8.29,8.21,8.24,8.28,-0.04,-0.4831,46570.29,38367.929
2,000690.SZ,20191231,5.65,5.67,5.62,5.65,5.66,-0.01,-0.1767,52430.97,29576.717
3,000711.SZ,20191231,5.45,5.49,5.37,5.44,5.45,-0.01,-0.1835,85171.58,46170.475
4,000715.SZ,20191231,6.01,6.11,5.95,6.09,6.03,0.06,0.9950,13468.84,8176.773
...,...,...,...,...,...,...,...,...,...,...,...
3549,002790.SZ,20190102,10.54,10.70,10.00,10.03,10.59,-0.56,-5.2880,26740.66,27110.812
3550,300517.SZ,20190102,13.39,13.44,13.18,13.23,13.20,0.03,0.2273,11731.00,15602.715
3551,600936.SH,20190102,3.74,3.78,3.69,3.71,3.74,-0.03,-0.8021,25222.38,9410.100
3552,601606.SH,20190102,11.90,12.10,11.72,11.94,12.06,-0.12,-0.9950,118737.08,141624.306


In [94]:
# save data
all_stocks = all_stocks.reset_index().drop_duplicates()
all_stocks.to_csv(start_date +'-'+ end_date + '.csv')

In [101]:
# load data from csv
all_stocks = pd.read_csv('20190101-20200101.csv')

## process data
1.output for stocks with a 60-day average dollar volume greater than ¥50,000

In [139]:
universe = all_stocks.iloc[:,3:]
universe = universe.sort_index(axis=0, ascending=False)
# convert date to standard string format, easy to filter
universe["date"] = pd.to_datetime(universe["trade_date"], format='%Y%m%d')
universe["date"] = universe.date.apply(lambda x: x.strftime("%Y-%m-%d"))
# drop missing data
universe = universe.dropna()
universe = universe.sort_values(by=["date", "ts_code"]).reset_index(drop=True)

In [157]:
# Create a screen for our Pipeline
#universe = AverageDollarVolume(window_length = 60)>50000
test = universe.sort_values(by=["ts_code",'date']).reset_index(drop=True)
test['average60'] = test.rolling(60).mean()['amount']
test = test.loc[test.average60 >= 50000]

In [158]:
test

Unnamed: 0,trade_date,ts_code,open,high,low,close,pre_close,change,pct_chg,vol,amount,date,average60
59,20190402,000001.SZ,13.28,13.48,13.23,13.36,13.18,0.18,1.3657,1100384.04,1466040.987,2019-04-02,1.312781e+06
60,20190403,000001.SZ,13.21,13.45,13.15,13.44,13.36,0.08,0.5988,792915.79,1056166.231,2019-04-03,1.322072e+06
61,20190404,000001.SZ,13.43,14.00,13.43,13.86,13.44,0.42,3.1250,2034365.00,2796366.353,2019-04-04,1.362271e+06
62,20190408,000001.SZ,13.90,14.43,13.72,13.96,13.86,0.10,0.7215,1743176.20,2464536.241,2019-04-08,1.379644e+06
63,20190409,000001.SZ,13.87,13.98,13.75,13.81,13.96,-0.15,-1.0745,781332.38,1080549.202,2019-04-09,1.383634e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...
885083,20191225,688399.SH,58.89,59.30,58.55,58.86,58.77,0.09,0.1531,14580.11,85929.383,2019-12-25,1.465729e+05
885084,20191226,688399.SH,59.02,60.25,58.88,59.40,58.86,0.54,0.9174,16945.64,100957.027,2019-12-26,1.427010e+05
885085,20191227,688399.SH,59.99,59.99,56.98,56.98,59.40,-2.42,-4.0741,24338.08,142290.976,2019-12-27,1.413676e+05
885086,20191230,688399.SH,56.20,56.60,55.55,56.38,56.98,-0.60,-1.0530,12249.09,68675.609,2019-12-30,1.386485e+05


In [167]:
# filte pre_close>30 and pre_close<150
test['close30'] = (test['close']>30)&(test['close']<150)
test

Unnamed: 0,trade_date,ts_code,open,high,low,close,pre_close,change,pct_chg,vol,amount,date,average60,close30
59,20190402,000001.SZ,13.28,13.48,13.23,13.36,13.18,0.18,1.3657,1100384.04,1466040.987,2019-04-02,1.312781e+06,False
60,20190403,000001.SZ,13.21,13.45,13.15,13.44,13.36,0.08,0.5988,792915.79,1056166.231,2019-04-03,1.322072e+06,False
61,20190404,000001.SZ,13.43,14.00,13.43,13.86,13.44,0.42,3.1250,2034365.00,2796366.353,2019-04-04,1.362271e+06,False
62,20190408,000001.SZ,13.90,14.43,13.72,13.96,13.86,0.10,0.7215,1743176.20,2464536.241,2019-04-08,1.379644e+06,False
63,20190409,000001.SZ,13.87,13.98,13.75,13.81,13.96,-0.15,-1.0745,781332.38,1080549.202,2019-04-09,1.383634e+06,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
885083,20191225,688399.SH,58.89,59.30,58.55,58.86,58.77,0.09,0.1531,14580.11,85929.383,2019-12-25,1.465729e+05,True
885084,20191226,688399.SH,59.02,60.25,58.88,59.40,58.86,0.54,0.9174,16945.64,100957.027,2019-12-26,1.427010e+05,True
885085,20191227,688399.SH,59.99,59.99,56.98,56.98,59.40,-2.42,-4.0741,24338.08,142290.976,2019-12-27,1.413676e+05,True
885086,20191230,688399.SH,56.20,56.60,55.55,56.38,56.98,-0.60,-1.0530,12249.09,68675.609,2019-12-30,1.386485e+05,True
