In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm
import tushare

In [4]:
# The tushare token is a credential, so it is read from the TUSHARE_TOKEN
# environment variable instead of being committed with the notebook.
# A missing variable raises KeyError naming the variable.
pro = tushare.pro_api(os.environ['TUSHARE_TOKEN'])

# Fetch both columns in a single request: the two series are later combined
# in one boolean mask, so they must come from the same response to be
# guaranteed row-aligned.
listed_stocks = pro.stock_basic(exchange='', list_status='L', fields='ts_code,list_date')
ts_code_list = listed_stocks['ts_code']
list_date_list = listed_stocks['list_date']

In [102]:
# Select the stocks traded on the Shenzhen exchange (codes starting with '00')
# or the Shanghai exchange (codes starting with '60') that were listed
# before 2010.
# list_date is compared as a string; presumably it is YYYYMMDD so that
# lexicographic order matches date order -- confirm against the API output.
SZ_code_list, SH_code_list = (
    ts_code_list[ts_code_list.str.startswith(prefix) & (list_date_list < '20100101')]
    for prefix in ('00', '60')
)
total_code_list = pd.concat([SZ_code_list, SH_code_list])

In [101]:
# Date window passed to the tushare download calls as start_date/end_date.
# Eight-digit strings, presumably YYYYMMDD -- confirm against the tushare API.
start_date, end_date = '20100101', '20200101'

def download_stock_data(ts_code_list):
    """Download daily bars for every code and save one CSV per code.

    For each entry of ``ts_code_list`` the bars between the module-level
    ``start_date`` and ``end_date`` are requested with ``adj='qfq'`` and
    written to ``./data/<ts_code>`` (overwriting any existing file).

    Parameters
    ----------
    ts_code_list : iterable of str
        Stock codes to download.

    Notes
    -----
    Codes for which ``pro_bar`` returns ``None`` are skipped and reported
    at the end instead of aborting the whole run with an AttributeError.
    """
    # to_csv does not create missing directories.
    os.makedirs('./data', exist_ok=True)

    failed_codes = []
    for ts_code in tqdm.tqdm(ts_code_list):
        # Pass the authenticated client explicitly; without `api=` pro_bar
        # builds its own client from a token stored on disk, which may not
        # exist even though `pro` was created successfully.
        df = tushare.pro_bar(ts_code=ts_code, api=pro, adj='qfq',
                             start_date=start_date, end_date=end_date)
        if df is None:
            failed_codes.append(ts_code)
            continue
        df.to_csv('./data/' + ts_code, mode = 'w')

    if failed_codes:
        print("no data returned for: {}".format(', '.join(map(str, failed_codes))))
    print("done")

def download_stock_fundamental_data(ts_code_list):
    """Download the daily_basic fields for every code and save one CSV per code.

    For each entry of ``ts_code_list`` the columns ``ts_code``,
    ``trade_date``, ``turnover_rate``, ``volume_ratio``, ``pe``, ``pb``,
    ``dv_ratio`` and ``total_mv`` are requested through ``pro.daily_basic``
    for the module-level ``start_date``..``end_date`` window and written to
    ``./data/<ts_code>_fundamental`` (overwriting any existing file).

    Parameters
    ----------
    ts_code_list : iterable of str
        Stock codes to download.
    """
    # to_csv does not create missing directories.
    os.makedirs('./data', exist_ok=True)

    for ts_code in tqdm.tqdm(ts_code_list):
        df = pro.daily_basic(ts_code=ts_code, start_date=start_date, end_date=end_date,
                             fields='ts_code,trade_date,turnover_rate,volume_ratio,pe,pb,dv_ratio,total_mv')
        df.to_csv('./data/' + ts_code + '_fundamental', mode = 'w')
    print("done")
    
def _read_time_indexed_csv(path, extra_drop=()):
    """Read a downloaded CSV and index it by the parsed ``trade_date`` column.

    ``trade_date`` is parsed with format ``%Y%m%d`` into a ``time`` index.
    The saved row-index column (``Unnamed: 0``), ``trade_date`` and any
    columns named in ``extra_drop`` are removed.
    """
    table = pd.read_csv(path)
    table['time'] = pd.to_datetime(table['trade_date'], format = '%Y%m%d')
    return table.set_index('time').drop(columns = ['Unnamed: 0', 'trade_date', *extra_drop])


def combine_table(ts_code_list):
    """Join each code's price CSV with its fundamental CSV and pickle the result.

    For every code, ``./data/<ts_code>`` and ``./data/<ts_code>_fundamental``
    are read, indexed by date, outer-joined on that index, reversed in row
    order, and written to ``C:\\TUShareData\\join_table_<ts_code>.pkl``.

    Parameters
    ----------
    ts_code_list : iterable of str
        Stock codes whose CSV files already exist under ``./data``.

    Notes
    -----
    The target directory is not created here; ``to_pickle`` fails if it does
    not exist.
    """
    for ts_code in tqdm.tqdm(ts_code_list):
        price_table = _read_time_indexed_csv('./data/{}'.format(ts_code))
        # ts_code is dropped from the fundamental side only, so the joined
        # table carries a single ts_code column (from the price table).
        model_state_table = _read_time_indexed_csv('./data/{}_fundamental'.format(ts_code),
                                                   extra_drop = ['ts_code'])
        joint_table = price_table.join(model_state_table, how = 'outer').iloc[::-1]
        # Raw string: '\T' and '\j' are not valid escape sequences. Python
        # currently keeps the backslashes but warns about them; the raw
        # literal yields the same path without relying on that behaviour.
        joint_table.to_pickle(r'C:\TUShareData\join_table_{}.pkl'.format(ts_code))
    print('done')

def load_joint_table(ts_code):
    """Load the pickled joint table written by ``combine_table`` for ``ts_code``.

    Parameters
    ----------
    ts_code : str
        Stock code; selects ``C:\\TUShareData\\join_table_<ts_code>.pkl``.

    Returns
    -------
    The object stored in that pickle file.

    Notes
    -----
    Unpickling executes arbitrary code; only load files produced locally.
    """
    # Raw string: '\T' and '\j' are not valid escape sequences, so the plain
    # literal triggers an invalid-escape warning. The runtime path is unchanged.
    return pd.read_pickle(r'C:\TUShareData\join_table_{}.pkl'.format(ts_code))