In [1]:
import pandas as pd
# import modin.pandas as mpd
import numpy as np
import datetime
from scipy.stats import norm
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.ticker as ticker
import mplfinance as mpf
from multiprocessing import Pool
from contextlib import contextmanager
from joblib import Parallel, delayed
from IPython.display import clear_output
# import dill as pickle
import pickle
import time

In [2]:
# causis api
from causis_api.const import get_version
from causis_api.const import login
login.username = 'shuai.song'
login.password = 'Tsinghua2022'
login.version = get_version()
from causis_api.data import *
from causis_api.tool import *

In [3]:
commodities = {
            'rb':{'multiplier':10,'mintick':1,'exchange':'SHF'},
            'hc':{'multiplier':10,'mintick':1,'exchange':'SHF'},
            'i':{'multiplier':100,'mintick':0.5,'exchange':'DCE'},
            'm':{'multiplier':10,'mintick':1,'exchange':'DCE'},
            'pp':{'multiplier':5,'mintick':1,'exchange':'DCE'},
            'MA':{'multiplier':10,'mintick':1,'exchange':'CZC'},
            'bu':{'multiplier':10,'mintick':2,'exchange':'SHF'},
            'l':{'multiplier':5,'mintick':5,'exchange':'DCE'},
            'p':{'multiplier':10,'mintick':2,'exchange':'DCE'},
            'v':{'multiplier':5,'mintick':5,'exchange':'DCE'},
            'CF':{'multiplier':5,'mintick':5,'exchange':'CZC'},
            'OI':{'multiplier':10,'mintick':1,'exchange':'CZC'},
            'SR':{'multiplier':10,'mintick':1,'exchange':'CZC'},
            'TA':{'multiplier':5,'mintick':2,'exchange':'CZC'},
            'SA':{'multiplier':20,'mintick':1,'exchange':'CZC'},
} 

In [4]:
F_data_5min = pd.read_pickle('./dataset/F_data_5min.pkl')
F_data_D = pd.read_pickle('./dataset/F_data_D.pkl')

In [14]:
## Macro Trend Indicator
DAY_EMA_S = 60
DAY_EMA_L = 120

## Super Trend Indicator
ATR_WIN_S = 5
STD_MULTIPLIER_S = 2

ATR_WIN_L = 10
STD_MULTIPLIER_L = 3

## BackTest & Simulation Parameters
ACCOUNT = 1e6
REWARD_RATIO = 0.15
INIT_CAP = 1e8

## Plot Settings
COLORS = ['darkorange', 'cyan', 'royalblue', 'deeppink', 'indianred', 'limegreen']

In [9]:
@contextmanager 
def timer(name: str, _align, _log): # ⏱
    s = time.time()
    yield
    elapsed = time.time() - s
    if _log:
        print(f"{ '[' + name + ']' :{_align}} | {time.strftime('%Y-%m-%d %H:%M:%S')} Done | Using {elapsed: .3f} seconds")

In [10]:
def price_adj(stmp, PADJ):
    if PADJ:
        stmp['OPEN_adj'] = round(stmp['OPEN'] * stmp['ADJ'], 2)
        stmp['CLOSE_adj'] = round(stmp['CLOSE'] * stmp['ADJ'], 2)
        stmp['HIGH_adj'] = round(stmp['HIGH'] * stmp['ADJ'], 2)
        stmp['LOW_adj'] = round(stmp['LOW'] * stmp['ADJ'], 2)
    else:
        stmp['OPEN_adj'] = stmp['OPEN'] 
        stmp['CLOSE_adj'] = stmp['CLOSE'] 
        stmp['HIGH_adj'] = stmp['HIGH'] 
        stmp['LOW_adj'] = stmp['LOW'] 

    return stmp

In [11]:
def technical_analysis(stmp, log = True, PADJ=True):
    stmp = stmp.loc[~stmp['CLOCK'].duplicated()]
    stmp = price_adj(stmp.copy(deep=True), PADJ)

    with timer('TR', 20, True):
        ## TR
        TR = []
        TR.append(stmp['HIGH_adj'][0] - stmp['LOW_adj'][0])
        for i in range(1, stmp.shape[0]): 
            TR.append( max( stmp['HIGH_adj'][i]-stmp['LOW_adj'][i], np.abs(stmp['HIGH_adj'][i]-stmp['CLOSE_adj'][i-1]), np.abs(stmp['LOW_adj'][i]-stmp['CLOSE_adj'][i-1])) )

        stmp['TR'] = TR

    with timer('Super Trend', 20, False):
        ## ATR
        def STD_CAL(atr_win, std_mul, _stmp):
            _stmp['ATR'] = _stmp['TR'].rolling(atr_win, min_periods=1).mean() 

            ## Super Trend
            _stmp['h12'] = (_stmp['HIGH_adj'] + _stmp['LOW_adj'])/2
            _stmp['bub'] = _stmp['h12'] + std_mul*_stmp['ATR']
            _stmp['blb'] = _stmp['h12'] - std_mul*_stmp['ATR']

            UB = []
            LB = []
            UB.append(_stmp['bub'][0])
            LB.append(_stmp['blb'][0])

            for i in range(1, _stmp.shape[0]):
                if _stmp['CLOSE_adj'][i-1] > LB[i-1]:
                    LB.append(max(_stmp['blb'][i], LB[i-1]))
                else:
                    LB.append(_stmp['blb'][i])
                
                if _stmp['CLOSE'][i-1] < UB[i-1]:
                    UB.append(min(_stmp['bub'][i], UB[i-1]))
                else:
                    UB.append(_stmp['bub'][i])

            STD = []
            STD.append(np.sign(_stmp['CLOSE_adj'][0] - _stmp['OPEN_adj'][0]))

            for i in range(1, _stmp.shape[0]):
                if _stmp['CLOSE_adj'][i] > UB[i]:
                    STD.append(1.0)
                elif _stmp['CLOSE_adj'][i] < LB[i]:
                    STD.append(-1.0)
                else:
                    STD.append(STD[i-1])

            _stmp['UB'] = UB
            _stmp['LB'] = LB
            _stmp['STD'] = STD
            
            return _stmp['UB'], _stmp['LB'], _stmp['STD']
        
        with timer('Short Super Trend', 20, True):
            stmp['UB_S'], stmp['LB_S'], stmp['STD_S'] = STD_CAL(ATR_WIN_S, STD_MULTIPLIER_S, stmp.copy(deep=True))

        with timer('Long Super Trend', 20, True):
            stmp['UB_L'], stmp['LB_L'], stmp['STD_L'] = STD_CAL(ATR_WIN_L, STD_MULTIPLIER_L, stmp.copy(deep=True))

        
    return stmp

In [13]:
def Double_STD_trigger(stmp):
    tmp = 0.5*(2-np.abs(stmp['STD_S']+stmp['STD_S'].shift(1)))/stmp['STD_S']
    stmp['STD_S_Fork'] = tmp
    stmp['trigger'] = np.zeros(stmp.shape[0])
    
    buy_trigger_condition = (stmp['STD_S_Fork']== 1) & (stmp['STD_L']== 1)
    stmp['trigger'].loc[buy_trigger_condition] = 1

    sell_trigger_condition = (stmp['STD_S_Fork']==-1) & (stmp['STD_L']==-1)
    stmp['trigger'].loc[sell_trigger_condition] = -1
   
    return stmp

In [16]:
def BackTest(scom, filter):
    # 5 MINs
    COM_5 = F_data_5min[scom]
    COM_D = F_data_D[scom]

    COM_5 = COM_5.set_index('CLOCK', drop=False)
    COM_5 = COM_5.ffill()

    COM_D = COM_D.set_index('CLOCK', drop=False)

    COM_5['DATE'] = COM_5['CLOCK'].apply(lambda x: x[:10])

    COM_5_VTD = []

    for name, g in COM_5.groupby('DATE'):
        COM_5_VTD.append(name)

    ## EMA Filter
    def EMA_filter():
        COM_D['EMA_S'] = COM_D['CLOSE'].ewm(span=DAY_EMA_S, adjust=False).mean()
        COM_D['EMA_L'] = COM_D['CLOSE'].ewm(span=DAY_EMA_L, adjust=False).mean()
        Buy_signal_mask = COM_D.loc[COM_D['EMA_S']<COM_D['EMA_L']]['CLOCK'].to_list()
        Sell_signal_mask = COM_D.loc[COM_D['EMA_S']>COM_D['EMA_L']]['CLOCK'].to_list()

        return Buy_signal_mask, Sell_signal_mask

    COM_5 = technical_analysis(COM_5, PADJ=False)
    
    COM_5 =  Double_STD_trigger(COM_5)

    if filter:
        BSM_EMA, SSM_EMA = EMA_filter()
        COM_5['trigger'].loc[(COM_5['DATE'].apply(lambda x: x in BSM_EMA)) & (COM_5['trigger']==1)] = 0.0
        COM_5['trigger'].loc[(COM_5['DATE'].apply(lambda x: x in SSM_EMA)) & (COM_5['trigger']==-1)] = 0.0


    TICK = commodities[scom]['mintick'] * 1.5

    BUY_trigger_list = list(COM_5['trigger'].loc[COM_5['trigger']== 1].index)
    SELL_trigger_list = list(COM_5['trigger'].loc[COM_5['trigger']==-1].index)

    BUY_log = pd.DataFrame(columns=['open_bar', 'close_bar', 'open_price', 'close_price', 'close_reason'])
    clock_rank = COM_5['CLOCK'].to_list()
    cal_count = 0

    # for t in tqdm(BUY_trigger_list[:], desc='BUY Simulation'):
    for i in range(len(BUY_trigger_list)):
        if i >= len(BUY_trigger_list):
            break
        t = BUY_trigger_list[i]
        cal_count += 1
        if cal_count % 10 == 1:
            process = f'{cal_count}/{len(BUY_trigger_list)}'
            print(f"{'[' + process + ']' :12} | {time.strftime('%Y-%m-%d %H:%M:%S')} Done ")
        if cal_count % 100 == 0:
            clear_output()

        open_bar = clock_rank[clock_rank.index(t)+1]
        open_price = COM_5['OPEN'].loc[open_bar] + TICK

        loss_cut = COM_5.loc[open_bar]['LB_S']
        profit_cut = COM_5.loc[open_bar]['OPEN'] * (1 + REWARD_RATIO) 

        if COM_5['CLOCK'].loc[t:].loc[COM_5['trigger']==-1].shape[0]: # fork change coming, interrupt the simulation
            close_bar =  clock_rank[min(clock_rank.index(COM_5['CLOCK'].loc[t:].loc[COM_5['trigger']== -1][0])+1, len(clock_rank)-1)]
        else:
            close_bar = clock_rank[-1]

        for i in range(1, int((pd.to_datetime(close_bar) - pd.to_datetime(t)).total_seconds()/60)):
            observation_bar = clock_rank[min(clock_rank.index(open_bar)+i, len(clock_rank)-1)]

            if observation_bar in BUY_trigger_list:
                loss_cut = COM_5.loc[observation_bar]['LB_S']
                del BUY_trigger_list[BUY_trigger_list.index(observation_bar)]

            if observation_bar in SELL_trigger_list:
                loss_cut = COM_5.loc[observation_bar]['UB_S']            

            if (COM_5['LOW'].loc[observation_bar] <= loss_cut):
                close_bar = clock_rank[clock_rank.index(observation_bar)+1]
                close_reason = 'Loss_Cut'
                break

            if (COM_5['HIGH'].loc[observation_bar] >= profit_cut):
                close_bar = clock_rank[clock_rank.index(observation_bar)+1]
                close_reason = 'Profit_Cut'
                break

        close_price = COM_5['OPEN'].loc[close_bar] - TICK

        BUY_log.loc[BUY_log.shape[0]] = [open_bar, close_bar, open_price, close_price, close_reason]

    BUY_log['gain'] = BUY_log['close_price'] - BUY_log['open_price']

    SELL_log = pd.DataFrame(columns=['open_bar', 'close_bar', 'open_price', 'close_price'])
    clock_rank = COM_5['CLOCK'].to_list()
    cal_count = 0

    # for t in tqdm(BUY_trigger_list[:], desc='BUY Simulation'):
    for i in range(len(SELL_trigger_list)):
        if i >= len(SELL_trigger_list):
            break
        t = SELL_trigger_list[i]
        cal_count += 1
        if cal_count % 10 == 1:
            process = f'{cal_count}/{len(SELL_trigger_list)}'
            print(f"{'[' + process + ']' :12} | {time.strftime('%Y-%m-%d %H:%M:%S')} Done ")
        if cal_count % 100 == 0:
            clear_output()

        open_bar = clock_rank[clock_rank.index(t)+1]
        open_price = COM_5['OPEN'].loc[open_bar] - TICK

        loss_cut = COM_5.loc[open_bar]['UB_S']
        profit_cut = COM_5.loc[open_bar]['OPEN'] * (1 - REWARD_RATIO) 

        if COM_5['CLOCK'].loc[t:].loc[COM_5['trigger']== 1].shape[0]: # fork change coming, interrupt the simulation
            close_bar =  clock_rank[min(clock_rank.index(COM_5['CLOCK'].loc[t:].loc[COM_5['trigger']== 1][0])+1, len(clock_rank)-1)]
        else:
            close_bar = clock_rank[-1]

        for i in range(1, int((pd.to_datetime(close_bar) - pd.to_datetime(t)).total_seconds()/60)):
            observation_bar = clock_rank[min(clock_rank.index(open_bar)+i, len(clock_rank)-1)]

            if observation_bar in SELL_trigger_list:
                loss_cut = COM_5.loc[observation_bar]['UB_S']
                del SELL_trigger_list[SELL_trigger_list.index(observation_bar)]

            if observation_bar in BUY_trigger_list:
                loss_cut = COM_5.loc[observation_bar]['LB_S']            

            if (COM_5['HIGH'].loc[observation_bar] >= loss_cut) or (COM_5['LOW'].loc[observation_bar] <= profit_cut):
                close_bar = clock_rank[clock_rank.index(observation_bar)+1]
                break

        close_price = COM_5['OPEN'].loc[close_bar] + TICK

        SELL_log.loc[SELL_log.shape[0]] = [open_bar, close_bar, open_price, close_price]


    SELL_log['gain'] = SELL_log['open_price'] - SELL_log['close_price']

    SELL_log['signal']= 'SELL'
    BUY_log['signal'] = 'BUY'

    buy = BUY_log.set_index('open_bar', drop=False)
    sell = SELL_log.set_index('open_bar', drop=False)

    trading_log_buy = buy.sort_index()
    trading_log_sell = sell.sort_index()

    trading_log_all = buy.append(sell)
    trading_log_all = trading_log_all.sort_index()
    
    Dominant = get_dominant_contracts(f"R.CN.{commodities[scom]['exchange']}.{scom}.0004", COM_5_VTD[0], COM_5_VTD[-1])

    Dominant['last_dc'] = Dominant['ProductCode'].shift(1)
    Dominant['Date'] = Dominant['Date'].apply(lambda x: x[:4]+'-'+x[4:6]+'-'+x[6:8])

    dc_change = Dominant.loc[(Dominant['ProductCode']!=Dominant['last_dc'])][1:]

    def Date_Belong(clock):
        if clock[11:] < '21:00:00':
            return clock[:10]
        else:
            return COM_5_VTD[min(COM_5_VTD.index(clock[:10])+1, len(COM_5_VTD)-1)]

    trading_log_buy['open_date'] = trading_log_buy['open_bar'].apply(lambda x: Date_Belong(x))
    trading_log_buy['close_date'] = trading_log_buy['close_bar'].apply(lambda x: Date_Belong(x))

    trading_log_sell['open_date'] = trading_log_sell['open_bar'].apply(lambda x: Date_Belong(x))
    trading_log_sell['close_date'] = trading_log_sell['close_bar'].apply(lambda x: Date_Belong(x))

    trading_log_all['open_date'] = trading_log_all['open_bar'].apply(lambda x: Date_Belong(x))
    trading_log_all['close_date'] = trading_log_all['close_bar'].apply(lambda x: Date_Belong(x))

    def Simulation(trading_log):
        Balance = pd.DataFrame()
        import numpy as np
        Balance = Balance.reindex(COM_5_VTD)
        Balance['Account']  = np.zeros(Balance.shape[0])
        Balance['interday_profit'] = np.zeros(Balance.shape[0])
        Balance['holding_profit'] = np.zeros(Balance.shape[0])
        Balance['holding_pos'] = np.zeros(Balance.shape[0])
        Balance['dc_change_gap'] = np.zeros(Balance.shape[0])

        for i in tqdm(range(trading_log.shape[0]), desc='Simulation...'):
            entry = trading_log.iloc[i]

            if entry['open_date'] == entry['close_date']: # open and close position in the same day
                OPEN_POS_VOL = int(INIT_CAP/(entry['open_price']*commodities[scom]['multiplier'])) # setting the allocatable money
                Balance['interday_profit'].loc[entry['open_date']] += entry['gain'] * OPEN_POS_VOL * commodities[scom]['multiplier']

            else:# holding position for a period
                pos_dir = 1 if entry['signal'] == 'BUY' else -1
                OPEN_POS_VOL = int(INIT_CAP/(entry['open_price']*commodities[scom]['multiplier'])) 

                period_start = entry['open_date']
                period_end   = COM_5_VTD[COM_5_VTD.index(entry['close_date'])-1]

                # gain or loss caused by position open
                delta_price = COM_5['CLOSE'].loc[period_start+' 15:00:00'] - entry['open_price']
                Balance['holding_pos'].loc[entry['open_date']] = pos_dir * OPEN_POS_VOL
                Balance['holding_profit'].loc[entry['open_date']] += pos_dir * OPEN_POS_VOL * delta_price * commodities[scom]['multiplier']


                # holding period value fluctuation
                for i in range(COM_5_VTD.index(entry['open_date'])+1, COM_5_VTD.index(entry['close_date'])):
                    delta_price = COM_5['CLOSE'].loc[COM_5_VTD[i]+' 15:00:00'] - COM_5['CLOSE'].loc[COM_5_VTD[i-1]+' 15:00:00'] 
                    Balance['holding_pos'].loc[COM_5_VTD[i]] = pos_dir * OPEN_POS_VOL
                    Balance['holding_profit'].loc[COM_5_VTD[i]] += pos_dir * OPEN_POS_VOL * delta_price * commodities[scom]['multiplier']

                delta_price = entry['close_price'] - COM_5['CLOSE'].loc[period_end+' 15:00:00']
                Balance['holding_pos'].loc[entry['close_date']] = pos_dir * OPEN_POS_VOL
                Balance['holding_profit'].loc[entry['close_date']] += pos_dir * OPEN_POS_VOL * delta_price * commodities[scom]['multiplier']

        for i in tqdm(range(dc_change.shape[0]), desc='Dominant Contracts Change'):
            try:
                entry = dc_change.iloc[i]
                nc = get_price(entry['ProductCode'], entry['Date'], entry['Date'])
                np = nc['OPEN'] # new contract price

                oc = get_price(entry['last_dc'], entry['Date'], entry['Date'])
                op = oc['OPEN'] # old contract price

                Balance['dc_change_gap'].loc[entry['Date']] = (op - np)*Balance['holding_pos'].loc[entry['Date']]
            except:
                continue # some data is ommited which is not contained in COM_5 either, so just ignore them

        # dc change has no effect
        Balance['holding_pos'].loc[(Balance['holding_pos']!=0) & (pd.Series(Balance.index).apply(lambda x: x in dc_change['Date'].to_list()))]

        Balance['d_gain'] = Balance['interday_profit'] + Balance['holding_profit'] +  Balance['dc_change_gap']
        Balance['Pnl'] = Balance['d_gain'].cumsum()/INIT_CAP + 1

        return Balance

    Balance_ALL = Simulation(trading_log_all)
    Balance_BUY = Simulation(trading_log_buy)
    Balance_SELL = Simulation(trading_log_sell)

    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(15, 7), dpi=150)

    COM_D['CLOSE'].plot(ax=axes[0],color='r', alpha=0.6, label='CLOSE')
    if filter:
        COM_D['EMA_S'].plot(ax=axes[0], label='EMA_S')
        COM_D['EMA_L'].plot(ax=axes[0], label='EMA_L')

    axes[0].set_title('Price')
    axes[0].legend()

    Balance_ALL['Pnl'][:].plot(ax = axes[1], label='BUY & SELL')
    Balance_BUY['Pnl'][:].plot(ax = axes[1], label='BUY')
    Balance_SELL['Pnl'][:].plot(ax = axes[1], label='SELL')
    axes[1].set_title('BUY & SELL')
    axes[1].legend()

    plt.tight_layout(True)
    code = 'f' if filter else 'o'
    plt.savefig(f'img/{scom}-{code}.png')

In [18]:
for i in commodities:
    scom = i
    print(f'====== {scom} ======')
    print('------- Filter ------')
    BackTest(scom, filter = True)
    print('------- Origin ------')
    BackTest(scom, filter = False)

[TR]                 | 2022-08-19 00:23:41 Done | Using  5.677 seconds
[Short Super Trend]  | 2022-08-19 00:23:46 Done | Using  5.191 seconds
[Long Super Trend]   | 2022-08-19 00:23:51 Done | Using  5.231 seconds


KeyboardInterrupt: 