In [1]:
from main_strategy import *
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed, so fall back to the renamed style whenever
# the legacy name is no longer registered.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

## Data Preprocessing

In [2]:
%%time
# Load the raw data and assign the column names used by the rest of the notebook.
data = pd.read_csv("data.csv", low_memory=False)
data.columns = ["Code", "Company", "Industry", "Date", "Open", "High", "Low", "Close", "Volume", "MV", "PB"]

# Parse the "YYYY/MM/DD" strings in one vectorized call rather than calling
# datetime.strptime once per row from Python.
data["Date"] = pd.to_datetime(data["Date"], format="%Y/%m/%d")

# Years included in the data, in order of first appearance.
# Cast to int64 so the array has the same dtype the per-row version produced.
data_year = data["Date"].dt.year.astype("int64").unique()

# Unique trading dates in the data.
data_date = pd.DataFrame(data["Date"].unique(), columns=["Date"])

# First trading date of each year; these dates are used as entry/exit points
# during backtesting. One groupby replaces a full scan of the dates per year.
first_date_by_year = data_date["Date"].groupby(data_date["Date"].dt.year).min()
# Keep the list in the same order as data_year (the groupby result is sorted by year).
first_date_each_year = [first_date_by_year.loc[year] for year in data_year]

Wall time: 1min 4s


## Backtest

In [3]:
# Instantiate the strategy object from the full price table, the years present
# in the data, the unique trading dates, and the first trading date of each year.
# NOTE(review): `strategy` is not defined in this notebook; presumably it is
# provided by the `from main_strategy import *` star import — confirm.
main = strategy(data, data_year, data_date, first_date_each_year)

In [4]:
%%time
# Grid search over the strategy parameters.
# Per the author's notes the run uses least volume = 10, initial_capital = 100 and
# transaction_cost = 0.00585 (none of these is passed explicitly below).
# Five parameters are varied: "num_selected", "by", "ascending", "trade_mode", "weight_mode",
# with 20, 2, 2, 3 and 2 choices respectively,
# so the backtest produces 20*2*2*3*2 = 480 results in total.
# Each result contains [remaining capital, return] for its parameter combination.

num_selected = np.arange(5, 105, 5)
by = ["MV", "PB"]
ascending = [True, False]
trade_mode = ["A", "B", "C"]
weight_mode = ["equal", "MVbased"]

# Maps "<by>_<ascending>_<trade_mode>_<weight_mode>" to the list of results
# obtained for every value of num_selected, in order.
result = {}
for by_i in by:
    for ascending_i in ascending:
        for trade_mode_i in trade_mode:
            for weight_mode_i in weight_mode:
                combo_key = "{}_{}_{}_{}".format(by_i, ascending_i, trade_mode_i, weight_mode_i)
                result[combo_key] = [
                    main.calculate_return(by=by_i, ascending=ascending_i,
                                          trade_mode=trade_mode_i, weight_mode=weight_mode_i,
                                          num_selected=num_selected_i)
                    for num_selected_i in num_selected
                ]
                # Progress marker: one line per finished combination.
                print(combo_key + "...finished")

MV_True_A_equal...finished
MV_True_A_MVbased...finished
MV_True_B_equal...finished
MV_True_B_MVbased...finished
MV_True_C_equal...finished
MV_True_C_MVbased...finished
MV_False_A_equal...finished
MV_False_A_MVbased...finished
MV_False_B_equal...finished
MV_False_B_MVbased...finished
MV_False_C_equal...finished
MV_False_C_MVbased...finished
PB_True_A_equal...finished
PB_True_A_MVbased...finished
PB_True_B_equal...finished
PB_True_B_MVbased...finished
PB_True_C_equal...finished
PB_True_C_MVbased...finished
PB_False_A_equal...finished
PB_False_A_MVbased...finished
PB_False_B_equal...finished
PB_False_B_MVbased...finished
PB_False_C_equal...finished
PB_False_C_MVbased...finished
Wall time: 3h 35min 15s


In [5]:
# Persist the backtest results. `result` is a dict, which np.save stores as a
# pickled 0-d object array; reading it back therefore needs allow_pickle=True
# (the default is False in current numpy) followed by .item() to recover the dict.
np.save('result.npy', result) 
#result_test = np.load('result.npy', allow_pickle=True).item()

In [None]:
# Data preprocessing for the ETFs (0050, 0051), which represent market performance.
# Named constants for the values that were previously inlined in the return formula.
ETF_INITIAL_CAPITAL = 100
ETF_TRANSACTION_COST = 0.00585

ETF = pd.read_csv("ETFs.csv")
ETF.columns = ["Company", "Date", "Code", "Open", "High", "Low", "Close", "Volume"]
# The raw "Company" field holds "<code> <name>"; derive Code first, because the
# next line overwrites Company with the name part.
ETF.Code = ETF.Company.apply(lambda x : x.split(" ")[0])
ETF.Company = ETF.Company.apply(lambda x : x.split(" ")[1])
ETF.Date = pd.to_datetime(ETF.Date, format="%m/%d/%Y")
# Keep only the yearly entry/exit dates, so consecutive rows of one ETF are one
# holding period apart.
ETF = ETF[ETF.Date.isin(first_date_each_year)]

# Calculate the return: compound the period-over-period returns of each ETF,
# charging the transaction cost on every period.
final_return = []
etf_result_by_code = {}
for code in ETF.Code.unique():
    closes = ETF[ETF.Code == code].sort_values("Date", ascending = True).Close
    previous_closes = closes.shift()
    # The first element has no previous close (NaN), hence the [1:].
    period_return = ((closes - previous_closes) / previous_closes).values[1:]
    growth = np.cumprod(period_return + 1 - ETF_TRANSACTION_COST)
    # With fewer than two entry dates there is no holding period, so the capital
    # is left unchanged instead of failing on growth[-1].
    capital = growth[-1] * ETF_INITIAL_CAPITAL if growth.size else float(ETF_INITIAL_CAPITAL)
    etf_result = [capital, capital / ETF_INITIAL_CAPITAL - 1]
    final_return.append(etf_result)
    etf_result_by_code[code] = etf_result

# Look the benchmarks up by code rather than by position, so the labels cannot be
# swapped when the CSV lists the ETFs in a different order.
for etf_code in ("0050", "0051"):
    if etf_code not in etf_result_by_code:
        raise KeyError("ETF code {!r} not found in ETFs.csv on the entry dates; found {}".format(
            etf_code, sorted(etf_result_by_code)))
    result[etf_code] = etf_result_by_code[etf_code]

np.save("result.npy", result)