In [1]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from ta import add_all_ta_features
from ta.trend import ema_indicator
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
import datetime

from strategy import *

In [2]:
# Candidate hyper-parameter values handed to the XGBoost parameter search.
xg_grid = dict(
    min_child_weight=[1, 5, 10],
    gamma=[0.5, 1, 1.5, 2, 5],
    max_depth=[3, 4, 5],
)

In [3]:
def run_cerebro(dataname,train_start,random_grid,strategy,train_end='2018-01-01',test_end='2022-01-01'):
    """Tune and fit an XGBoost classifier on one labelled CSV and predict the test window.

    Reads ``<dataname>.csv`` (first column used as a datetime index), trains on
    the rows from ``train_start`` up to ``train_end`` and predicts the ``state``
    column for the rows from ``train_end`` to ``test_end``. Every column except
    the last two is used as a feature.

    Parameters
    ----------
    dataname : str
        CSV file name without the ``.csv`` extension.
    train_start : str
        First date (inclusive) of the training window.
    random_grid : dict
        Parameter candidates passed to ``RandomizedSearchCV`` as
        ``param_distributions``.
    strategy : object
        Currently unused: the backtrader backtest that consumed it has been
        disabled. Kept so existing callers keep working.
    train_end : str, optional
        Boundary between training and test data. Rows dated exactly on the
        boundary belong to the test set only.
    test_end : str, optional
        Last date (inclusive) of the test window.

    Returns
    -------
    tuple
        ``(df, score, f1, params)`` where ``df`` is the full frame with the
        datetime index moved back into a regular column and ``state``
        overwritten by the predictions inside the test window, ``score`` is the
        value of ``model.score`` on the test set, ``f1`` is the weighted F1
        score on the test set and ``params`` are the best parameters found by
        the search.
    """
    df = pd.read_csv(dataname+'.csv',index_col=0)
    df.index = pd.to_datetime(df.index)

    train = df.loc[train_start:train_end]
    test = df.loc[train_end:test_end]
    # Label-based slices include both end points, so a row dated exactly on
    # train_end would otherwise be used for training and for evaluation.
    train = train.loc[~train.index.isin(test.index)]

    # NOTE(review): this assumes 'state' (the target) is one of the last two
    # columns; otherwise the label leaks into the features - confirm against
    # the CSV layout.
    X_train, y_train = train.iloc[:,:-2], train['state']
    X_test, y_test = test.iloc[:,:-2], test['state']

    # n_iter is only an upper bound: when the grid holds fewer combinations
    # than that, every combination is evaluated.
    # NOTE(review): the default cross-validation folds are not time-ordered;
    # for time-series data a TimeSeriesSplit may be more appropriate.
    xg_random = RandomizedSearchCV(estimator = XGBClassifier(), param_distributions = random_grid, n_iter = 100, verbose=2, random_state=42, n_jobs = -1)
    xg_random.fit(X_train, y_train)
    params = xg_random.best_params_

    # The search refits the best configuration on the whole training set by
    # default, so that estimator is reused instead of training the model again.
    model = xg_random.best_estimator_
    y_pred = model.predict(X_test)

    score = model.score(X_test,y_test)
    # f1_score expects (y_true, y_pred); with average='weighted' the class
    # weights are taken from the first argument, so the order matters.
    f1 = f1_score(y_test,y_pred,average='weighted')

    # Replace the true labels in the test window with the predictions.
    df.loc[train_end:test_end,['state']] = y_pred.reshape(-1,1)

    # Move the datetime index back into a regular column for downstream use.
    df = df.reset_index()
    return df,score,f1,params

In [5]:
# Label files to model (the y3_2003 / y4_2003 variants are currently left out)
# and the candidate start dates of the training window.
datanames = ['y1_2003','y2_2003','y5_2003','gmm_1_labeled_2003','hmm_1_labeled_2003']
train_start_list = ['2003-01-01','2010-01-04','2015-01-02']

pnl_result = []
pnl = 0
df_result = []

# Every (label file, training start) combination, label file varying slowest.
run_grid = [(dataname, train_start) for dataname in datanames for train_start in train_start_list]

for dataname, train_start in run_grid:
    labelled_frame, accuracy, weighted_f1, best_params = run_cerebro(
        dataname, train_start, xg_grid, MarketStatus
    )
    # One record per run; PnL / Sharpe are not collected while the backtest
    # inside run_cerebro is disabled.
    df_result.append({
        'dataname': dataname,
        'train_start': train_start,
        'df': labelled_frame,
        'score': accuracy,
        'f1': weighted_f1,
        'params': best_params,
    })
df_result



Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits








Fitting 5 folds for each of 45 candidates, totalling 225 fits






[{'dataname': 'y1_2003',
  'train_start': '2003-01-01',
  'df':            Date         Open         High          Low        Close  \
  0    2003-01-02   489.489990   505.179993   489.489990   505.179993   
  1    2003-01-03   505.179993   506.230011   502.100006   504.769989   
  2    2003-01-06   504.769989   517.530029   504.769989   516.039978   
  3    2003-01-07   516.039978   516.650024   510.769989   512.450012   
  4    2003-01-08   512.450012   512.450012   504.529999   505.309998   
  ...         ...          ...          ...          ...          ...   
  4749 2021-12-23  2750.840088  2776.620117  2750.840088  2768.360107   
  4750 2021-12-27  2769.340088  2803.879883  2769.340088  2803.739990   
  4751 2021-12-28  2803.899902  2813.590088  2795.709961  2798.350098   
  4752 2021-12-29  2798.379883  2807.370117  2791.860107  2801.870117   
  4753 2021-12-30  2802.909912  2813.250000  2794.189941  2795.909912   
  
          Adj Close  volatility_bbm  volatility_bbh  volati

In [8]:
# Write each labelled frame to xg_<first two characters of dataname>_<training start year>.csv
df_result_list = df_result
for entry in df_result_list:
    tag = entry['dataname'][:2]
    start_year = entry['train_start'][:4]
    csv_name = 'xg_' + tag + '_' + start_year + '.csv'
    entry['df'].to_csv(csv_name)

'xg_y1_2003',
'xg_y1_2010',
'xg_y1_2015',
'xg_y2_2003',
'xg_y2_2010',
'xg_y2_2015',
'xg_y5_2003',
'xg_y5_2010',
'xg_y5_2015',
'xg_gm_2003',
'xg_gm_2010',
'xg_gm_2015',
'xg_hm_2003',
'xg_hm_2010',
'xg_hm_2015',


In [None]:
# Collect the per-run PnL records into a table and export it to Excel.
pnl_result_df = pd.DataFrame(pnl_result)
# sort_values returns a new frame, so the result must be assigned for the
# ordering to reach the exported file. The sort is skipped when there is no
# 'pnl' column (for example while pnl_result is still empty), which would
# otherwise raise a KeyError.
if 'pnl' in pnl_result_df.columns:
    pnl_result_df = pnl_result_df.sort_values(by='pnl',ascending=False)
pnl_result_df.to_excel('0 xg_result.xlsx')