In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import talib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score,confusion_matrix,precision_score,recall_score,f1_score

In [2]:
end = pd.datetime.now().date() - pd.Timedelta(days = 1*252)
start = end - pd.Timedelta(days = ((12*252)+30))

  end = pd.datetime.now().date() - pd.Timedelta(days = 1*252)


In [3]:
class backtest_ML:
    
    def __init__(self,ticker,start,end):
          
        self.ticker = ticker
        self.start = start
        self.end = end
    
        self.fetch_data()
        self.process_data()
        self.train_test_split()
        self.model()
        self.predict()
        self.model_performance()
    
    def fetch_data(self):
        self.df = yf.download(self.ticker,self.start,self.end)
        self.df1=yf.download("^VIX",self.start,self.end)
    def process_data(self):
        
        self.df['Daily Returns'] = np.log(self.df['Adj Close']/self.df['Open'])
        self.df['Avg Price'] = talib.AVGPRICE(self.df['Open'].shift(1),self.df['High'].shift(1),self.df['Low'].shift(1),self.df['Adj Close'].shift(1))
        self.df['Gap prev close'] = np.log((self.df['Open'])/(self.df['Adj Close'].shift(1)))
        self.df['Gap prev Avg price'] = np.log((self.df['Open'])/(self.df['Avg Price'].shift(1)))
        self.df['MA_1'] = self.df['Adj Close'].shift(1).rolling(window=3,center=False).mean()
        self.df['Stdev_1']=self.df['Adj Close'].shift(1).rolling(window=3,center=False).std() 
        self.df['Prev day'] = np.log((self.df['Adj Close'].shift(1))/(self.df['Adj Close'].shift(2)))
        self.df['2 Days Low'] = np.where((self.df['Daily Returns'].shift(1) < 0) & (self.df['Daily Returns'].shift(2) < 0),1,0)
        self.df['2 Days High'] = np.where((self.df['Daily Returns'].shift(1) > 0) & (self.df['Daily Returns'].shift(2) > 0),1,0)
        self.df['VIX'] = self.df1['Open'] 
        self.df['RSI'] =talib.RSI(self.df['Adj Close'],timeperiod = 7)
        self.df['Momentum'] =talib.MOM(self.df['Adj Close'].shift(1),timeperiod=5)
        self.df['MFI'] = talib.MFI(self.df['High'].shift(1),self.df['Low'],self.df['Adj Close'],self.df['Volume'],timeperiod=5)
        self.df['OBV'] = talib.OBV(self.df['Volume'].shift(1),self.df['Adj Close'].shift(1))
        self.df['ATR'] = talib.ATR(self.df['High'].shift(1),self.df['Low'].shift(1),self.df['Adj Close'].shift(1),timeperiod=5)
        self.df['HB Signal'] = talib.HT_DCPERIOD(self.df['Adj Close'])
        self.df['Correl'] = talib.CORREL(self.df['High'].shift(1),self.df['Low'].shift(1),timeperiod=5)
        self.df['UpDown'] = np.where(np.log(self.df['Adj Close']/self.df['Open']) > 0,1,0)
        
        self.df.dropna(inplace=True)
        
    def train_test_split(self):
        self.X = self.df.iloc[:,[7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22]]
        self.y = self.df['UpDown']
        self.X_train,self.X_test,self.y_train,self.y_test = train_test_split(self.X,self.y,test_size=0.2,random_state=200)
        
    def model(self):
        self.mod = GradientBoostingClassifier(n_estimators=15)
        self.mod.fit(self.X_train,self.y_train)
        
    def predict(self):
        self.y_pred_test = self.mod.predict(self.X_test)
        self.y_pred_train = self.mod.predict(self.X_train)
        
    def model_performance(self):
        
        self.confusion_matrix_train = confusion_matrix(self.y_train,self.y_pred_train)
        self.confusion_matrix_test = confusion_matrix(self.y_test,self.y_pred_test)
        self.precision_score_train = precision_score(self.y_train,self.y_pred_train)
        self.precision_score_test = precision_score(self.y_test,self.y_pred_test)
        self.recall_score_train = recall_score(self.y_train,self.y_pred_train)
        self.recall_score_test = recall_score(self.y_test,self.y_pred_test)
        self.f1_score_train = f1_score(self.y_train,self.y_pred_train)
        self.f1_score_test = f1_score(self.y_test,self.y_pred_test)
        self.roc_auc_score_train = roc_auc_score(self.y_train,self.y_pred_train)
        self.roc_auc_score_test = roc_auc_score(self.y_test,self.y_pred_test)
        Score_Train = self.mod.score(self.X_train,self.y_train)
        Score_Test = self.mod.score(self.X_test,self.y_test)
        Precision_Train = self.precision_score_train
        Precision_Test = self.precision_score_test
        Recall_Train = self.recall_score_train
        Recall_Test = self.recall_score_test
        f1_score_Train = self.f1_score_train
        f1_score_Test = self.f1_score_test
        ROC_AUC_Train = self.roc_auc_score_train
        ROC_AUC_Test = self.roc_auc_score_test
        TP_Train = self.confusion_matrix_train.tolist()[0][0]
        FP_Train = self.confusion_matrix_train.tolist()[0][1]
        FN_Train = self.confusion_matrix_train.tolist()[1][0]
        TN_Train = self.confusion_matrix_train.tolist()[1][1]
        TP_Test = self.confusion_matrix_test.tolist()[0][0]
        FP_Test = self.confusion_matrix_test.tolist()[0][1]
        FN_Test = self.confusion_matrix_test.tolist()[1][0]
        TN_Test = self.confusion_matrix_test.tolist()[1][1]
        return (Score_Train,Score_Test,Precision_Train,Precision_Test,Recall_Train,Recall_Test,f1_score_Train,
        f1_score_Test,ROC_AUC_Train,ROC_AUC_Test,TP_Train,FP_Train,FN_Train,TN_Train,TP_Test,FP_Test,FN_Test,TN_Test)

In [4]:

a=[]

Nifty_backtest_ML = backtest_ML("^NSEI",start,end)
a.append(Nifty_backtest_ML.model_performance())
Score_Train = [a[0] for a in a]
Score_Test = [a[1] for a in a]
Precision_Train = [a[2] for a in a]
Precision_Test = [a[3] for a in a]
Recall_Train = [a[4] for a in a]
Recall_Test = [a[5] for a in a]
f1_score_Train = [a[6] for a in a]
f1_score_Test = [a[7] for a in a]
ROC_AUC_Train = [a[8] for a in a]
ROC_AUC_Test = [a[9] for a in a]
TP_Train = [a[10] for a in a]
FP_Train = [a[11] for a in a]
FN_Train = [a[12] for a in a]
TN_Train = [a[13] for a in a]
TP_Test = [a[14] for a in a]
FP_Test = [a[15] for a in a]
FN_Test = [a[16] for a in a]
TN_Test = [a[17] for a in a]

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [5]:
Results = pd.DataFrame({'Score_Train':Score_Train,'Score_Test':Score_Test,'Precision_Train':Precision_Train,'Precision_Test'
                       :Precision_Test,'Recall_Train':Recall_Train,'Recall_Test':Recall_Test,'f1_score_Train':f1_score_Train,
                       'f1_score_Test':f1_score_Test,'ROC_AUC_Train':ROC_AUC_Train,'ROC_AUC_Test':ROC_AUC_Test,'TP_Train':TP_Train,
                       'FP_Train':FP_Train,'FN_Train':FN_Train,'TN_Train':TN_Train,'TP_Test':TP_Test,'FP_Test':FP_Test,
                       'FN_Test':FN_Test,'TN_Test':TN_Test})

Results

Unnamed: 0,Score_Train,Score_Test,Precision_Train,Precision_Test,Recall_Train,Recall_Test,f1_score_Train,f1_score_Test,ROC_AUC_Train,ROC_AUC_Test,TP_Train,FP_Train,FN_Train,TN_Train,TP_Test,FP_Test,FN_Test,TN_Test
0,0.745547,0.676845,0.726761,0.659218,0.714681,0.641304,0.72067,0.650138,0.743223,0.674719,656,194,206,516,148,61,66,118
