# Mean Reversion Strategy on Nifty and NiftyBank

In [1]:
# Import the required libraries
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt  
%matplotlib inline
plt.style.use('seaborn-darkgrid')

In [2]:
# Fix the backtesting period

end = pd.datetime.now().date()
start = end - pd.Timedelta(days=1*252)

  end = pd.datetime.now().date()


In [3]:
# Create a class for backtesting pairs
class mean_reversion_pairs:
    
    # Create object attributes/methods
    def __init__(self,x,y,start,end,lookback,std_dev):
        
        self.x = x # Independent variable
        self.y = y # Dependent variable
        self.start =start 
        self.end = end
        self.lookback = lookback # Lookback period to consider moving average
        self.std_dev = std_dev # Number of standard deivations away to create bollinger band
        
        self.fetch_data()
        self.hedge_ratio()
        self.ADFtest()
        self.indicators()
        self.positions()
        self.returns()
        self.calc_drawdown()
        
    # To fetch the data from yahoo finance for both stocks x and y
    def fetch_data(self):
        
        self.dfx = yf.download(self.x,start,end)
        self.dfy = yf.download(self.y,start,end)
        
        # Create a single dataframe to concat only the closing prices for backtesting purpose
        self.df = pd.concat([self.dfx['Adj Close'],self.dfy['Adj Close']],axis=1)
        self.df.columns =[self.x,self.y]
        self.df.index = pd.to_datetime(self.df.index)
    
    # Calculate hedge ratio by running ordinary least square regression
    def hedge_ratio(self):
        import statsmodels.api as sm
        self.model = sm.OLS(self.df[self.y].iloc[:120],self.df[self.x].iloc[:120])
        self.model = self.model.fit()
        self.df['spread'] = self.df[self.y] - self.model.params[0] * self.df[self.x]
        return self.model.params[0]
    
    # Perform ADF test on the spread of the pair and return the T statistic value
    def ADFtest(self):
        from statsmodels.tsa.stattools import adfuller
        self.ADF = adfuller(self.df['spread'],maxlag=1)
        return self.ADF[0]
    
    # Compute the required indicators for bollinger band
    def indicators(self):
        
        # Moving Average
        self.df['moving_average'] = self.df['spread'].rolling(self.lookback,center=False).mean()
        
        # Moving Standard Deviation
        self.df['moving_std_dev'] = self.df['spread'].rolling(self.lookback,center=False).std()

        # Upper band and lower band
        self.df['upper_band'] = self.df['moving_average'] + self.std_dev*self.df['moving_std_dev']
        self.df['lower_band'] = self.df['moving_average'] - self.std_dev*self.df['moving_std_dev']
    
    # Compute the positions based on the signal
    def positions(self):
        
        # Long positions
        self.df['long_entry'] = self.df['spread'] < self.df['lower_band']
        self.df['long_exit'] = self.df['spread'] >= self.df['moving_average']

        self.df['positions_long'] = np.nan
        self.df.loc[self.df['long_entry'], 'positions_long'] = 1
        self.df.loc[self.df['long_exit'], 'positions_long'] = 0
        self.df['positions_long'] = self.df['positions_long'].fillna(method='ffill')

        # Short positions
        self.df['short_entry'] = self.df['spread'] > self.df['upper_band']
        self.df['short_exit'] = self.df['spread'] <= self.df['moving_average']

        self.df['positions_short'] = np.nan
        self.df.loc[self.df['short_entry'], 'positions_short'] = -1
        self.df.loc[self.df['short_exit'], 'positions_short'] = 0
        self.df['positions_short'] = self.df['positions_short'].fillna(method='ffill')
        
        # Positions
        self.df['positions'] = self.df['positions_long'] + self.df['positions_short']
    
    # Calculate the strategy returns
    def returns(self):
        
        self.df['percentage_change'] = (self.df['spread'] - self.df['spread'].shift(1))/(self.model.params[0]*self.df[self.x] + self.df[self.y])
        self.df['strategy_returns'] = self.df['positions'].shift(1) * self.df['percentage_change']
        self.df['cumulative_returns'] = (self.df['strategy_returns']+1).cumprod()
        print("The total strategy returns are " ,((self.df['cumulative_returns'].iloc[-1]-1)*100))
        return (self.df['cumulative_returns'].iloc[-1]-1)*100
    
    # Calculate the max drawdown of the returns
    def calc_drawdown(self):
        # Calculate the running maximum
        self.running_max = np.maximum.accumulate(self.df['cumulative_returns'].dropna())
        # Ensure the value never drops below 1
        self.running_max[self.running_max < 1] = 1
        # Calculate the percentage drawdown
        self.drawdown = (self.df['cumulative_returns'])/self.running_max - 1
        return self.drawdown.min()*100

In [4]:
# Run the strategy by considering different look back period and number of standard deviations away from the mean
Stocks_X_list = ['^NSEI']
Stocks_Y_list = ['^NSEBANK']

X=[]
Y=[]

lookback_list =range(5,25,2)
stdev_list = [1,1.5,2,2.5]

lookbackperiod =[]
stdev_range = []
Hedge_ratio = []
ADF_Tstat = []
Strat_Returns = []
Drawdown = []

for i in Stocks_X_list:
    for j in Stocks_Y_list:
        for k in lookback_list:
            for l in stdev_list:
                if i == j:
                    continue
                else:
                
            
                    Pairs = mean_reversion_pairs(i,j,start,end,k,l)
                    X.append(i)
                    Y.append(j)
                    lookbackperiod.append(k)
                    stdev_range.append(l)
                    Hedge_ratio.append(Pairs.hedge_ratio())
                    ADF_Tstat.append(Pairs.ADFtest())
                    Strat_Returns.append(Pairs.returns())
                    Drawdown.append(Pairs.calc_drawdown())

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
The total strategy returns are  -2.0755697522062966
The total strategy returns are  -2.0755697522062966
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
The total strategy returns are  -1.46209386773839
The total strategy returns are  -1.46209386773839
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
The total strategy returns are  0.0
The total strategy returns are  0.0
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
The total strategy returns are  0.0
The total strategy returns are  0.0
[*********************100%***********************]  1 of 1 completed
[*********************100%*****

[*********************100%***********************]  1 of 1 completed
The total strategy returns are  -0.48365195313512244
The total strategy returns are  -0.48365195313512244
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
The total strategy returns are  0.8557388371371433
The total strategy returns are  0.8557388371371433
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
The total strategy returns are  -3.1627292838319443
The total strategy returns are  -3.1627292838319443
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
The total strategy returns are  -2.7123192111488725
The total strategy returns are  -2.7123192111488725
[*********************100%***********************]  1 of 1 completed
[*********************100%******

In [5]:
# Convert the results in to dataframe for validation
Pair_Strategy = pd.DataFrame({'Stock_X':X,'Stock_Y':Y,'Lookback' :lookbackperiod,'Std dev':stdev_range,'Hedge_Ratio':Hedge_ratio,'ADF Tstat': ADF_Tstat,'Strategy Returns':Strat_Returns,'Max Drawdown': Drawdown})


In [6]:
# Sort the dataframe based on highest returns
Pair_Strategy.sort_values(by='Strategy Returns',ascending=False)


Unnamed: 0,Stock_X,Stock_Y,Lookback,Std dev,Hedge_Ratio,ADF Tstat,Strategy Returns,Max Drawdown
35,^NSEI,^NSEBANK,21,2.5,2.176734,-2.454596,0.855739,-2.172023
20,^NSEI,^NSEBANK,15,1.0,2.176734,-2.454596,0.798223,-2.865298
27,^NSEI,^NSEBANK,17,2.5,2.176734,-2.454596,0.781845,-2.172023
31,^NSEI,^NSEBANK,19,2.5,2.176734,-2.454596,0.781845,-2.172023
39,^NSEI,^NSEBANK,23,2.5,2.176734,-2.454596,0.660588,-1.922077
15,^NSEI,^NSEBANK,11,2.5,2.176734,-2.454596,0.327231,-0.616239
14,^NSEI,^NSEBANK,11,2.0,2.176734,-2.454596,0.212441,-1.583649
6,^NSEI,^NSEBANK,7,2.0,2.176734,-2.454596,0.204148,-1.583649
30,^NSEI,^NSEBANK,19,2.0,2.176734,-2.454596,0.069502,-3.038033
22,^NSEI,^NSEBANK,15,2.0,2.176734,-2.454596,0.067798,-3.038033
