In [25]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input mount and echo the full path of every file found.
for folder, _, members in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, member) for member in members):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# What exactly do I want to test?

Can I systematically size and trade mean-reverting spreads in equities using statistics? 
Does this survive realistic frictions?

# Scope of Data
Asset Class -> Equities  
Market -> Indian Equities  
Frequency -> Daily (to start with)  
Horizon -> 5 years  
Strategy type -> long-short (for market neutral pairs)  
Capital Assumption -> 100 INR per pair (normalized) (constant gross exposure)  
Sector/Universe -> Automobiles

## Hypothesis

Within the Indian automobile sector, certain equity pairs exhibit statistically significant cointegration due to shared macro drivers. Deviations from equilibrium revert within a tradable horizon, allowing for profitable market-neutral strategies.

## Universe of Stocks

The following stocks were selected with the help of AI. They are all Indian automobile OEMs with similar revenue exposure.  

* Maruti Suzuki India Limited
* Mahindra & Mahindra
* Tata Motors
* Bajaj Auto
* Hero MotoCorp
* TVS Motor Company
* Eicher Motors
* Ashok Leyland

## Data Specification

* Source: Yahoo Finance
* Instrument type: NSE-listed equities
* Price field: Adjusted Close (the `Close` column that yfinance returns when prices are auto-adjusted)
* Frequency: Daily
* Time zone: IST
* Trading calendar: NSE holidays respected
* Lookback window: Rolling 5 years


In [26]:
import yfinance as yf

# Yahoo Finance symbols (".NS" suffix) for the automobile universe under study.
ticker_list = ['MARUTI.NS', 'M&M.NS', 'TMCV.NS', 'BAJAJ-AUTO.NS', 'HEROMOTOCO.NS', 'TVSMOTOR.NS', 'EICHERMOT.NS', 'ASHOKLEY.NS']

# Request adjusted prices explicitly: with auto_adjust=True the 'Close' field is
# the adjusted close, which is the price field the data specification calls for.
# Spelling it out also keeps the result independent of the library's default,
# which has changed between yfinance releases.
data = yf.download(ticker_list, period='5y', auto_adjust=True)

# Keep only the (adjusted) close panel: one column per ticker, indexed by date.
close = data['Close']
close.head()

  data = yf.download(ticker_list, period='5y')
[*********************100%***********************]  8 of 8 completed
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Ticker,ASHOKLEY.NS,BAJAJ-AUTO.NS,EICHERMOT.NS,HEROMOTOCO.NS,M&M.NS,MARUTI.NS,TMCV.NS,TVSMOTOR.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-04,47.563046,3080.315674,2531.947754,2561.118164,714.638916,7382.174316,,483.264954
2021-01-05,47.883022,3054.255859,2550.253174,2580.764648,706.052979,7337.271973,,479.607117
2021-01-06,48.043022,3028.06543,2522.080078,2594.521729,702.236938,7311.538574,,476.14447
2021-01-07,50.762867,3006.422119,2526.32251,2570.709961,710.155151,7251.586914,,487.020233
2021-01-08,51.562824,3086.174805,2632.197754,2659.773438,735.054382,7681.782227,,500.724701


In [27]:
# Sanity check: (rows, columns) of the close-price table.
close.shape

(1239, 8)

In [28]:
# Sanity check: the index should already be in ascending order (True expected).
close.index.is_monotonic_increasing

True

In [29]:
# Print the number of missing closes for each ticker, one count per line,
# in column order.
for missing_count in close.isna().sum():
    print(missing_count)
    

0
0
0
0
0
0
1201
0


In [30]:
# Drop Tata Motors (TMCV.NS) from the universe: its close series is missing for
# most of the lookback window, so it cannot be aligned with the other tickers.
# NOTE(review): confirm TMCV.NS is the intended Tata Motors symbol for a 5-year history.
# errors='ignore' keeps this cell idempotent -- without it, re-running the cell
# after the column is already gone raises a KeyError.
close = close.drop(columns=['TMCV.NS'], errors='ignore')

# Re-run the same sanity checks on the reduced table: shape, index ordering,
# and per-ticker missing-value counts.
print(close.shape)
print(close.index.is_monotonic_increasing)
for column in close.columns:
    print(close[column].isna().sum())

(1239, 7)
True
0
0
0
0
0
0
0


In [31]:
# Convert the close-price table to natural-log prices (element-wise).
log_prices = close.pipe(np.log)

In [32]:
# Preview the first rows of the log-price table.
log_prices.head()

Ticker,ASHOKLEY.NS,BAJAJ-AUTO.NS,EICHERMOT.NS,HEROMOTOCO.NS,M&M.NS,MARUTI.NS,TVSMOTOR.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-04,3.862056,8.032787,7.836744,7.848199,6.571777,8.906823,6.180565
2021-01-05,3.868761,8.024291,7.843948,7.855841,6.55969,8.900722,6.172967
2021-01-06,3.872097,8.015679,7.832839,7.861157,6.554271,8.897209,6.165721
2021-01-07,3.927165,8.008506,7.83452,7.851937,6.565483,8.888976,6.188306
2021-01-08,3.942801,8.034688,7.875574,7.885996,6.599944,8.946607,6.216056


In [33]:
# Write the log-price table to a CSV file in the current working directory.
log_prices.to_csv(path_or_buf="stat_arb_data.csv")