In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

## Load the csv file for Gold and Nifty

In [2]:
df = pd.read_csv('/home/user/Desktop/GOLD.csv')
dat = df.copy()

In [3]:
df.drop(['Date','Vol.','Change %'],axis = 1,inplace = True)

In [4]:
df.dtypes

Price      int64
Open       int64
High       int64
Low        int64
Pred     float64
new      float64
dtype: object

## Use standard scaler to change mean = 1 and standard deviation = 1

In [5]:
ss = StandardScaler()
ss2 = StandardScaler()

In [6]:
df[['Price','Open','High','Low']] = ss.fit_transform(df[['Price','Open','High','Low']])

In [7]:
df[['Pred']] = ss2.fit_transform(df[['Pred']])

In [8]:
lr = LinearRegression()

In [9]:
train = df[~np.isnan(df.Pred)]

In [10]:
test = df[np.isnan(df.Pred)]

In [11]:
x = train.drop(['Pred','new'],axis = 1)

In [12]:
y = train['Pred']

## Fitting linear regression model for Pred Column

In [13]:
lr.fit(x,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## r2 score = 1 indicating perfect fit

In [14]:
lr.score(x,y)

1.0

## Predicting remaining values and adding to dataframe

In [15]:
pred = lr.predict(test[['Price','Open','High','Low']])

In [16]:
pred = ss2.inverse_transform(pred)

In [17]:
dat['Pred'][-101:] = pred

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


## Viewing coefficients and intercept
### y = a0 + a1.x1 + a2.x2 + a3.x3 + a4.x4

In [18]:
lr.coef_

array([  9.47844457,  14.35579475,  -4.8228097 , -18.7732473 ])

In [19]:
lr.intercept_

0.10460116203430563

## Fitting the New Column

In [20]:
df[['new']] = ss2.fit_transform(df[['new']])

In [21]:
x2 = df.drop(['Pred','new'],axis = 1)
y2 = df['new']

In [22]:
from sklearn.model_selection import train_test_split

In [23]:
train_x,test_x,train_y,test_y = train_test_split(x2,y2)

In [24]:
lr.fit(train_x,train_y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Score is less than that for Pred column

In [25]:
lr.score(test_x,test_y)

0.9999929240005823

The results above indicate that Pred is a linear feature while, new is a polynomial feature

## Loading Nifty index and Stocks file

In [28]:
axis = pd.read_csv('/home/user/Desktop/AXISBANK.csv')
nf = pd.read_csv('/home/user/Desktop/Nifty50.csv')

In [29]:
axis.head()

Unnamed: 0,Symbol,Series,Date,Prev Close,Open Price,High Price,Low Price,Last Price,Close Price,Average Price,Total Traded Quantity,Turnover,No. of Trades,Deliverable Qty,% Dly Qt to Traded Qty
0,AXISBANK,EQ,15-May-2017,503.0,505.0,508.5,498.4,500.25,500.1,502.95,8806756,4429384000.0,109965,4320438,49.06
1,AXISBANK,EQ,16-May-2017,500.1,502.1,504.5,493.5,502.05,501.5,499.85,8610492,4303938000.0,101028,4027982,46.78
2,AXISBANK,EQ,17-May-2017,501.5,501.35,506.75,498.5,502.0,502.8,502.88,6443135,3240154000.0,84189,2876528,44.64
3,AXISBANK,EQ,18-May-2017,502.8,498.0,499.1,490.25,492.85,492.0,494.96,9227022,4566993000.0,95624,5348691,57.97
4,AXISBANK,EQ,19-May-2017,492.0,495.75,503.85,494.2,501.9,501.7,499.1,7611430,3798900000.0,72985,4190505,55.06


In [30]:
nf.head()

Unnamed: 0,Date,Open,High,Low,Close,Shares Traded,Turnover (Rs. Cr)
0,15-May-2017,9433.55,9449.25,9423.1,9445.4,145163953,7790.97
1,16-May-2017,9461.0,9517.2,9456.35,9512.25,171709433,9209.89
2,17-May-2017,9517.6,9532.6,9486.1,9525.75,211856040,10726.23
3,18-May-2017,9453.2,9489.1,9418.1,9429.45,199340647,10782.46
4,19-May-2017,9469.9,9505.75,9390.75,9427.9,259861396,11544.77


In [31]:
daily_prices = pd.concat([axis['Close Price'],nf['Close']],axis = 1,)

In [32]:
daily_prices.head()

Unnamed: 0,Close Price,Close
0,500.1,9445.4
1,501.5,9512.25
2,502.8,9525.75
3,492.0,9429.45
4,501.7,9427.9


In [33]:
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS

## Calculating daily beta value for given stocks

In [34]:
daily_chg = daily_prices.pct_change()

In [35]:
daily_chg.dropna(inplace=True)

In [36]:
daily_chg.rename(columns={'Close Price':'AxisBank','Close':'Nifty'},inplace=True)

In [37]:
x = daily_chg.Nifty
y = daily_chg.AxisBank

In [38]:
x1 = sm.add_constant(x)

In [42]:
model = OLS(y,x1)

In [43]:
res = model.fit()

In [44]:
res.summary()

0,1,2,3
Dep. Variable:,AxisBank,R-squared:,0.198
Model:,OLS,Adj. R-squared:,0.196
Method:,Least Squares,F-statistic:,121.3
Date:,"Wed, 13 May 2020",Prob (F-statistic):,2.25e-25
Time:,17:39:02,Log-Likelihood:,1352.8
No. Observations:,494,AIC:,-2702.0
Df Residuals:,492,BIC:,-2693.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0005,0.001,0.754,0.451,-0.001,0.002
Nifty,1.0806,0.098,11.016,0.000,0.888,1.273

0,1,2,3
Omnibus:,87.229,Durbin-Watson:,2.2
Prob(Omnibus):,0.0,Jarque-Bera (JB):,661.3
Skew:,0.512,Prob(JB):,2.5100000000000002e-144
Kurtosis:,8.575,Cond. No.,139.0


### beta = 1.0806
#### one indicates a stock has the same volatility as the market
#### more than one indicates a stock that’s more volatile than its benchmark
#### less than one is less volatile than the benchmark

## Calculating monthly beta values for the given stocks

In [48]:
nf.Date = pd.to_datetime(nf.Date)

In [49]:
axis.Date = pd.to_datetime(axis.Date)

In [57]:
nf['Month'] = nf.Date.dt.to_period('M')

In [58]:
axis['Month'] = axis.Date.dt.to_period('M')

In [63]:
nf_month = nf.groupby('Month').last()

In [64]:
axis_month = axis.groupby('Month').last()

In [65]:
monthly_prices = pd.concat([nf_month['Close'],axis_month['Close Price']],axis = 1)

In [66]:
monthly_chg = monthly_prices.pct_change()

In [67]:
monthly_chg.dropna(inplace=True)

In [69]:
monthly_chg.rename(columns={'Close':'Nifty','Close Price':'AxisBank'},inplace=True)

In [70]:
x = monthly_chg.Nifty
y = monthly_chg.AxisBank

In [71]:
x1 = sm.add_constant(x)

In [72]:
model = OLS(y,x1)

In [73]:
res = model.fit()

In [74]:
res.summary()

0,1,2,3
Dep. Variable:,AxisBank,R-squared:,0.357
Model:,OLS,Adj. R-squared:,0.327
Method:,Least Squares,F-statistic:,12.2
Date:,"Wed, 13 May 2020",Prob (F-statistic):,0.00206
Time:,17:47:51,Log-Likelihood:,35.769
No. Observations:,24,AIC:,-67.54
Df Residuals:,22,BIC:,-65.18
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0100,0.012,0.849,0.405,-0.014,0.034
Nifty,1.0131,0.290,3.492,0.002,0.411,1.615

0,1,2,3
Omnibus:,13.905,Durbin-Watson:,2.345
Prob(Omnibus):,0.001,Jarque-Bera (JB):,13.355
Skew:,1.43,Prob(JB):,0.00126
Kurtosis:,5.276,Cond. No.,25.0


### beta = 1.0131

Conclusion - 
From the regression models, we observe that the beta values of both monthly and daily prices are slightly greater than 1. This implies that the gold stock is slightly more volatile than the market standards