In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.preprocessing import PolynomialFeatures

In [2]:
df = pd.read_csv("GOLD.csv")

In [3]:
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Pred,new
0,"May 04, 2017",28060,28400,28482,28025,0.08K,-1.79%,738.0,117.570740
1,"May 05, 2017",28184,28136,28382,28135,0.06K,0.44%,-146.0,295.430176
2,"May 08, 2017",28119,28145,28255,28097,7.85K,-0.23%,30.0,132.123714
3,"May 09, 2017",27981,28125,28192,27947,10.10K,-0.49%,357.0,101.298064
4,"May 10, 2017",28007,28060,28146,27981,9.28K,0.09%,124.0,112.153318
5,"May 11, 2017",28022,27995,28100,27945,9.72K,0.05%,149.0,182.427089
6,"May 12, 2017",28019,28088,28195,27985,9.48K,-0.01%,167.0,141.255137
7,"May 15, 2017",28008,28049,28157,27996,8.76K,-0.04%,22.0,120.069010
8,"May 16, 2017",28109,28025,28159,28025,7.73K,0.36%,34.0,218.401641
9,"May 17, 2017",28614,28170,28638,28170,15.92K,1.80%,420.0,919.376358


In [4]:
predNull = df[df.Pred.isnull()]
prednonNull = df[~df.Pred.isnull()]

### Training for 'Pred' Column

In [5]:
## Assigning the Values for Training
X = prednonNull[['Price','Open','High','Low']]
y = prednonNull.Pred

## Now splitting the data for training and evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

## Now Applying Linear Model 
model = LinearRegression()
model.fit(X_train,y_train)

# Predict
y_predicted = model.predict(X_test)


rmse = mean_squared_error(y_test, y_predicted)
r2 = r2_score(y_test, y_predicted)

# printing values
print('Slope:' ,model.coef_)
print('Intercept:', model.intercept_)
print('Root mean squared error: ', rmse*100)
print('R2 score: ', r2)

Slope: [ 2.  3. -1. -4.]
Intercept: 2.05773176276125e-11
Root mean squared error:  7.806506351503324e-20
R2 score:  1.0


### Training for 'New' column

In [6]:
## Now lets train model for new column
## Assigning the values we have

x1 = df[['Price','Open','High','Low']]
y1 = df.new

X1_train, X1_test, y1_train, y1_test = train_test_split(x1, y1, test_size=0.2, random_state=32)

model1 = LinearRegression()
model1.fit(X1_train,y1_train)

y1predict = model1.predict(X1_test)

# model evaluation
rmse1 = mean_squared_error(y1_test, y1predict)
r21 = r2_score(y1_test, y1predict)

# printing values
print('Slope:' ,model1.coef_)
print('Intercept:', model1.intercept_)
print('Root mean squared error: ', rmse1*100)
print('R2 score: ', r21)

Slope: [ 1.01328746 -1.00030478  1.00495156 -1.01799734]
Intercept: 0.33049980693567704
Root mean squared error:  20.770237132866896
R2 score:  0.9999943052145427


### Using Polynomial Function

<p> Lower the rmse value better the fit <p>

In [7]:
polynomial_features= PolynomialFeatures(degree=2)
x_poly_train = polynomial_features.fit_transform(X1_train)
x_poly_test = polynomial_features.fit_transform(X1_test)
model3 = LinearRegression()
model3.fit(x_poly_train, y1_train)
y_poly_pred = model3.predict(x_poly_test)

rmse2 = np.sqrt(mean_squared_error(y1_test,y_poly_pred))
r22 = r2_score(y1_test,y_poly_pred)
# printing values
print('Slope:' ,model3.coef_)
print('Intercept:', model3.intercept_)
print('Root mean squared error: ', rmse2*100)
print('R2 score: ', r22)

Slope: [ 0.00000000e+00  1.01130808e+00 -1.00044090e+00  1.00481408e+00
 -1.01574695e+00  1.67967817e-07 -1.15633457e-06  3.28375419e-05
 -3.23847148e-05 -5.50020409e-07  8.27571033e-07  1.44559470e-06
 -1.84659417e-07 -3.34556920e-05  3.24545467e-05]
Intercept: 0.33024505497769496
Root mean squared error:  1.6030631207201724
R2 score:  0.9999999929540889


### CAPM

In [8]:
from datetime import datetime ,timedelta
import pandas_datareader.data as pdr

start = datetime(2017 , 5 ,3)
end = datetime(2019, 5 , 3)

## Reading data and saving only Close Price in other data frame.
data = pdr.DataReader(['NVDA','^GSPC'],'yahoo',start,end)
db = data.Close
db.head(3)

Symbols,NVDA,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-05-02,103.480003,2391.169922
2017-05-03,104.25,2388.129883
2017-05-04,103.849998,2389.52002


In [9]:
db.isnull().values.any()

False

In [10]:
import statsmodels.api as sm


today = db.index.max()
past = today - timedelta(days=90)
mon3 = db[past:today]
display(today,past,mon3.head(3))

Timestamp('2019-05-03 00:00:00')

Timestamp('2019-02-02 00:00:00')

Symbols,NVDA,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-04,149.179993,2724.870117
2019-02-05,149.949997,2737.699951
2019-02-06,153.0,2731.610107


In [11]:
## Calculating the Daily Returns
return_daily = mon3.pct_change(1)
return_daily.columns=['NVDA','^GSPC']
return_daily.dropna(inplace=True)
return_daily.head(5)

Unnamed: 0_level_0,NVDA,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-05,0.005162,0.004708
2019-02-06,0.02034,-0.002224
2019-02-07,-0.036471,-0.009357
2019-02-08,0.005088,0.000676
2019-02-11,-0.011608,0.000709


In [12]:
x = return_daily.NVDA
y = return_daily['^GSPC']

## Applying the Model
x1 = sm.add_constant(x)


## Regression Model
model = sm.OLS(y,x1)

results = model.fit()
print(results.summary())
print('Beta for our model is',results.params)

                            OLS Regression Results                            
Dep. Variable:                  ^GSPC   R-squared:                       0.400
Model:                            OLS   Adj. R-squared:                  0.390
Method:                 Least Squares   F-statistic:                     39.92
Date:                Sat, 15 Aug 2020   Prob (F-statistic):           3.59e-08
Time:                        06:54:29   Log-Likelihood:                 248.12
No. Observations:                  62   AIC:                            -492.2
Df Residuals:                      60   BIC:                            -488.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0007      0.001      1.133      0.2

  return ptp(axis=axis, out=out, **kwargs)


In [13]:

## This Method is deprecated 
# g = db.resample('M',how='mean')

## Therefore we will use the Grouper with group_by 
m = db.groupby(pd.Grouper(freq='M')).mean()
m.columns = ['NVDA','^GSPC']


## Now using linear OMS
month_return = m.pct_change()
month_return.dropna(inplace=True)
month_return.head(3)


x2 = month_return.NVDA
y2 = month_return['^GSPC']


x3 = sm.add_constant(x2)
model1 = sm.OLS(y2 , x3)
result = model1.fit()

print(result.summary())
print('The Beta is ' , result.params)

                            OLS Regression Results                            
Dep. Variable:                  ^GSPC   R-squared:                       0.489
Model:                            OLS   Adj. R-squared:                  0.466
Method:                 Least Squares   F-statistic:                     21.06
Date:                Sat, 15 Aug 2020   Prob (F-statistic):           0.000143
Time:                        06:54:30   Log-Likelihood:                 61.804
No. Observations:                  24   AIC:                            -119.6
Df Residuals:                      22   BIC:                            -117.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0051      0.004      1.265      0.2

### Covarience

In [14]:
x_c = np.array(month_return.NVDA).reshape(-1,1)
y_c = np.array(month_return['^GSPC']).reshape(-1,1)


model3 = LinearRegression()
result3 = model3.fit(x_c,y_c)
print('Coefficient or Beta or slope is',str(result3.coef_))

Coefficient or Beta or slope is [[0.17378383]]


### Inferences from Beta values

<p> if beta=0 then there is no correlation between stock and index(eg Sensex)<br>
if beta=1 then it increases or decreases in same amount with the market value<br>
if beta>1 then the price will increase or decrease more prominently<br>
if beta less than 1 then the prices will increase or decrease less prominently<br>
if beta=negative then there will be inverse relation <p>

In [15]:
start = datetime(2017 , 5 ,3)
end = datetime(2019, 5 , 3)

## Reading data and saving only Close Price in other data frame.
data = pdr.DataReader(['MSFT','^GSPC'],'yahoo',start,end)
db = data.Close

today = db.index.max()
past = today - timedelta(days=60)
mon3 = db[past:today]


## Calculating the Daily Returns
return_daily = mon3.pct_change(1)
return_daily.columns=['MSFT','^GSPC']
return_daily.dropna(inplace=True)
return_daily.head(5)

x = return_daily['MSFT']
y = return_daily['^GSPC']

## Applying the Model
x1 = sm.add_constant(x)


## Regression Model
model = sm.OLS(y,x1)

results = model.fit()
print(results.summary())
print('Beta for our model is',results.params)

                            OLS Regression Results                            
Dep. Variable:                  ^GSPC   R-squared:                       0.559
Model:                            OLS   Adj. R-squared:                  0.548
Method:                 Least Squares   F-statistic:                     51.88
Date:                Sat, 15 Aug 2020   Prob (F-statistic):           8.53e-09
Time:                        06:54:32   Log-Likelihood:                 177.17
No. Observations:                  43   AIC:                            -350.3
Df Residuals:                      41   BIC:                            -346.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -8.645e-05      0.001     -0.135      0.8

  return ptp(axis=axis, out=out, **kwargs)
