### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

### Generating a random data with 5 columns of 100 values each and a seperate y column

In [6]:
np.random.seed(999)

data = pd.DataFrame(np.random.uniform(0,10,(100,6)),
                   columns=['x1','x2','x3','x4','x5','y'])
data

Unnamed: 0,x1,x2,x3,x4,x5,y
0,8.034280,5.275223,1.191115,6.396814,0.909253,3.322257
1,4.273809,5.543858,6.281265,6.973929,7.899497,1.318904
2,3.427705,2.015596,7.073242,0.333993,9.092500,4.051607
3,7.604355,4.737584,2.867189,7.512925,0.970899,4.123578
4,2.816390,3.902778,8.711092,0.812451,5.579312,5.475343
...,...,...,...,...,...,...
95,2.100351,9.688033,9.462332,0.654448,8.618300,4.713433
96,5.264365,1.370052,1.402034,9.898785,9.880575,9.983077
97,6.395436,2.352666,2.745412,6.391808,6.044036,2.168950
98,9.882815,5.199813,0.201809,7.495199,5.374360,2.124887


### Saving the dataframe as a csv file

In [24]:
data.to_csv(r'C:\Users\Manoj\E21007.csv', index=False)

### Fitting the regression model with all features

In [13]:
results = sm.OLS(endog= data['y'],exog=sm.add_constant(data.iloc[:,:5])).fit()
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.051
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.016
Date:,"Fri, 02 Jul 2021",Prob (F-statistic):,0.413
Time:,12:57:19,Log-Likelihood:,-244.69
No. Observations:,100,AIC:,501.4
Df Residuals:,94,BIC:,517.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.3666,1.246,4.306,0.000,2.892,7.841
x1,-0.0317,0.100,-0.317,0.752,-0.231,0.167
x2,-0.1240,0.100,-1.239,0.218,-0.323,0.075
x3,-0.0682,0.107,-0.637,0.525,-0.281,0.144
x4,-0.0747,0.100,-0.744,0.459,-0.274,0.125
x5,0.1401,0.098,1.429,0.156,-0.055,0.335

0,1,2,3
Omnibus:,17.654,Durbin-Watson:,1.799
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5.212
Skew:,0.199,Prob(JB):,0.0738
Kurtosis:,1.955,Cond. No.,49.7


### Fitting the model with any 2 features

In [14]:
results = sm.OLS(endog= data['y'],exog=sm.add_constant(data.iloc[:,2:4])).fit()
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.015
Model:,OLS,Adj. R-squared:,-0.005
Method:,Least Squares,F-statistic:,0.7346
Date:,"Fri, 02 Jul 2021",Prob (F-statistic):,0.482
Time:,12:58:48,Log-Likelihood:,-246.57
No. Observations:,100,AIC:,499.1
Df Residuals:,97,BIC:,507.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.4349,0.737,7.370,0.000,3.971,6.899
x3,-0.0899,0.107,-0.843,0.401,-0.302,0.122
x4,-0.0898,0.099,-0.904,0.368,-0.287,0.107

0,1,2,3
Omnibus:,17.225,Durbin-Watson:,1.829
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5.156
Skew:,0.199,Prob(JB):,0.0759
Kurtosis:,1.961,Cond. No.,18.2


### Calculating the Partial F - Statistic

In [18]:
# Precidting the values of y for 2 features
yhat = results.predict(sm.add_constant(data.iloc[:,2:4]))
yhat

0     4.753707
1     4.244206
2     4.768937
3     4.502822
4     4.578720
        ...   
95    4.525350
96    4.420433
97    4.614394
98    4.744083
99    4.642610
Length: 100, dtype: float64

In [19]:
hypotheses = '(x3 = 0), (x4 = 0)'
f_test = results.f_test(hypotheses)
print(f_test)

<F test: F=array([[0.73460927]]), p=0.48234222410339134, df_denom=97, df_num=2>


### Checking for stationarity and Granger Casuality Test

In [25]:
from statsmodels.tsa.stattools import grangercausalitytests, adfuller

In [27]:
## Checking for stationarity

def check_stationarity(y):
    result = adfuller(y)
    print('ADF Statitic : %f' %result[0])
    print('p - value : %f' %result[1])
    print('Critical Values')
    for key, value in result[4].items():
        print('\t%s : %3f' %(key, value))

In [30]:
check_stationarity(yhat)

ADF Statitic : -9.824680
p - value : 0.000000
Critical Values
	1% : -3.498198
	5% : -2.891208
	10% : -2.582596


In [35]:
#Granger Casuality Test
grangercausalitytests(data.iloc[:,2:4], maxlag = 3)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=2.6436  , p=0.1072  , df_denom=96, df_num=1
ssr based chi2 test:   chi2=2.7262  , p=0.0987  , df=1
likelihood ratio test: chi2=2.6894  , p=0.1010  , df=1
parameter F test:         F=2.6436  , p=0.1072  , df_denom=96, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=1.8901  , p=0.1568  , df_denom=93, df_num=2
ssr based chi2 test:   chi2=3.9833  , p=0.1365  , df=2
likelihood ratio test: chi2=3.9045  , p=0.1420  , df=2
parameter F test:         F=1.8901  , p=0.1568  , df_denom=93, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.3345  , p=0.2682  , df_denom=90, df_num=3
ssr based chi2 test:   chi2=4.3148  , p=0.2294  , df=3
likelihood ratio test: chi2=4.2216  , p=0.2385  , df=3
parameter F test:         F=1.3345  , p=0.2682  , df_denom=90, df_num=3


{1: ({'ssr_ftest': (2.643628887488964, 0.10724425579054463, 96.0, 1),
   'ssr_chi2test': (2.7262422902229946, 0.09871129004194132, 1),
   'lrtest': (2.689380136046964, 0.10101910537563244, 1),
   'params_ftest': (2.643628887488961, 0.10724425579054463, 96.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c9d422cf08>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c9d4227888>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (1.8900518352565525, 0.15682101081150362, 93.0, 2),
   'ssr_chi2test': (3.9833350506482184, 0.13646767230674187, 2),
   'lrtest': (3.9045100620858193, 0.14195360061028872, 2),
   'params_ftest': (1.8900518352565323, 0.15682101081150715, 93.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c9d4227308>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c9d402dc48>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'ssr_ftest': (1.33448959233