# Collinearity Diagnostics

In [1]:
#=============================================================================================
# CODE NAME     : Collinearity Diagnostics.py
# PURPOSE       : Demonstrate application of multicollinearity using Pandas and statsmodels 
# APPLICATION   : Analyzing Fitness dataset
#==============================================================================================

In [1]:
# Load Input data
import numpy as np
import pandas as pd

fitness = pd.read_excel("C:\\Users\\Training\\Data Science using SAS and Python\\Data\\fitness.xlsx")
fitness.head()

Unnamed: 0,Name,Gender,RunTime,Age,Weight,Oxygen_Consumption,Run_Pulse,Rest_Pulse,Maximum_Pulse,Performance
0,Donna,F,8.17,42,68.15,59.57,166,40,172,90
1,Gracie,F,8.63,38,81.87,60.06,170,48,186,94
2,Luanne,F,8.65,43,85.84,54.3,156,45,168,83
3,Mimi,F,8.92,50,70.87,54.63,146,48,155,67
4,Chris,M,8.95,49,81.42,49.16,180,44,185,72


In [4]:
# Fit the full model with all the predictor variables
from statsmodels.formula.api import ols
full_model = ols('Oxygen_Consumption ~ RunTime + Age + Weight + Run_Pulse + Rest_Pulse + Maximum_Pulse + Performance', 
                 data = fitness).fit()
full_model.summary()

0,1,2,3
Dep. Variable:,Oxygen_Consumption,R-squared:,0.849
Model:,OLS,Adj. R-squared:,0.803
Method:,Least Squares,F-statistic:,18.42
Date:,"Sat, 12 Jun 2021",Prob (F-statistic):,4.9e-08
Time:,11:38:20,Log-Likelihood:,-66.075
No. Observations:,31,AIC:,148.1
Df Residuals:,23,BIC:,159.6
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,131.7825,72.208,1.825,0.081,-17.590,281.155
RunTime,-3.8602,2.937,-1.315,0.202,-9.935,2.215
Age,-0.4608,0.587,-0.786,0.440,-1.674,0.753
Weight,-0.0581,0.069,-0.843,0.408,-0.201,0.084
Run_Pulse,-0.3621,0.123,-2.938,0.007,-0.617,-0.107
Rest_Pulse,-0.0151,0.068,-0.222,0.826,-0.156,0.126
Maximum_Pulse,0.3010,0.140,2.153,0.042,0.012,0.590
Performance,-0.1262,0.301,-0.419,0.679,-0.749,0.496

0,1,2,3
Omnibus:,2.445,Durbin-Watson:,1.773
Prob(Omnibus):,0.294,Jarque-Bera (JB):,1.302
Skew:,0.033,Prob(JB):,0.522
Kurtosis:,4.002,Cond. No.,46100.0


# Get the VIF of all the predictors

In [6]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

variables = full_model.model.exog
vif = [variance_inflation_factor(variables, i) for i in range(variables.shape[1])]
pd.DataFrame(vif)

Unnamed: 0,0
0,28841.926715
1,88.862507
2,51.011759
3,1.763833
4,8.544976
5,1.444246
6,8.78755
7,162.853987


In [5]:
# Fit the model without Performance variable
from statsmodels.formula.api import ols
without_performance_model = ols('Oxygen_Consumption ~ RunTime + Age + Weight + Run_Pulse + Rest_Pulse + Maximum_Pulse', 
                                data = fitness).fit()
without_performance_model.summary()

0,1,2,3
Dep. Variable:,Oxygen_Consumption,R-squared:,0.847
Model:,OLS,Adj. R-squared:,0.809
Method:,Least Squares,F-statistic:,22.23
Date:,"Sat, 12 Jun 2021",Prob (F-statistic):,1.06e-08
Time:,11:41:19,Log-Likelihood:,-66.193
No. Observations:,31,AIC:,146.4
Df Residuals:,24,BIC:,156.4
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,101.9631,12.272,8.309,0.000,76.635,127.291
RunTime,-2.6399,0.385,-6.851,0.000,-3.435,-1.845
Age,-0.2185,0.099,-2.218,0.036,-0.422,-0.015
Weight,-0.0750,0.055,-1.366,0.185,-0.188,0.038
Run_Pulse,-0.3672,0.121,-3.047,0.006,-0.616,-0.119
Rest_Pulse,-0.0195,0.066,-0.295,0.771,-0.156,0.117
Maximum_Pulse,0.3046,0.137,2.221,0.036,0.022,0.588

0,1,2,3
Omnibus:,2.502,Durbin-Watson:,1.741
Prob(Omnibus):,0.286,Jarque-Bera (JB):,1.345
Skew:,-0.094,Prob(JB):,0.51
Kurtosis:,4.003,Cond. No.,7790.0


In [8]:
variables = without_performance_model.model.exog
vif = [variance_inflation_factor(variables, i) for i in range(variables.shape[1])]
pd.DataFrame(vif)

Unnamed: 0,0
0,862.676948
1,1.584325
2,1.489533
3,1.159726
4,8.46034
5,1.410043
6,8.755346


In [13]:
variables

array([[  1.  ,   8.17,  42.  ,  68.15, 166.  ,  40.  , 172.  ],
       [  1.  ,   8.63,  38.  ,  81.87, 170.  ,  48.  , 186.  ],
       [  1.  ,   8.65,  43.  ,  85.84, 156.  ,  45.  , 168.  ],
       [  1.  ,   8.92,  50.  ,  70.87, 146.  ,  48.  , 155.  ],
       [  1.  ,   8.95,  49.  ,  81.42, 180.  ,  44.  , 185.  ],
       [  1.  ,   9.22,  38.  ,  89.02, 178.  ,  55.  , 180.  ],
       [  1.  ,   9.4 ,  49.  ,  76.32, 186.  ,  56.  , 188.  ],
       [  1.  ,   9.63,  52.  ,  76.32, 164.  ,  48.  , 166.  ],
       [  1.  ,   9.93,  57.  ,  59.08, 148.  ,  49.  , 155.  ],
       [  1.  ,  10.  ,  51.  ,  77.91, 162.  ,  48.  , 168.  ],
       [  1.  ,  10.07,  40.  ,  75.07, 185.  ,  62.  , 185.  ],
       [  1.  ,  10.08,  49.  ,  73.37, 168.  ,  67.  , 168.  ],
       [  1.  ,  10.13,  44.  ,  73.03, 168.  ,  45.  , 168.  ],
       [  1.  ,  10.25,  48.  ,  91.63, 162.  ,  48.  , 164.  ],
       [  1.  ,  10.33,  54.  ,  83.12, 166.  ,  50.  , 170.  ],
       [  1.  ,  10.47,  