<a href="https://colab.research.google.com/github/Belac44/Deep-Learning/blob/main/Endogeneity_Problems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import pandas as pd
import matplotlib as mp
import statsmodels.api as sm

from statsmodels.sandbox.regression.gmm import IV2SLS 
# There is a package named IV2SLS in Python. Do not use this package! The exogenous explanatory variables must
# be entered as instruments. So it gives wrong answers
from statsmodels.sandbox.regression.gmm import GMM

In [7]:
input_table = pd.read_csv('sri.csv')
input_table.head()

Unnamed: 0,Constant,Stock Change,Inventory Turnover,Operating Profit,Interaction Effect,Current Ratio,Quick Ratio,Debt Asset Ratio
0,1,0.870332,1.795946,0.115846,0.208053,1.672527,0.255171,0.473317
1,1,-0.047347,1.395501,0.436967,0.609788,1.637261,0.221763,0.489967
2,1,0.001176,1.664563,0.541016,0.900555,1.640619,0.189141,0.374269
3,1,-0.9012,1.605738,0.539399,0.866133,1.436221,0.131944,0.224399
4,1,-0.176353,1.591451,0.539938,0.859285,1.43314,0.183095,0.213446


In [16]:
input_table.shape

(1696, 9)

In [8]:
model_iv = sm.OLS(input_table["Inventory Turnover"],input_table[["Constant","Current Ratio","Quick Ratio",\
                                                                 "Debt Asset Ratio"]]).fit()
endog_predict = model_iv.predict(input_table[["Constant","Current Ratio","Quick Ratio","Debt Asset Ratio"]])
input_table["Endogenous Param"] = endog_predict

In [9]:
model_2sls = sm.OLS(input_table["Stock Change"], input_table[["Constant","Endogenous Param",\
                                                              "Operating Profit","Interaction Effect",\
                                                             ]]).fit()
model_2sls.summary()

0,1,2,3
Dep. Variable:,Stock Change,R-squared:,0.015
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,8.53
Date:,"Sat, 05 Nov 2022",Prob (F-statistic):,1.27e-05
Time:,17:58:05,Log-Likelihood:,-1186.5
No. Observations:,1696,AIC:,2381.0
Df Residuals:,1692,BIC:,2403.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Constant,-0.0176,0.020,-0.896,0.370,-0.056,0.021
Endogenous Param,0.0011,0.001,1.827,0.068,-7.76e-05,0.002
Operating Profit,-0.1201,0.028,-4.319,0.000,-0.175,-0.066
Interaction Effect,0.0014,0.000,3.621,0.000,0.001,0.002

0,1,2,3
Omnibus:,368.832,Durbin-Watson:,2.243
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3433.92
Skew:,0.742,Prob(JB):,0.0
Kurtosis:,9.811,Cond. No.,109.0


In [18]:
y_vals  = np.array(input_table["Stock Change"])
x_vals  = np.array(input_table[["Inventory Turnover","Operating Profit","Interaction Effect"]])
iv_vals = np.array(input_table[["Current Ratio","Quick Ratio","Debt Asset Ratio"]])

#. Endogeneity bias can cause inconsistent estimates (i.e., not tend to be the true value as sample size 
# increases), which potentially leads to wrong inferences, misleading conclusions and incorrect 
# theoretical interpretations.

#The error term in endogeneity bias is unobservable, so there is no direct way to statistically test 
# that an endogenous variable is correlated with the error term
class gmm(GMM):
    def momcond(self, params):
        p0, p1, p2, p3 = params
        endog = self.endog
        exog = self.exog
        inst = self.instrument   

        error0 = endog - p0 - p1 * exog[:,0] - p2 * exog[:,1] - p3 * exog[:,2] #(1696, )
        error1 = (endog - p0 - p1 * exog[:,0] - p2 * exog[:,1] - p3 * exog[:,2]) * exog[:,1] #(1696, )
        error2 = (endog - p0 - p1 * exog[:,0] - p2 * exog[:,1] - p3 * exog[:,2]) * exog[:,2] #(1696, )
        error3 = (endog - p0 - p1 * exog[:,0] - p2 * exog[:,1] - p3 * exog[:,2]) * inst[:,0] #(1696, )
        error4 = (endog - p0 - p1 * exog[:,0] - p2 * exog[:,1] - p3 * exog[:,2]) * inst[:,1] #(1696, )
        error5 = (endog - p0 - p1 * exog[:,0] - p2 * exog[:,1] - p3 * exog[:,2]) * inst[:,2] #(1696, )

        input_table["Error 0"] = error0
        input_table["Error 1"] = error1
        input_table["Error 2"] = error2
        input_table["Error 3"] = error3
        input_table["Error 4"] = error4
        input_table["Error 5"] = error5

        g = np.column_stack((error0, error1, error2, error3, error4, error5)) #(1696, 6)
        return g


beta0 = np.array([0.1, 0.1, 0.1, 0.1])
res = gmm(endog = y_vals, exog = x_vals, instrument = iv_vals, k_moms=6, k_params=4).fit(beta0)

res.summary()

Optimization terminated successfully.
         Current function value: 0.000046
         Iterations: 8
         Function evaluations: 12
         Gradient evaluations: 12
Optimization terminated successfully.
         Current function value: 0.000373
         Iterations: 7
         Function evaluations: 13
         Gradient evaluations: 13
Optimization terminated successfully.
         Current function value: 0.000372
         Iterations: 5
         Function evaluations: 9
         Gradient evaluations: 9
Optimization terminated successfully.
         Current function value: 0.000372
         Iterations: 5
         Function evaluations: 11
         Gradient evaluations: 11
Optimization terminated successfully.
         Current function value: 0.000372
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1


0,1,2,3
Dep. Variable:,y,Hansen J:,0.6317
Model:,gmm,Prob (Hansen J):,0.729
Method:,GMM,,
Date:,"Sat, 05 Nov 2022",,
Time:,18:12:34,,
No. Observations:,1696,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p 0,-0.0200,0.021,-0.964,0.335,-0.061,0.021
p 1,0.0011,0.001,1.843,0.065,-6.89e-05,0.002
p 2,-0.1071,0.032,-3.370,0.001,-0.169,-0.045
p 3,0.0011,0.000,2.760,0.006,0.000,0.002


In [19]:
input_table.head()
# The bias term should be a 6 by 3 matrix to enable proper matrix multiplication and obtain a 6 by 1 error term

Unnamed: 0,Constant,Stock Change,Inventory Turnover,Operating Profit,Interaction Effect,Current Ratio,Quick Ratio,Debt Asset Ratio,Endogenous Param,Error 0,Error 1,Error 2,Error 3,Error 4,Error 5
0,1,0.870332,1.795946,0.115846,0.208053,1.672527,0.255171,0.473317,10.724599,0.900588,0.104329,0.18737,1.506258,0.229804,0.426264
1,1,-0.047347,1.395501,0.436967,0.609788,1.637261,0.221763,0.489967,10.812673,0.017267,0.007545,0.010529,0.02827,0.003829,0.00846
2,1,0.001176,1.664563,0.541016,0.900555,1.640619,0.189141,0.374269,9.001338,0.076308,0.041284,0.068719,0.125192,0.014433,0.02856
3,1,-0.9012,1.605738,0.539399,0.866133,1.436221,0.131944,0.224399,11.995705,-0.826139,-0.445618,-0.715546,-1.186518,-0.109004,-0.185385
4,1,-0.176353,1.591451,0.539938,0.859285,1.43314,0.183095,0.213446,13.536649,-0.10121,-0.054647,-0.086969,-0.145049,-0.018531,-0.021603


In [20]:
#Assuming a matrix of the form 
#[[a b c],
# [d e f],
# [g h i],
# [j k l],
# [m n o],
# [p, q, r]] then, a + b + c == error0(column1), then d + e + f == error2(column1), g + h + 1 = error3(column1) ......
# With that, you can form a mtrix for each column that will epresent the bias.
# Since the bias can be obtained by dicing the result by the 3 by 1 ones matrix i.e([[1 1 1]]), the bias can be obtained
# finding the cube of the error term. Adding for the whole column, we get the following dataframe with the following biases

input_table["Bias"] = (input_table["Error 0"]*3 + input_table["Error 1"]*3 + input_table["Error 2"]*3 +  input_table["Error 3"]*3 + 
input_table["Error 4"]*3 + input_table["Error 5"]*3 )/6


In [21]:
input_table.head()

Unnamed: 0,Constant,Stock Change,Inventory Turnover,Operating Profit,Interaction Effect,Current Ratio,Quick Ratio,Debt Asset Ratio,Endogenous Param,Error 0,Error 1,Error 2,Error 3,Error 4,Error 5,Bias
0,1,0.870332,1.795946,0.115846,0.208053,1.672527,0.255171,0.473317,10.724599,0.900588,0.104329,0.18737,1.506258,0.229804,0.426264,1.677306
1,1,-0.047347,1.395501,0.436967,0.609788,1.637261,0.221763,0.489967,10.812673,0.017267,0.007545,0.010529,0.02827,0.003829,0.00846,0.03795
2,1,0.001176,1.664563,0.541016,0.900555,1.640619,0.189141,0.374269,9.001338,0.076308,0.041284,0.068719,0.125192,0.014433,0.02856,0.177248
3,1,-0.9012,1.605738,0.539399,0.866133,1.436221,0.131944,0.224399,11.995705,-0.826139,-0.445618,-0.715546,-1.186518,-0.109004,-0.185385,-1.734105
4,1,-0.176353,1.591451,0.539938,0.859285,1.43314,0.183095,0.213446,13.536649,-0.10121,-0.054647,-0.086969,-0.145049,-0.018531,-0.021603,-0.214005


In [23]:
# This bias can now be incoporated into the instrumental variale moment expression
input_table.to_csv("finaldata.csv")