# ECONOMETRIA

In [3]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from linearmodels.iv import IV2SLS as IV

In [4]:
import wooldridge

In [5]:
# Load the 'mroz' dataset shipped with the `wooldridge` package.
df = wooldridge.data('mroz')

In [6]:
# Preview the first rows to check the available columns.
df.head()

Unnamed: 0,inlf,hours,kidslt6,kidsge6,age,educ,wage,repwage,hushrs,husage,...,faminc,mtr,motheduc,fatheduc,unem,city,exper,nwifeinc,lwage,expersq
0,1,1610,1,0,32,12,3.354,2.65,2708,34,...,16310.0,0.7215,12,7,5.0,0,14,10.91006,1.210154,196
1,1,1656,0,2,30,12,1.3889,2.65,2310,30,...,21800.0,0.6615,7,7,11.0,1,5,19.499981,0.328512,25
2,1,1980,1,3,35,12,4.5455,4.04,3072,40,...,21040.0,0.6915,12,7,5.0,0,15,12.03991,1.514138,225
3,1,456,0,3,34,12,1.0965,3.25,1920,53,...,7300.0,0.7815,7,7,5.0,0,6,6.799996,0.092123,36
4,1,1568,1,2,31,14,4.5918,3.6,2000,32,...,27300.0,0.6215,12,14,9.5,1,7,20.100058,1.524272,49


In [7]:
# (rows, columns) of the raw dataset, before any rows are dropped.
df.shape

(753, 22)

In [8]:
# Count missing values per column to see which variables are incomplete.
df.isna().sum()

inlf          0
hours         0
kidslt6       0
kidsge6       0
age           0
educ          0
wage        325
repwage       0
hushrs        0
husage        0
huseduc       0
huswage       0
faminc        0
mtr           0
motheduc      0
fatheduc      0
unem          0
city          0
exper         0
nwifeinc      0
lwage       325
expersq       0
dtype: int64

In [9]:
# Keep only complete rows. In this dataset the only columns with missing
# values are `wage` and `lwage` (325 rows each), so this retains the
# observations for which a wage is recorded.
df = df.dropna()

In [10]:
# Check the row count and dtypes after dropping incomplete rows.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 428 entries, 0 to 427
Data columns (total 22 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   inlf      428 non-null    int64  
 1   hours     428 non-null    int64  
 2   kidslt6   428 non-null    int64  
 3   kidsge6   428 non-null    int64  
 4   age       428 non-null    int64  
 5   educ      428 non-null    int64  
 6   wage      428 non-null    float64
 7   repwage   428 non-null    float64
 8   hushrs    428 non-null    int64  
 9   husage    428 non-null    int64  
 10  huseduc   428 non-null    int64  
 11  huswage   428 non-null    float64
 12  faminc    428 non-null    float64
 13  mtr       428 non-null    float64
 14  motheduc  428 non-null    int64  
 15  fatheduc  428 non-null    int64  
 16  unem      428 non-null    float64
 17  city      428 non-null    int64  
 18  exper     428 non-null    int64  
 19  nwifeinc  428 non-null    float64
 20  lwage     428 non-null    float6

In [11]:
# Confirm that no missing values remain after `dropna()`.
df.isna().sum()

inlf        0
hours       0
kidslt6     0
kidsge6     0
age         0
educ        0
wage        0
repwage     0
hushrs      0
husage      0
huseduc     0
huswage     0
faminc      0
mtr         0
motheduc    0
fatheduc    0
unem        0
city        0
exper       0
nwifeinc    0
lwage       0
expersq     0
dtype: int64

## MC2E

In [12]:
# Pieces of the wage equation estimated below by 2SLS (MC2E).
Y1 = df.loc[:, 'lwage']                  # dependent variable
Y2 = df.loc[:, 'educ']                   # regressor treated as endogenous
X = df.loc[:, ['exper', 'expersq']]      # included exogenous regressors
Z = df.loc[:, ['motheduc', 'fatheduc']]  # instruments used for educ

In [13]:
# First-stage regressors: exogenous variables together with the instruments.
exog_inst = [X, Z]
reg1e = pd.concat(exog_inst, axis='columns')

# OLS (MCO) regressors: exogenous variables together with observed educ.
exog_MCO = [X, Y2]
regMCO = pd.concat(exog_MCO, axis='columns')

In [14]:
# Display the first-stage regressor matrix (exper, expersq, motheduc, fatheduc).
reg1e

Unnamed: 0,exper,expersq,motheduc,fatheduc
0,14,196,12,7
1,5,25,7,7
2,15,225,12,7
3,6,36,7,7
4,7,49,12,14
...,...,...,...,...
423,2,4,7,7
424,21,441,7,7
425,22,484,7,7
426,14,196,12,16


In [15]:
# First stage: OLS of educ on a constant plus the columns of reg1e
# (exogenous regressors and instruments).
model_1e = sm.OLS(endog=df['educ'], exog=sm.add_constant(reg1e))

In [16]:
# Fit the first-stage regression.
result_1e = model_1e.fit()

In [17]:
# Show the first-stage regression table.
print(result_1e.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.211
Model:                            OLS   Adj. R-squared:                  0.204
Method:                 Least Squares   F-statistic:                     28.36
Date:                Sat, 02 Sep 2023   Prob (F-statistic):           6.87e-21
Time:                        18:49:43   Log-Likelihood:                -909.72
No. Observations:                 428   AIC:                             1829.
Df Residuals:                     423   BIC:                             1850.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          9.1026      0.427     21.340      0.0

In [20]:
# Joint F test on the first stage: H0 is that the coefficients on both
# instruments (fatheduc and motheduc) are zero.
test_F = result_1e.f_test(r_matrix='fatheduc = 0 , motheduc = 0 ')

print(test_F)

<F test: F=55.400300427777104, p=4.268908724631867e-22, df_denom=423, df_num=2>


In [21]:
# Store the first-stage fitted values of educ.
# `fittedvalues` is a Series carrying the estimation sample's index, so the
# assignment is index-aligned rather than relying on positional order as the
# plain array from `predict()` would.
df['educ_hat'] = result_1e.fittedvalues

In [22]:
# Second-stage regressors: exogenous variables plus the fitted educ
# (educ_hat) in place of observed educ.
reg2e = pd.concat([X, df['educ_hat']], axis='columns')

In [23]:
# Preview the second-stage regressor matrix.
reg2e.head()

Unnamed: 0,exper,expersq,educ_hat
0,14,196,12.756017
1,5,25,11.733558
2,15,225,12.771979
3,6,36,11.767683
4,7,49,13.914615


Ahora construimos el modelo tomando como variable dependiente la variable dependiente de la ecuación original ($y_1$) y como variables explicativas las variables explicativas exógenas de la ecuación original (las $x$, siguiendo la notación inicial) y la variable estimada en la primera etapa (¡$\hat{y}_2$ y no $y_2$!).


In [24]:
# Manual second stage: OLS of lwage on a constant, the exogenous regressors
# and the first-stage fitted educ.
model_2e = sm.OLS(endog=Y1, exog=sm.add_constant(reg2e))
result_2e = model_2e.fit()

print(result_2e.summary())

                            OLS Regression Results                            
Dep. Variable:                  lwage   R-squared:                       0.050
Model:                            OLS   Adj. R-squared:                  0.043
Method:                 Least Squares   F-statistic:                     7.405
Date:                Sat, 02 Sep 2023   Prob (F-statistic):           7.62e-05
Time:                        18:49:53   Log-Likelihood:                -457.17
No. Observations:                 428   AIC:                             922.3
Df Residuals:                     424   BIC:                             938.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0481      0.420      0.115      0.9

Esta forma manual de realizar el procedimiento de MCO en 2 etapas es solo ilustrativa, porque los errores estándar (ee) que produce son erróneos. Es necesario utilizar la función contenida en el paquete para que calcule automáticamente las 2 etapas, incluyendo también la corrección de los errores estándar.

In [25]:
# 2SLS model specification: educ is instrumented with motheduc and fatheduc.
MC2E = IV(
    dependent=Y1,
    exog=sm.add_constant(X),
    endog=Y2,
    instruments=Z,
)

In [26]:
# Estimate the 2SLS model with the default fit options.
resultado = MC2E.fit()

In [27]:
# Show the estimation table; `summary` is accessed as an attribute here
# (no call parentheses).
print(resultado.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                  lwage   R-squared:                      0.1357
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1296
No. Observations:                 428   F-statistic:                    18.611
Date:                Sat, Sep 02 2023   P-value (F-stat)                0.0003
Time:                        18:49:55   Distribution:                  chi2(3)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const          0.0481     0.4278     0.1124     0.9105     -0.7903      0.8865
exper          0.0442     0.0155     2.8546     0.00

In [28]:
from linearmodels.iv import compare

In [29]:
# OLS benchmark fitted through the IV class with no endogenous regressor
# and no instruments, so it can be placed in the same comparison table.
result_MCO = IV(
    dependent=Y1,
    exog=sm.add_constant(regMCO),
    endog=None,
    instruments=None,
).fit()

In [30]:
# Manual second stage (educ_hat used as an ordinary regressor), fitted
# through the IV class so it can be placed in the same comparison table.
result_man = IV(
    dependent=Y1,
    exog=sm.add_constant(reg2e),
    endog=None,
    instruments=None,
).fit()

In [31]:
# Proper 2SLS estimate: educ instrumented with motheduc and fatheduc.
result_MC2E = IV(
    dependent=Y1,
    exog=sm.add_constant(X),
    endog=Y2,
    instruments=Z,
).fit()

In [32]:
# Side-by-side table of the three estimates; standard errors are reported
# under each coefficient and significance stars are added.
resultados_comp = compare(
    {
        'MCO': result_MCO,
        'MC2E manual': result_man,
        'MC2E': result_MC2E,
    },
    stars=True,
    precision='std_errors',
)

In [33]:
# Print the model comparison table.
print(resultados_comp)

                        Model Comparison                       
                                MCO   MC2E manual          MC2E
---------------------------------------------------------------
Dep. Variable                 lwage         lwage         lwage
Estimator                       OLS           OLS       IV-2SLS
No. Observations                428           428           428
Cov. Est.                    robust        robust        robust
R-squared                    0.1568        0.0498        0.1357
Adj. R-squared               0.1509        0.0431        0.1296
F-statistic                  82.671        17.111        18.611
P-value (F-stat)             0.0000        0.0007        0.0003
const                    -0.5220***        0.0481        0.0481
                           (0.2007)      (0.4492)      (0.4278)
exper                     0.0416***     0.0442***     0.0442***
                           (0.0152)      (0.0163)      (0.0155)
expersq                    -0.0008*     

In [34]:
# NOTE(review): exploratory documentation lookup for the comparison object;
# it only prints help text and looks like a candidate for removal from the
# final notebook.
help(resultados_comp)

Help on IVModelComparison in module linearmodels.iv.results object:

class IVModelComparison(linearmodels.shared.base._ModelComparison)
 |  IVModelComparison(results: 'Sequence[AnyResult] | dict[str, AnyResult]', *, precision: 'str' = 'tstats', stars: 'bool' = False)
 |  
 |  Comparison of multiple models
 |  
 |  Parameters
 |  ----------
 |  results : {list, dict}
 |      Set of results to compare.  If a dict, the keys will be used as model
 |      names.
 |  precision : {"tstats","std_errors", "std-errors", "pvalues"}
 |      Estimator precision estimator to include in the comparison output.
 |      Default is "tstats".
 |  stars : bool
 |      Add stars based on the p-value of the coefficient where 1, 2 and
 |      3-stars correspond to p-values of 10%, 5% and 1%, respectively.
 |  
 |  Method resolution order:
 |      IVModelComparison
 |      linearmodels.shared.base._ModelComparison
 |      linearmodels.shared.base._SummaryStr
 |      builtins.object
 |  
 |  Methods defined her