In [151]:
!pip install linearmodels
!pip install wooldridge



In [152]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import wooldridge as wd
from scipy import stats
from linearmodels.iv import IV2SLS

def Regressao_Multipla(x, y, constante = "S", cov = "normal"):
    """Fit an OLS regression with statsmodels and print its summary.

    Besides printing, the function publishes its results as module-level
    globals so that later notebook cells can reuse them:

    * ``Resultado``     - the fitted results object returned by ``fit``;
    * ``Lista_ychapeu`` - in-sample fitted values (``Resultado.predict()``);
    * ``Resíduos``      - ``y`` minus the fitted values;
    * ``SQR``           - sum of squared residuals;
    * ``EPR``           - standard error of the regression, sqrt(SQR / GL).

    Parameters
    ----------
    x : regressors handed to ``sm.OLS``.
    y : dependent variable.
    constante : ``"S"`` adds an intercept with ``sm.add_constant``; any other
        value uses ``x`` exactly as given.
    cov : ``"robust"`` uses HC1 standard errors; ``"cluster"``/``"clustered"``
        uses cluster-robust standard errors and interactively asks for the
        name of the grouping column, which is read from the global ``df``;
        any other value uses the default covariance.

    Returns
    -------
    ``None`` on success. When the requested grouping column cannot be read
    from the global ``df``, the error message is returned as a string and no
    global is updated.
    """
    global Resultado, Lista_ychapeu, Resíduos, SQR, EPR

    # Add a constant to the OLS design matrix when requested.
    X = sm.add_constant(x) if constante == "S" else x

    # Build the model, then fit it with the requested standard-error option.
    Modelo = sm.OLS(y, X)

    if cov == "robust":
        Resultado = Modelo.fit(cov_type = 'HC1', use_t = True)
    elif cov == "cluster" or cov == "clustered":
        group = str(input("Qual o rótulo da coluna de grupo?"))
        # Only the column lookup is guarded: a failure inside ``fit`` is a
        # different problem and must surface with its own error message.
        try:
            grupos = df[group]
        except (KeyError, NameError):
            erro = "Não foi possível encontrar o grupo selecionado. Tente novamente!"
            return erro
        Resultado = Modelo.fit(cov_type = 'cluster', cov_kwds = {'groups': grupos}, use_t = True)
    else:
        Resultado = Modelo.fit()

    Lista_ychapeu = Resultado.predict()
    Resíduos = y - Lista_ychapeu

    # Standard error of the regression (EPR): sqrt(SQR / degrees of freedom).
    SQR = float(np.sum(np.square(Resíduos)))
    n_obs = len(y)
    GL = n_obs - len(Resultado.params)
    # With no residual degrees of freedom the variance is undefined.
    VarianciaReg = SQR / GL if GL > 0 else float("nan")
    EPR = float(np.sqrt(VarianciaReg))

    # Print the regression summary.
    print(Resultado.summary())

def Regressao_IV_MQ2E(exog, endog, instrumentos, y, constante="S",cov='normal', clusters=None):
    """Estimate a model with ``linearmodels.iv.IV2SLS`` and print the results.

    The fitted results are also stored in the module-level global
    ``Resultado``. The function itself returns ``None``.

    Parameters
    ----------
    exog : exogenous regressors, or ``None`` when the model has none.
    endog : endogenous regressors, or ``None``.
    instrumentos : instruments for ``endog``, or ``None``.
    y : dependent variable.
    constante : ``"S"`` adds an intercept to ``exog`` (or uses an
        intercept-only ``exog`` when ``exog`` is ``None``); any other value
        passes ``exog`` through unchanged.
    cov : ``"robust"``, ``"kernel"`` or ``"clustered"``/``"cluster"`` select
        the corresponding covariance estimator; any other value uses
        ``"unadjusted"``.
    clusters : optional cluster labels, forwarded to ``fit`` as ``clusters``
        when ``cov`` is clustered. When omitted, no cluster labels are passed.
    """
    global Resultado

    # Build the matrix of exogenous variables.
    if constante == "S":
        if exog is None:
            # No exogenous regressors: the intercept is the only exogenous
            # column. It is sized and indexed after ``y`` so it also works
            # when ``endog`` is None.
            exog = pd.DataFrame({'const': np.ones(len(y))}, index=getattr(y, 'index', None))
        else:
            # Errors raised here are left to propagate; replacing the user's
            # regressors with a lone constant would silently change the model.
            exog = sm.add_constant(exog)

    # Create the model, then fit it with the requested covariance option.
    Modelo = IV2SLS(y, exog, endog, instrumentos)
    if cov == "robust":
        Resultado = Modelo.fit(cov_type = 'robust')
    elif cov == 'kernel':  # robust to heteroskedasticity and serial correlation
        Resultado = Modelo.fit(cov_type = 'kernel')
    elif cov == 'clustered' or cov == 'cluster':
        # ``cluster_entity`` is a panel-model option and is not forwarded;
        # the IV clustered covariance is configured through ``clusters``.
        opcoes = {} if clusters is None else {'clusters': clusters}
        Resultado = Modelo.fit(cov_type = 'clustered', **opcoes)
    else:
        Resultado = Modelo.fit(cov_type='unadjusted')

    print(Resultado)

## Exercício C2

In [153]:
# Load the 'fertil2' dataset from the wooldridge package; the cells of this
# exercise read it through the global `df`.
df = wd.data('fertil2')

In [154]:
# Estimate the proposed model by OLS (no endogenous regressors or instruments).
exog = df[['educ','age','agesq']]
endog = None
instrumentos = None
y = df['children']

# The function prints its summary and returns None, so the call is not bound
# to a name; the fitted results are available in the global `Resultado`.
Regressao_IV_MQ2E(exog= exog, endog= endog, instrumentos= instrumentos, y=y)

                            OLS Estimation Summary                            
Dep. Variable:               children   R-squared:                      0.5687
Estimator:                        OLS   Adj. R-squared:                 0.5684
No. Observations:                4361   F-statistic:                    5750.9
Date:                Sun, May 26 2024   P-value (F-stat)                0.0000
Time:                        20:28:37   Distribution:                  chi2(3)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const         -4.1383     0.2405    -17.208     0.0000     -4.6096     -3.6670
educ          -0.0906     0.0059    -15.305     0.00

In [155]:
## Taking frsthalf as exogenous, what remains is to test its relevance:
## regress educ on frsthalf together with age and agesq.
exog = df[['frsthalf','age','agesq']]
endog = None
instrumentos = None
y = df['educ']

Regressao_IV_MQ2E(exog= exog, endog= endog, instrumentos= instrumentos, y=y)

                            OLS Estimation Summary                            
Dep. Variable:                   educ   R-squared:                      0.1077
Estimator:                        OLS   Adj. R-squared:                 0.1070
No. Observations:                4361   F-statistic:                    526.10
Date:                Sun, May 26 2024   P-value (F-stat)                0.0000
Time:                        20:28:37   Distribution:                  chi2(3)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const          9.6929     0.5978     16.214     0.0000      8.5212      10.865
frsthalf      -0.8523     0.1128    -7.5572     0.00

In [156]:
# Estimate the proposed model by 2SLS: educ is passed as the endogenous
# regressor and frsthalf as its instrument.
exog = df[['age','agesq']]
endog = df[['educ']]
instrumentos = df[['frsthalf']]
y = df['children']

Regressao_IV_MQ2E(exog= exog, endog= endog, instrumentos= instrumentos, y=y)

                          IV-2SLS Estimation Summary                          
Dep. Variable:               children   R-squared:                      0.5502
Estimator:                    IV-2SLS   Adj. R-squared:                 0.5499
No. Observations:                4361   F-statistic:                    5300.2
Date:                Sun, May 26 2024   P-value (F-stat)                0.0000
Time:                        20:28:37   Distribution:                  chi2(3)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const         -3.3878     0.5479    -6.1833     0.0000     -4.4617     -2.3139
age            0.3236     0.0179     18.128     0.00

In [157]:
# Add the exogenous dummies (electric, tv, bicycle) and estimate by 2SLS.
exog = df[['age','agesq','electric','tv','bicycle']]
endog = df[['educ']]
instrumentos = df[['frsthalf']]
y = df['children']

Regressao_IV_MQ2E(exog= exog, endog= endog, instrumentos= instrumentos, y=y)

                          IV-2SLS Estimation Summary                          
Dep. Variable:               children   R-squared:                      0.5577
Estimator:                    IV-2SLS   Adj. R-squared:                 0.5571
No. Observations:                4356   F-statistic:                    5539.2
Date:                Sun, May 26 2024   P-value (F-stat)                0.0000
Time:                        20:28:37   Distribution:                  chi2(6)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const         -3.5913     0.6446    -5.5717     0.0000     -4.8547     -2.3280
age            0.3281     0.0190     17.231     0.00

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(


In [158]:
## Add the exogenous dummies and estimate by OLS (educ is passed as an
## ordinary exogenous regressor here).
exog = df[['age','agesq','electric','tv','bicycle','educ']]
endog = None
instrumentos = None
y = df['children']

Regressao_IV_MQ2E(exog= exog, endog= endog, instrumentos= instrumentos, y=y)

                            OLS Estimation Summary                            
Dep. Variable:               children   R-squared:                      0.5761
Estimator:                        OLS   Adj. R-squared:                 0.5755
No. Observations:                4356   F-statistic:                    5919.0
Date:                Sun, May 26 2024   P-value (F-stat)                0.0000
Time:                        20:28:37   Distribution:                  chi2(6)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const         -4.3898     0.2401    -18.281     0.0000     -4.8604     -3.9191
age            0.3402     0.0164     20.708     0.00

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(


## Exercício C9

In [159]:
# Load the 'wage2' dataset from the wooldridge package. This rebinds the
# global `df`, replacing the dataset loaded for the previous exercise.
df = wd.data('wage2')

In [160]:
## Estimate the model by 2SLS: educ is the endogenous regressor and sibs its
## instrument.
exog = df[['exper','tenure','black']]
endog = df[['educ']]
instrumentos = df[['sibs']]
y = df['lwage']

Regressao_IV_MQ2E(exog= exog, endog= endog, instrumentos= instrumentos, y=y)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                  lwage   R-squared:                      0.1685
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1650
No. Observations:                 935   F-statistic:                    100.22
Date:                Sun, May 26 2024   P-value (F-stat)                0.0000
Time:                        20:28:37   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const          5.2160     0.5420     9.6236     0.0000      4.1537      6.2783
exper          0.0209     0.0084     2.5010     0.01

In [161]:
# Manual first stage by OLS: educ on the instrument sibs plus the exogenous
# regressors. Regressao_Multipla leaves the fitted values in the global
# `Lista_ychapeu`, which the next cell reads.
x = df[['sibs','exper','tenure','black']]
y = df['educ']

Regressao_Multipla(x,y)

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.264
Model:                            OLS   Adj. R-squared:                  0.261
Method:                 Least Squares   F-statistic:                     83.48
Date:                Sun, 26 May 2024   Prob (F-statistic):           1.38e-60
Time:                        20:28:37   Log-Likelihood:                -1918.6
No. Observations:                 935   AIC:                             3847.
Df Residuals:                     930   BIC:                             3871.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         16.4943      0.195     84.541      0.0

In [162]:
# `Lista_ychapeu` holds the fitted values of the most recent
# Regressao_Multipla call, so this cell must run right after the first-stage
# cell. The assignment adds (or overwrites) the column in `df` in place.
df['educ_chapeu'] = Lista_ychapeu

# Manual second stage by OLS: lwage on the exogenous regressors and the
# first-stage fitted values in place of educ.
x = df[['exper','tenure','black','educ_chapeu']]
y = df['lwage']

Regressao_Multipla(x,y)

                            OLS Regression Results                            
Dep. Variable:                  lwage   R-squared:                       0.089
Model:                            OLS   Adj. R-squared:                  0.085
Method:                 Least Squares   F-statistic:                     22.75
Date:                Sun, 26 May 2024   Prob (F-statistic):           5.99e-18
Time:                        20:28:37   Log-Likelihood:                -474.00
No. Observations:                 935   AIC:                             958.0
Df Residuals:                     930   BIC:                             982.2
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           5.2160      0.569      9.170      

In [163]:
# Alternative first stage: educ regressed on sibs only (exper, tenure and
# black are left out). The fitted values replace the previous contents of the
# global `Lista_ychapeu`.
x = df[['sibs']]
y = df['educ']

Regressao_Multipla(x,y)

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                  0.056
Method:                 Least Squares   F-statistic:                     56.67
Date:                Sun, 26 May 2024   Prob (F-statistic):           1.22e-13
Time:                        20:28:37   Log-Likelihood:                -2034.4
No. Observations:                 935   AIC:                             4073.
Df Residuals:                     933   BIC:                             4083.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         14.1388      0.113    124.969      0.0

In [164]:
# Overwrite educ_chapeu with the fitted values of the most recent
# Regressao_Multipla call (the sibs-only regression when cells are run in
# order).
df['educ_chapeu'] = Lista_ychapeu

# Second stage by OLS using the new educ_chapeu in place of educ.
x = df[['exper','tenure','black','educ_chapeu']]
y = df['lwage']

Regressao_Multipla(x,y)

                            OLS Regression Results                            
Dep. Variable:                  lwage   R-squared:                       0.089
Model:                            OLS   Adj. R-squared:                  0.085
Method:                 Least Squares   F-statistic:                     22.75
Date:                Sun, 26 May 2024   Prob (F-statistic):           5.99e-18
Time:                        20:28:37   Log-Likelihood:                -474.00
No. Observations:                 935   AIC:                             958.0
Df Residuals:                     930   BIC:                             982.2
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           5.7710      0.360     16.014      