In [None]:
'''
Introdução à Econometria - Uma abordagem moderna (Tradução da 6ª edição norte-americana)
Autor: WOOLDRIDGE, J.
Editora: CENGAGE LEARNING

Cap. 8: Heterocedasticidade (Heteroskedasticity)
Exemplo 8.1: Equação log dos salários com erros-padrão robustos em relação à heterocedasticidade
             (LOG WAGE EQUATIONS WITH HETEROSKEDASTICITY-ROBUST STANDARD ERRORS)
             
Arquivo com os dados: wage1.xls

Arquivo com dados em:
http://students.cengage.com.br/dashboard/private/livroView.jsf;jsessionid=95E9AD889A4A4B7ABBD2A5251F1E14BE?id=104577

Em caso de dúvidas ou problemas, solicitamos, por gentileza, entrar em contato pelo e-mail:
python.economia@gmail.com
'''

In [1]:
import pandas as pd
import statsmodels.formula.api as smf

import statsmodels.iolib.summary2 as sis

In [2]:
# The sheet is read with header=None (no header row is consumed), so the
# variables needed for this example are selected by column position and named
# here. Mapping: position in wage1.xls -> column name used in this notebook.
wage1_columns = {
    1: 'educ',
    2: 'exper',
    3: 'tenure',
    5: 'female',
    6: 'married',
    21: 'log_wage',
    22: 'expersq',
    23: 'tenuresq',
}

df = pd.read_excel(
    'wage1.xls',
    header=None,
    usecols=list(wage1_columns),
    names=list(wage1_columns.values()),
)

In [3]:
# Preview the first two rows to check that the selected columns were read and named as intended.
df.head(2)

Unnamed: 0,educ,exper,tenure,female,married,log_wage,expersq,tenuresq
0,11,2,0,1,0,1.131402,4,0
1,12,22,2,1,1,1.175573,484,4


In [4]:
# Lookup that flips a 0/1 indicator (0 -> 1, 1 -> 0). With Series.map, any
# value that is not a key of this dict becomes NaN.
flip_indicator = {0: 1, 1: 0}

df['male'] = df['female'].map(flip_indicator)     # 1 = man, 0 = woman
df['single'] = df['married'].map(flip_indicator)  # 1 = single, 0 = married

In [5]:
# Show the first rows again, now including the derived 'male' and 'single' columns.
df.head()

Unnamed: 0,educ,exper,tenure,female,married,log_wage,expersq,tenuresq,male,single
0,11,2,0,1,0,1.131402,4,0,0,1
1,12,22,2,1,1,1.175573,484,4,0,0
2,11,2,0,0,0,1.098612,4,0,1,1
3,8,44,28,0,1,1.791759,1936,784,1,0
4,12,7,2,0,1,1.667707,49,4,1,0


In [6]:
# Marital-status x gender interaction dummies. Each one is the element-wise
# product of two indicator columns, so (for 0/1 inputs) it is 1 only when both
# conditions hold and 0 otherwise.
df['marmale'] = df['married'].mul(df['male'])    # 1 = married man
df['marfem'] = df['married'].mul(df['female'])   # 1 = married woman
df['singfem'] = df['single'].mul(df['female'])   # 1 = single woman

In [7]:
# Show the first rows again, now including the 'marmale', 'marfem' and 'singfem' interaction dummies.
df.head()

Unnamed: 0,educ,exper,tenure,female,married,log_wage,expersq,tenuresq,male,single,marmale,marfem,singfem
0,11,2,0,1,0,1.131402,4,0,0,1,0,0,1
1,12,22,2,1,1,1.175573,484,4,0,0,0,1,0
2,11,2,0,0,0,1.098612,4,0,1,1,0,0,0
3,8,44,28,0,1,1.791759,1936,784,1,0,1,0,0
4,12,7,2,0,1,1.667707,49,4,1,0,1,0,0


### Solução sugerida 1

In [8]:
# Log-wage equation of Example 8.1: the three marital-status/gender dummies
# plus education and quadratics in experience and tenure. No dummy is included
# for single men, so with the intercept they act as the reference group.
wage_formula = 'log_wage ~ marmale + marfem + singfem + educ + exper + expersq + tenure + tenuresq'

model = smf.ols(formula=wage_formula, data=df)

# Fit with the default settings; the robust standard errors are taken from
# this same results object afterwards.
reg = model.fit()

In [9]:
# One row per regressor: OLS estimate, usual OLS standard error and the
# HC0 (heteroskedasticity-robust) standard error, all rounded to 4 decimals.
tabela = pd.DataFrame({
    'params_MQO': reg.params.round(4),
    'se_MQO': reg.bse.round(4),
    'se_robust': reg.HC0_se.round(4),
})

print(tabela)
# Sample size and R-squared are reported underneath the coefficient table.
print(f"\nNúm. Obs.: {int(reg.nobs)}\nR_quadrado: {round(reg.rsquared, 3)}")

           params_MQO  se_MQO  se_robust
Intercept      0.3214  0.1000     0.1085
marmale        0.2127  0.0554     0.0567
marfem        -0.1983  0.0578     0.0583
singfem       -0.1104  0.0557     0.0566
educ           0.0789  0.0067     0.0074
exper          0.0268  0.0052     0.0051
expersq       -0.0005  0.0001     0.0001
tenure         0.0291  0.0068     0.0069
tenuresq      -0.0005  0.0002     0.0002

Núm. Obs.: 526
R_quadrado: 0.461


### Solução sugerida 2

In [10]:
# Same log-wage specification as in the first suggested solution; here the
# model is fitted twice so that each results object carries its own standard errors.
wage_formula = 'log_wage ~ marmale + marfem + singfem + educ + exper + expersq + tenure + tenuresq'

model = smf.ols(formula=wage_formula, data=df)

reg = model.fit()                       # default (non-robust) covariance
reg_robust = model.fit(cov_type='HC0')  # HC0 heteroskedasticity-robust covariance

In [11]:
# Side-by-side summary of the two fits: the first column comes from the default
# fit, the second from the HC0 fit. Values are formatted with three decimals
# and significance stars are turned off.
results_table = sis.summary_col(
    results=[reg, reg_robust],
    model_names=['se_usual', 'se_robusto'],
    float_format='%0.3f',
    stars=False,
)

# Title above the table and the number of observations as a footer line.
results_table.add_title('Exemplo 8.1')
results_table.add_text(f'Núm. de obs.: {int(model.nobs)}')

print(results_table)

           Exemplo 8.1
               se_usual se_robusto
----------------------------------
Intercept      0.321    0.321     
               (0.100)  (0.109)   
marmale        0.213    0.213     
               (0.055)  (0.057)   
marfem         -0.198   -0.198    
               (0.058)  (0.058)   
singfem        -0.110   -0.110    
               (0.056)  (0.057)   
educ           0.079    0.079     
               (0.007)  (0.007)   
exper          0.027    0.027     
               (0.005)  (0.005)   
expersq        -0.001   -0.001    
               (0.000)  (0.000)   
tenure         0.029    0.029     
               (0.007)  (0.007)   
tenuresq       -0.001   -0.001    
               (0.000)  (0.000)   
R-squared      0.461    0.461     
R-squared Adj. 0.453    0.453     
Standard errors in parentheses.
Núm. de obs.: 526
