In [None]:
'''
Introdução à Econometria - Uma abordagem moderna (Tradução da 6ª edição norte-americana)
Autor: WOOLDRIDGE, J. M.
Editora: CENGAGE LEARNING

Cap. 6: Análise de regressão múltipla: problemas adicionais (Multiple Regression Analysis)
Exemplo 6.1: Efeitos da poluição sobre os preços de imóveis
             (EFFECTS OF POLLUTION ON HOUSING PRICES)
             
Arquivo com os dados: hprice2.xls

Arquivo com dados em:
http://students.cengage.com.br/dashboard/private/livroView.jsf;jsessionid=95E9AD889A4A4B7ABBD2A5251F1E14BE?id=104577

Em caso de dúvidas ou problemas, solicitamos, por gentileza, entrar em contato pelo e-mail:
python.economia@gmail.com
'''

In [1]:
import pandas as pd
import statsmodels.formula.api as smf

In [2]:
# Read the data for Example 6.1 from the workbook.
# header=None: pandas treats the first row as data, so the column names come from `names`.
# usecols keeps only the six columns (selected by position) that receive those names.
df = pd.read_excel(
    io='hprice2.xls',
    header=None,
    usecols=[0, 1, 2, 3, 4, 7],
    names=['price', 'crime', 'nox', 'rooms', 'dist', 'stratio'],
)

In [3]:
# Preview the first rows of the loaded data to check the column selection and names.
df.head()

Unnamed: 0,price,crime,nox,rooms,dist,stratio
0,24000,0.006,5.38,6.57,4.09,15.3
1,21599,0.027,4.69,6.42,4.97,17.8
2,34700,0.027,4.69,7.18,4.97,17.8
3,33400,0.032,4.58,7.0,6.06,18.7
4,36199,0.069,4.58,7.15,6.06,18.7


### Comandos sugeridos

In [4]:
# Build "zdf" with every variable standardized (z-score): subtract each column's mean
# and divide by that column's standard deviation (pandas' default sample std, ddof=1).
zdf = df.sub(df.mean()).div(df.std())

In [5]:
# Inspect the standardized values before the columns are renamed.
zdf.head()

Unnamed: 0,price,crime,nox,rooms,dist,stratio
0,0.161637,-0.419724,-0.146567,0.40699,0.13971,-1.458703
1,-0.09909,-0.41728,-0.742219,0.193495,0.557537,-0.304406
2,1.323562,-0.41728,-0.742219,1.275201,0.557537,-0.304406
3,1.182393,-0.416698,-0.837178,1.019008,1.075072,0.111141
4,1.48634,-0.41239,-0.837178,1.232503,1.075072,0.111141


In [6]:
# Prefix every standardized column with "z" (price -> zprice, crime -> zcrime, ...).
# The frame is rebuilt from "df" rather than renaming "zdf" itself: `zdf = zdf.add_prefix('z')`
# stacks one more "z" on each re-run of the cell (zzprice, ...), which breaks the
# regression formulas further down. Built from "df", the result is the same on every run.
zdf = ((df - df.mean()) / df.std()).add_prefix('z')

In [7]:
# Confirm that every column now carries the "z" prefix used in the regression formulas.
zdf.head()

Unnamed: 0,zprice,zcrime,znox,zrooms,zdist,zstratio
0,0.161637,-0.419724,-0.146567,0.40699,0.13971,-1.458703
1,-0.09909,-0.41728,-0.742219,0.193495,0.557537,-0.304406
2,1.323562,-0.41728,-0.742219,1.275201,0.557537,-0.304406
3,1.182393,-0.416698,-0.837178,1.019008,1.075072,0.111141
4,1.48634,-0.41239,-0.837178,1.232503,1.075072,0.111141


In [8]:
# Beta coefficients: every variable was standardized, i.e. converted to its z-score.
# No intercept is needed because it would be zero; the trailing "- 1" in the formula drops it.
# Reproduces the equation presented in the example.
modelo_z = smf.ols(
    formula='zprice ~ znox + zcrime + zrooms + zdist + zstratio - 1',
    data=zdf,
)
reg_z = modelo_z.fit()
reg_z.summary()

0,1,2,3
Dep. Variable:,zprice,R-squared (uncentered):,0.636
Model:,OLS,Adj. R-squared (uncentered):,0.632
Method:,Least Squares,F-statistic:,174.8
Date:,"Tue, 05 Jul 2022",Prob (F-statistic):,2.19e-107
Time:,15:36:38,Log-Likelihood:,-462.03
No. Observations:,506,AIC:,934.1
Df Residuals:,501,BIC:,955.2
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
znox,-0.3404,0.044,-7.651,0.000,-0.428,-0.253
zcrime,-0.1433,0.031,-4.669,0.000,-0.204,-0.083
zrooms,0.5139,0.030,17.129,0.000,0.455,0.573
zdist,-0.2348,0.043,-5.464,0.000,-0.319,-0.150
zstratio,-0.2703,0.030,-9.027,0.000,-0.329,-0.211

0,1,2,3
Omnibus:,272.145,Durbin-Watson:,0.865
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2647.578
Skew:,2.15,Prob(JB):,0.0
Kurtosis:,13.348,Cond. No.,3.33


In [None]:
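# Comparação entre as estatísticas t (variáveis padronizadas e não padronizadas)
# A padronização das variáveis não afeta a significância estatística:
# mantido o intercepto nas duas regressões, as estatísticas t são idênticas.
# Na regressão acima, sem intercepto, os resíduos têm um grau de liberdade a mais (501 em vez de 500),
# por isso as estatísticas t diferem ligeiramente (por um fator de sqrt(501/500)).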

In [9]:
# t statistics of the OLS regression above (standardized variables, intercept removed)
reg_z.tvalues

znox        -7.651052
zcrime      -4.669298
zrooms      17.129490
zdist       -5.464059
zstratio    -9.027422
dtype: float64

In [10]:
# OLS on the original (non-standardized) variables, to compare the t statistics.
# The label slice keeps the five regressors and leaves out the intercept's t value.
modelo = smf.ols(
    formula='price ~ nox + crime + rooms + dist + stratio',
    data=df,
)
reg = modelo.fit()
reg.tvalues.loc['nox':'stratio']

nox        -7.643413
crime      -4.664636
rooms      17.112387
dist       -5.458603
stratio    -9.018408
dtype: float64

In [11]:
# OLS on the standardized variables, this time keeping the intercept
# (it is estimated as zero, as the book points out).
modelo2_z = smf.ols(
    formula='zprice ~ znox + zcrime + zrooms + zdist + zstratio',  # no "- 1": the constant stays in
    data=zdf,
)
reg2_z = modelo2_z.fit()
# Rounded to 10 decimals so floating-point noise does not hide that the intercept is zero.
print(f"Intercepto: {round(reg2_z.params['Intercept'], 10)}")
print(f"\nEstatísticas t:\n{reg2_z.tvalues.loc['znox':'zstratio']}")

Intercepto: -0.0

Estatísticas t:
znox        -7.643413
zcrime      -4.664636
zrooms      17.112387
zdist       -5.458603
zstratio    -9.018408
dtype: float64
