In [4]:
import os
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Relative folder holding the Stata data sets read by the cells below.
caminho = os.path.join(os.pardir, 'dados', 'Data Sets- STATA')

## C1

In [5]:
# Read the VOTE1 Stata file from the data folder and preview the first rows.
filename = os.path.join(caminho, 'VOTE1.DTA')
dfc1 = pd.read_stata(
    filename,
    preserve_dtypes=False,
)
dfc1.head(5)

Unnamed: 0,state,district,democA,voteA,expendA,expendB,prtystrA,lexpendA,lexpendB,shareA
0,AL,7,1,68,328.29599,8.737,41,5.793916,2.167567,97.407669
1,AK,1,0,62,626.377014,402.47699,60,6.439952,5.997638,60.881039
2,AZ,2,1,73,99.607002,3.065,55,4.601233,1.120048,97.014763
3,AZ,3,0,69,319.690002,26.281,64,5.767352,3.268846,92.403702
4,AR,3,0,75,159.220993,60.054001,66,5.070293,4.095244,72.612473


## i
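b1 mede o efeito de expendA sobre voteA. Como expendA entra em log, um aumento de 1% em expendA altera voteA em aproximadamente b1/100 pontos percentuais, mantidos os demais fatores constantes.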
## ii
H0: b1 == -b2

In [6]:
# OLS of voteA on the log of both candidates' spending and party strength.
modelc1 = smf.ols(
    formula='voteA ~ np.log(expendA) + np.log(expendB) + prtystrA',
    data=dfc1,
)
regrc1 = modelc1.fit()

In [7]:
# Print the fitted C1 equation with the standard errors on the line below.
# Both spending regressors enter the model in logs, so both terms are
# labelled log(...); the expendB term was previously printed without "log".
print(f"voteA = {regrc1.params['Intercept']:.3f} + "
      f"{regrc1.params['np.log(expendA)']:.3f}log(expendA) + "
      f"{regrc1.params['np.log(expendB)']:.3f}log(expendB) + "
      f"{regrc1.params['prtystrA']:.3f}prtystrA")
print(f"\t({regrc1.bse['Intercept']:.3f}) \t({regrc1.bse['np.log(expendA)']:.3f}) "
      f"\t({regrc1.bse['np.log(expendB)']:.3f}) \t({regrc1.bse['prtystrA']:.3f})")

# Report the number of observations used by the fit itself rather than the
# row count of the raw frame, so the figure stays right if rows are dropped.
print(f'n = {int(regrc1.nobs)}, R^2 = {regrc1.rsquared:.4f}')

voteA = 45.079 + 6.083log(expendA) + -6.615expendB + 0.152prtystrA
	(3.926) 	(0.382) 	(0.379) 	(0.062)
n = 173, R^2 = 0.7926


## iv


In [8]:
# Reparametrised model: the coefficient on log(expendA) is now the sum of the
# two spending coefficients from the previous regression.
modelc1b = smf.ols(
    formula='voteA ~ np.log(expendA) + I(np.log(expendB) - np.log(expendA)) + prtystrA',
    data=dfc1,
)
regrc1b = modelc1b.fit()

In [9]:
# Print the reparametrised equation with standard errors underneath.
# The first slope multiplies log(expendA) (its coefficient is the theta being
# tested), and the difference term is bracketed so the printed equation
# cannot be misread as two separate terms.
print(f"voteA = {regrc1b.params['Intercept']:.3f} + "
      f"{regrc1b.params['np.log(expendA)']:.3f}log(expendA) + "
      f"{regrc1b.params['I(np.log(expendB) - np.log(expendA))']:.3f}[log(expendB)-log(expendA)] + "
      f"{regrc1b.params['prtystrA']:.3f}prtystrA")
print(f"\t({regrc1b.bse['Intercept']:.3f}) \t({regrc1b.bse['np.log(expendA)']:.3f}) "
      f"\t({regrc1b.bse['I(np.log(expendB) - np.log(expendA))']:.3f}) \t({regrc1b.bse['prtystrA']:.3f})")
# Use the observation count of the fit itself rather than the raw frame length.
print(f'n = {int(regrc1b.nobs)}, R^2 = {regrc1b.rsquared:.4f}')

voteA = 45.079 + -0.532O + -6.615log(expendB)-log(expendA) + 0.152prtystrA
	(3.926) 	(0.533) 	(0.379) 	(0.062)
n = 173, R^2 = 0.7926


H0: b1 = -b2, ou seja, θ = 0, com θ = b1 + b2 (θ é o coeficiente de log(expendA) na regressão acima). Para testar H0, basta estimar θ e o seu erro padrão, isto é, o erro padrão de b1 + b2.

When we estimate this equation we obtain $\hat{\theta}_1 \approx -0.532$ and $\mathrm{se}(\hat{\theta}_1) \approx 0.533$.
The $t$ statistic for the hypothesis in part (ii) is $-0.532/0.533 \approx -1$. Therefore, we fail to reject $H_0\colon \beta_2 = -\beta_1$.

In [10]:
# Full statsmodels summaries for the original and the reparametrised fit.
for fitted_c1 in (regrc1, regrc1b):
    print(fitted_c1.summary())

                            OLS Regression Results                            
Dep. Variable:                  voteA   R-squared:                       0.793
Model:                            OLS   Adj. R-squared:                  0.789
Method:                 Least Squares   F-statistic:                     215.2
Date:                Fri, 13 Jul 2018   Prob (F-statistic):           1.76e-57
Time:                        11:40:34   Log-Likelihood:                -596.86
No. Observations:                 173   AIC:                             1202.
Df Residuals:                     169   BIC:                             1214.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          45.0789      3.926     

In [20]:
def print_regr(desc_y, regr):
    """Print a fitted regression as an equation with standard errors below.

    Parameters
    ----------
    desc_y : str
        Label of the dependent variable, printed to the left of ``=``.
    regr : fitted regression results
        Object exposing ``params`` and ``bse`` (each with ``.items()``
        yielding ``(term name, value)`` pairs), ``nobs`` and ``rsquared``.

    Output
    ------
    Three printed lines: the equation (coefficients to 3 decimals), the
    tab-separated standard errors in the same term order, and the sample
    size together with R^2 (4 decimals).
    """
    def _term(name, coef):
        # The intercept is a bare constant; every other term names its regressor.
        if name == 'Intercept':
            return f'{coef:.3f}'
        return f'{coef:.3f} x {name}'

    # Label each term by its own name instead of overwriting position 0, so a
    # model without an intercept prints normally rather than raising KeyError.
    print(desc_y + ' = ' + ' + '.join(_term(name, coef) for name, coef in regr.params.items()))
    print('\t'.join(f'({se:.3f})' for _, se in regr.bse.items()))
    # nobs may be a float (it printed as 1230.0); show it as a whole count.
    print(f'n = {int(regr.nobs)}, R^2 = {regr.rsquared:.4f}')

In [21]:
# Read the HTV Stata file from the data folder and preview the first rows.
filename = os.path.join(caminho, 'HTV.DTA')
dfc11 = pd.read_stata(
    filename,
    preserve_dtypes=False,
)
dfc11.head(5)

Unnamed: 0,wage,abil,educ,ne,nc,west,south,exper,motheduc,fatheduc,...,ne18,nc18,south18,west18,urban18,tuit17,tuit18,lwage,expersq,ctuit
0,12.019231,5.027738,15,0,0,1,0,9,12,12,...,1,0,0,0,1,7.582914,7.260242,2.486508,81,-0.322671
1,8.912656,2.03717,13,1,0,0,0,8,12,10,...,1,0,0,0,1,8.595144,9.499537,2.187472,64,0.904392
2,15.514334,2.475895,15,1,0,0,0,11,12,16,...,1,0,0,0,1,7.311346,7.311346,2.741764,121,0.0
3,13.333333,3.60924,15,1,0,0,0,6,12,12,...,1,0,0,0,1,9.499537,10.16207,2.590267,36,0.662534
4,11.07011,2.636546,13,1,0,0,0,15,12,15,...,1,0,0,0,1,7.311346,7.311346,2.404249,225,0.0


In [22]:
# Baseline schooling regression: parents' education and ability.
modelc11 = smf.ols(
    formula='educ ~ motheduc + fatheduc + abil',
    data=dfc11,
)
regrc11 = modelc11.fit()
print_regr('educ', regrc11)

educ = 8.449 + 0.189 x motheduc + 0.111 x fatheduc + 0.502 x abil
(0.290)	(0.029)	(0.020)	(0.026)
n = 1230.0, R^2 = 0.4275


In [31]:
# Test whether mother's and father's education have the same coefficient,
# once as a t test and once as the equivalent single-restriction F test.
hypotheses = '(motheduc=fatheduc)'
t_test = regrc11.t_test(hypotheses)
f_test = regrc11.f_test(hypotheses)
print('t_test', t_test, sep='\n')
print('f_test', f_test, sep='\n')

t_test
                             Test for Constraints                             
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0780      0.043      1.834      0.067      -0.005       0.162
f_test
<F test: F=array([[3.36381227]]), p=0.0668858831671904, df_denom=1226, df_num=1>


In [25]:
# Baseline regression extended with the two tuition variables.
modelc11b = smf.ols(
    formula='educ ~ motheduc + fatheduc + abil + tuit17 + tuit18',
    data=dfc11,
)
regrc11b = modelc11b.fit()
print_regr('educ', regrc11b)

educ = 8.291 + 0.192 x motheduc + 0.111 x fatheduc + 0.500 x abil + 0.032 x tuit17 + -0.016 x tuit18
(0.315)	(0.029)	(0.020)	(0.026)	(0.063)	(0.065)
n = 1230.0, R^2 = 0.4283


In [33]:
# Joint test that both tuition coefficients are zero; the t test reports one
# row per restriction and the F test gives the joint result.
hypotheses = '(tuit17=tuit18=0)'
t_test = regrc11b.t_test(hypotheses)
f_test = regrc11b.f_test(hypotheses)
print('t_test', t_test, sep='\n')
print('f_test', f_test, sep='\n')

t_test
                             Test for Constraints                             
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0484      0.127      0.380      0.704      -0.201       0.298
c1            -0.0164      0.065     -0.255      0.799      -0.143       0.110
f_test
<F test: F=array([[0.88003953]]), p=0.41502878576960733, df_denom=1224, df_num=2>


In [34]:
# Correlation matrix of the two tuition variables.
np.corrcoef(dfc11.loc[:, 'tuit17'], dfc11.loc[:, 'tuit18'])

array([[1.        , 0.98083326],
       [0.98083326, 1.        ]])

In [36]:
# Highly correlated variables: use their average instead!
# NOTE(review): inside I(...) the expression is plain Python, where `/` binds
# tighter than `+`, so `tuit17 + tuit18 / 2` evaluates to tuit17 + (tuit18 / 2),
# not the mean (tuit17 + tuit18) / 2 that the comment above asks for.
# Correcting the formula changes the term name, and the later hypothesis cell
# quotes this term name verbatim, so both must be changed together and the
# notebook re-run.
modelc11c = smf.ols(formula='educ ~ motheduc + fatheduc + abil + I(tuit17 + tuit18 / 2)', data=dfc11)
regrc11c = modelc11c.fit()
print_regr('educ', regrc11c)

educ = 8.289 + 0.192 x motheduc + 0.111 x fatheduc + 0.500 x abil + 0.011 x I(tuit17 + tuit18 / 2)
(0.315)	(0.029)	(0.020)	(0.026)	(0.008)
n = 1230.0, R^2 = 0.4283


In [38]:
# t test that the coefficient on the combined tuition term is zero; the
# restriction must quote the term name exactly as it appears in the formula.
hypotheses = '(I(tuit17 + tuit18 / 2)=0)'
t_test = regrc11c.t_test(hypotheses)
print('t_test', t_test, sep='\n')

t_test
                             Test for Constraints                             
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0107      0.008      1.283      0.200      -0.006       0.027


In [40]:
# Full statsmodels summary for the combined-tuition regression.
summary_c11c = regrc11c.summary()
print(summary_c11c)

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.428
Model:                            OLS   Adj. R-squared:                  0.426
Method:                 Least Squares   F-statistic:                     229.4
Date:                Fri, 13 Jul 2018   Prob (F-statistic):          4.97e-147
Time:                        11:54:35   Log-Likelihood:                -2454.2
No. Observations:                1230   AIC:                             4918.
Df Residuals:                    1225   BIC:                             4944.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept                  8