# Econometría Aplicada II
## Tarea 1
Importar librerías

In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.api import OLS

Importar datos

In [3]:
# Load the three CSV files (baseline, endline and the complete file) in one pass.
d1, d2, d3 = map(
    pd.read_csv,
    ('../dat/baseline.csv', '../dat/endline.csv', '../dat/completa.csv'),
)

### 1. Balance
Tabla de balance

In [62]:
# Number of rows per treatment arm (kept for reference; the standard errors
# below use per-variable non-missing counts instead).
n0, n1 = d1.groupby('T_nap').size()

# Select the 12 baseline variables
X = ['time_in_office','age_','female_','education_','sleep_night','no_of_children_','act_inbed',
     'an_12_number_of_awakenings','an_13_average_awakening_length','typing_time_hr','c27','c28_m']

# Per-arm mean, sample variance and non-missing count of every baseline variable.
# 'count' matters: mean/var skip NaNs, so each variable must be scaled by its
# own number of valid observations rather than by the full group size.
t = d1.groupby('T_nap')[X].agg(['mean','var','count'])

# Reshape to one row per variable with columns <arm>_<statistic>.
# The first (sorted) T_nap group is labelled 'control', the second 'treatment'.
t = t.transpose().reset_index()
t.columns = ['var','fun','control','treatment']
t = t.pivot(index='var', columns='fun')
t.columns = ['_'.join(col) for col in t.columns]

# Test statistics.
# NOTE: tau here is control minus treatment, the opposite sign convention of
# neyman() further down in this notebook.
t['tau'] = t['control_mean'] - t['treatment_mean']
t['tau_var'] = (t['control_var'].div(t['control_count'])
                + t['treatment_var'].div(t['treatment_count']))
t['t'] = t['tau'].div(np.sqrt(t['tau_var']))
# Two-sided normal p-value; the survival function avoids the precision loss
# of 1 - cdf for large |t|.
t['p'] = 2 * stats.norm.sf(t['t'].abs())

# Presentable result
t = t[['control_mean','treatment_mean','tau','t','p']].reset_index()
t

Unnamed: 0,var,control_mean,treatment_mean,tau,t,p
0,act_inbed,7.992301,8.071253,-0.078952,-0.839935,0.400945
1,age_,34.938053,34.964602,-0.026549,-0.039027,0.968869
2,an_12_number_of_awakenings,31.715624,32.188211,-0.472587,-0.621987,0.53395
3,an_13_average_awakening_length,4.478105,4.477173,0.000931,0.007186,0.994267
4,c27,0.818584,0.831858,-0.013274,-0.162058,0.87126
5,c28_m,7.570796,7.964602,-0.393805,-0.32073,0.748415
6,education_,10.336283,10.030973,0.30531,1.128002,0.259319
7,female_,0.650442,0.672566,-0.022124,-0.496037,0.619869
8,no_of_children_,1.300885,1.40708,-0.106195,-1.068706,0.285202
9,sleep_night,5.559958,5.595827,-0.035869,-0.434612,0.663844


In [72]:
# Joint balance check: regress the treatment indicator on all baseline
# variables plus a constant and report the p-value of the overall F-test.
# missing='drop' discards rows with NaNs (same handling as the outcome
# regressions below); without it a single NaN makes the F p-value NaN.
m = OLS(endog=d1['T_nap'], exog=d1[X].assign(const = 1), missing='drop').fit()
m.f_pvalue

0.984378537337847

### 2. Efectos de tratamiento
Declarar todas las variables dependientes

In [85]:
# Outcome (dependent) variables used in the treatment-effect estimates
Y = [
    'productivity',
    'corsi_measure',
    'happy',
    'energy',
]

#### a) Estimadores de Neyman

In [129]:
def neyman(frame, treatment_col, values_col):
    """Difference-in-means estimate with its unequal-variance t-statistic.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data containing both columns named below.
    treatment_col : str
        Column identifying the two arms. Groups are taken in sorted key
        order: the first is treated as control, the second as treatment.
    values_col : str
        Numeric outcome column; missing values are dropped within each arm.

    Returns
    -------
    tuple
        ``(values_col, tau, t, p)`` where ``tau`` is mean(treatment) minus
        mean(control), ``t`` is ``tau`` divided by
        ``sqrt(var_c / n_c + var_t / n_t)`` (sample variances, ddof=1) and
        ``p`` is the two-sided p-value under a standard normal.

    Raises
    ------
    ValueError
        If ``treatment_col`` does not define exactly two groups.
    """
    # One float array per arm, with missing outcomes removed
    arms = [
        values.dropna().to_numpy(dtype=float)
        for _, values in frame.groupby(treatment_col)[values_col]
    ]
    if len(arms) != 2:
        raise ValueError(
            f"expected exactly 2 groups in '{treatment_col}', found {len(arms)}"
        )
    control, treated = arms

    # Point estimate and its standard error
    tau = np.mean(treated) - np.mean(control)
    se = np.sqrt(
        np.var(control, ddof=1) / len(control)
        + np.var(treated, ddof=1) / len(treated)
    )
    t = tau / se

    # Two-sided p-value; sf() keeps precision where 1 - cdf() rounds to 0
    p = 2 * stats.norm.sf(np.abs(t))
    return (values_col, tau, t, p)

In [134]:
# One Neyman estimate per outcome, computed on the endline data
ney_rows = [neyman(d2, 'T_nap', outcome) for outcome in Y]

# Collect the (name, difference, t, p) tuples into a results table
t_ney = pd.DataFrame(ney_rows, columns=['depvar', 'diff', 't-stat', 'p-val'])
t_ney

Unnamed: 0,depvar,diff,t-stat,p-val
0,productivity,-171.240947,-0.976215,0.328958
1,corsi_measure,-0.064241,-0.59559,0.551449
2,happy,0.033502,0.990464,0.321947
3,energy,0.011378,0.845477,0.397844


#### b) Estimadores OLS sin controles

In [135]:
# Regressors shared by every regression: a constant and the treatment column
design = d2.assign(const=1)[['const', 'T_nap']]

# Regress each outcome on the treatment column (rows with missing values are
# dropped, HC0 covariance) and keep only the T_nap coefficient.
betas = []
for y in Y:
    m = OLS(endog=d2[y], exog=design, missing='drop').fit(cov_type='HC0')
    betas.append(m.params['T_nap'])

In [136]:
# Display the T_nap coefficients collected above, one per outcome in Y
betas

[-171.24094729961658,
 -0.06424084929926782,
 0.033501896333754694,
 0.011378002528445012]