In [1]:
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by pandas or
# statsmodels while loading data or fitting models are hidden as well.
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col


In [2]:
# Load the workbook into a DataFrame; the path is relative to the notebook's
# working directory.
df = pd.read_excel(io="clean_UYdata.xlsx")

In [3]:
# Columns used by the regressions, grouped by the role they play.
# The control lists are the single source for both the column selection and
# the formula fragment below, so the two cannot drift apart.
cluster_cols = ["villagecode_str"]  # for clustering
hh_controls = [  # HH controls
    "hhnum_b", "edu_hhhead_b", "occu_b", "age_pc_b", "pc_edu_b",
    "hindu_b", "hh_caste_b", "assets_index",
]
village_controls = [  # Village controls
    "education_b", "healthstatus_b", "road_b", "irrigation_b",
    "subdistricthqdist_c",
]
distance_cols = [
    # "noncompliance_village",
    "distance_dealer_close_b",
    "distdlr_imputed_trt",
]
outcome_treatment_cols = [
    "totrefills_omc_b2", "treatment012", "treatment_h", "treatment_hs",
    "totrefills_omc_e2",
]
fixed_effect_cols = ["tehsil_e"]  # fixed effects

# select subset of data (column order matches the grouping above)
reg_data = df[
    cluster_cols
    + hh_controls
    + village_controls
    + distance_cols
    + outcome_treatment_cols
    + fixed_effect_cols
]

# define controls: a formula fragment of the form "+ a + b + ...", meant to be
# appended to a model formula
control = "+ " + " + ".join(hh_controls + village_controls)

# Table 7

In [4]:
# Formulas for the two model specifications. Both use distance_dealer_close_b
# as the distance regressor and interact it with the treatment indicator(s).
# NOTE(review): the earlier comment called this "Imputed distance", but the
# imputed column (distdlr_imputed_trt) does not appear in either formula.
#
# formula_1: single treatment indicator (treatment012).
# The trailing space is kept so that appending the "+ ..." controls fragment
# yields the same formula text as before.
formula_1 = (
    "totrefills_omc_e2 ~ treatment012 + totrefills_omc_b2"
    " + distance_dealer_close_b"
    " + distance_dealer_close_b * treatment012 "
)
# formula_2: two treatment indicators (treatment_h and treatment_hs), each
# interacted with distance.
formula_2 = (
    "totrefills_omc_e2 ~ treatment_h + treatment_hs + totrefills_omc_b2"
    " + distance_dealer_close_b"
    " + distance_dealer_close_b * treatment_h"
    " + distance_dealer_close_b * treatment_hs"
)

In [5]:
def fit_clustered_ols(formula, data, cluster_col="villagecode_str"):
    """Fit an OLS model from a formula with cluster-robust standard errors.

    Parameters
    ----------
    formula : str
        Model formula passed to ``smf.ols``.
    data : pandas.DataFrame
        Frame holding every variable named in ``formula`` plus ``cluster_col``.
    cluster_col : str, default "villagecode_str"
        Column of ``data`` whose values define the clusters.

    Returns
    -------
    The fitted results object returned by ``.fit(cov_type='cluster', ...)``.
    """
    # NOTE(review): the groups are taken from the full frame, so they must line
    # up row-for-row with the observations used in the fit. If any model
    # column had missing values and rows were dropped by the formula
    # interface, this would no longer hold -- confirm the data is complete.
    return smf.ols(formula, data=data).fit(
        cov_type='cluster',
        cov_kwds={'groups': data[cluster_col]},
    )


# Fixed effects enter as dummies (least squares dummy variable model).
tehsil_fixed_effects = "+ C(tehsil_e)"

# Order matters: the results table labels its columns positionally.
model_formulas = [
    formula_1 + control,                         # no fixed effects, spec (1)
    formula_2 + control,                         # no fixed effects, spec (2)
    formula_1 + control + tehsil_fixed_effects,  # fixed effects, spec (1)
    formula_2 + control + tehsil_fixed_effects,  # fixed effects, spec (2)
]

models = [fit_clustered_ols(formula, reg_data) for formula in model_formulas]

In [6]:
# Row order for the regression table: treatment indicators first, then the
# distance main effect, then the distance x treatment interactions, and the
# baseline refill count last.
treat = ["treatment012", "treatment_h", "treatment_hs"]
interaction = [f"distance_dealer_close_b:{name}" for name in treat]
reg_order = [*treat, "distance_dealer_close_b", *interaction, "totrefills_omc_b2"]

In [7]:
# Extra summary row: number of observations of each fitted model, as an integer.
info_dict = {'No. observations': lambda x: f"{int(x.nobs):d}"}

# Column labels, in the same order as the entries of `models`.
# The fourth label used to repeat 'Dist Actual FE (1)'; the fourth model is the
# second specification with fixed effects, so it is labelled (2).
model_names = [
    'Dist Actual Non FE (1)',
    'Dist Actual Non FE (2)',
    'Dist Actual FE (1)',
    'Dist Actual FE (2)',
]

# Side-by-side table of all models. Only the regressors listed in `reg_order`
# are shown (drop_omitted=True), so controls and fixed-effect dummies are
# left out of the printed table.
results_table = summary_col(
    results=models,
    float_format='%0.3f',
    stars=True,
    model_names=model_names,
    info_dict=info_dict,
    regressor_order=reg_order,
    drop_omitted=True,
)

In [8]:
# Display the regression summary table.
# NOTE(review): this was labelled "Imputed distance part", but the table's
# column names read "Dist Actual ..." and the model formulas use
# distance_dealer_close_b -- confirm which distance measure is intended.
results_table


0,1,2,3,4
,Dist Actual Non FE (1) I,Dist Actual Non FE (2) I,Dist Actual FE (1) I,Dist Actual FE (1) II
treatment012,0.335,,0.350,
,(0.253),,(0.251),
treatment_h,,0.174,,0.177
,,(0.312),,(0.311)
treatment_hs,,0.523**,,0.548**
,,(0.244),,(0.247)
distance_dealer_close_b,-0.009,-0.011,-0.002,-0.003
,(0.043),(0.044),(0.042),(0.043)
distance_dealer_close_b:treatment012,-0.051,,-0.050,
