# ECON 148 Project

### Preliminary Data Cleaning for Replication

In [1]:
# Load in needed packages

import zipfile
import os
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from statsmodels.api import OLS, add_constant
from linearmodels.panel import PanelOLS
from statsmodels.stats.api import anova_lm

In [2]:
# Unzip zip file

zip_path = '../Data/113599-V1.zip'

extract_to = './extracted_data'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(os.listdir(extract_to))

['2013-0533_data--TO-SUBMIT-', 'LICENSE.txt']


In [3]:
# Open .do files to understand how to replicate figures
extract_to = './extracted_data/2013-0533_data--TO-SUBMIT-'
with open(os.path.join(extract_to, '2013-0533_do_figures.do'), 'r') as f:
    content_figures = f.read()

print(content_figures)

/*******************************************************************************      
Program Name: 	2013-0533_do_figures  
Contact:  		Cynthia Kinnan (c-kinnan@northwestern.edu)
Last Modified: 	5 May 2014
Purpose: 		Replicates all figures from "The miracle of microfinance? Evidence
				from a randomized evaluation" (Banerjee et al.), AEJ, 2014
Files Used: 	2013-0533_data_endlines1and2.dta
Files Created:	figure1.png
				figure2.png
				figure3.png
				figure4.png
*******************************************************************************/
version 13.1
cap log close
clear all
set more off
set mem 100m
pause on

*DATA DIRECTORY
global datadir "C:/Users/hreppst/Dropbox/Spandana/Paper/AEJ Final/Data/"

*OUTPUT DIRECTORY
global outputdir "C:/Users/hreppst/Dropbox/Spandana/Paper/AEJ Final/Data/Output"

*LOG FILE
log using 2013-0533_log_figures.smcl, replace 

cd "$outputdir"

********************************************************************************
*******        Figures 1-4: Qua

In [4]:
# Open .do files to understand how to replicate tables

with open(os.path.join(extract_to, '2013-0533_do_tables.do'), 'r') as f:
    content_tables = f.read()

print(content_tables[:5000])

/*******************************************************************************      
Program Name: 	2013-0533_do_tables  
Contact:  		Cynthia Kinnan (c-kinnan@northwestern.edu)
Last Modified: 	5 May 2014
Purpose: 		Replicates all tables from "The miracle of microfinance? Evidence
				from a randomized evaluation" (Banerjee et al.), AEJ, 2014
Files Used: 	2013-0533_data_baseline.dta
				2013-0533_data_endlines1and2.dta
				2013-0533_data_census.dta
				2013-0533_data_endline1businesstype.dta
Files Created:	table1a.txt
				table1b.txt
				table2.txt
				table3.txt
				table3b.txt
				table3c.txt
				table4.txt
				table5.txt
				table6.txt
				table7.txt
				table_index_pvals.txt
				tableA1.txt
				tableA2.txt
				tableA3.txt
				tableA4.txt
				tableA5.txt
*******************************************************************************/
cap log close
clear all
set more off
set mem 100m
pause on

*DATA DIRECTORY
global datadir "C:/Users/hreppst/Dropbox/Spandana/Paper/AEJ Final/Data/"



In [5]:
# Create paths for .dta files
file_path = os.path.join(extract_to, '2013-0533_data_endlines1and2.dta')

file_path_2 = os.path.join(extract_to, '2013-0533_data_baseline.dta')

file_path_3 = os.path.join(extract_to, '2013-0533_data_census.dta')

file_path_4 = os.path.join(extract_to, '2013-0533_data_endline1businesstype.dta')

# Create DataFrames for the different .dta files
endlines = pd.read_stata(file_path)

baseline = pd.read_stata(file_path_2)

census = pd.read_stata(file_path_3)

endlines = pd.read_stata(file_path_4)

In [6]:
# First 5 rows of DataFrames

# endlines.head()

In [7]:
# baseline.head()

In [8]:
# census.head()

In [9]:
# endlines.head()

### Table 1A Replication

In [10]:
# Create groups for variables in baseline DataFrame

hh_composition = ["hh_size", "adults", "children", "male_head", "head_age", "head_noeduc"]
credit_access = ["spandana", "othermfi", "bank", "informal", "anyloan"]
loan_amt = ["spandana_amt", "othermfi_amt", "bank_amt", "informal_amt", "anyloan_amt"]
self_emp_activ = ["total_biz", "female_biz", "female_biz_pct"]
businesses = ["bizrev", "bizexpense", "bizinvestment", "bizemployees", "hours_weekbiz"]

# Create group for businesses variables
businesses_allHH = []

# Create variables for businesses variables representing 1 if total_biz is 1 and 0 if total_biz is 0
for var in businesses:
    new_var = f"{var}_allHH"
    businesses_allHH.append(new_var)
    baseline[new_var] = baseline[var]
    baseline.loc[baseline["total_biz"] == 0, new_var] = 0

# Create group for consumption variables

consumption = ["total_exp_mo", "nondurable_exp_mo", "durables_exp_mo", "home_durable_index"]

# Make list with all variables
allvars = hh_composition + credit_access + loan_amt + self_emp_activ + businesses + businesses_allHH + consumption

In [11]:
# Drop NAs

baseline = baseline[baseline['treatment'].notna()]
baseline = baseline.reset_index()

# Convert treatment to binary: 1 = Treatment, 0 = Control
baseline["treatment_binary"] = baseline["treatment"].astype(str).map({"Control": 0, "Treatment": 1})

In [12]:
# Convert categorical columns to object type first
baseline['spandana'] = baseline['spandana'].astype('object')
baseline['othermfi'] = baseline['othermfi'].astype('object')
baseline['bank'] = baseline['bank'].astype('object')
baseline['informal'] = baseline['informal'].astype('object')
baseline['anyloan'] = baseline['anyloan'].astype('object')

# Replace 'Yes' with 1, 'No' with 0
baseline['spandana'].replace({'Yes': 1, 'No': 0}, inplace=True)
baseline['othermfi'].replace({'Yes': 1, 'No': 0}, inplace=True)
baseline['bank'].replace({'Yes': 1, 'No': 0}, inplace=True)
baseline['informal'].replace({'Yes': 1, 'No': 0}, inplace=True)
baseline['anyloan'].replace({'Yes': 1, 'No': 0}, inplace=True)

# Fill NaN with -1
baseline['spandana'].fillna(-1, inplace=True)
baseline['othermfi'].fillna(-1, inplace=True)
baseline['bank'].fillna(-1, inplace=True)
baseline['informal'].fillna(-1, inplace=True)
baseline['anyloan'].fillna(-1, inplace=True)

In [13]:
import statsmodels.api as sm

# List of variables to include
vars_to_analyze = ['spandana', 'othermfi', 'bank', 'informal', 'anyloan']

# Prepare output list
table1a_binary_results = []

# Ensure binary treatment and areaid category
baseline['treatment_binary'] = baseline['treatment'].map({'Control': 0, 'Treatment': 1}).astype(int)
baseline['areaid'] = baseline['areaid'].astype('category')

for var in vars_to_analyze:
    temp = baseline[[var, 'treatment_binary', 'areaid']].copy()
    temp = temp[temp[var].isin([0, 1])]
    temp = temp.dropna(subset=[var, 'treatment_binary', 'areaid'])

    if temp.empty:
        table1a_results.append([None, None, None, None, None])
        continue

    control = temp[temp['treatment_binary'] == 0]
    N = control.shape[0]
    mean_c = control[var].mean()
    std_c = control[var].std()

    temp['const'] = 1.0
    model = sm.OLS(temp[var], temp[['const', 'treatment_binary']])
    results = model.fit(cov_type='cluster', cov_kwds={'groups': temp['areaid']})

    diff = results.params['treatment_binary']
    p_val = results.pvalues['treatment_binary']

    table1a_binary_results.append([N, mean_c, std_c, diff, p_val])

# Create results DataFrame
table1a_binary_df = pd.DataFrame(table1a_binary_results,
                          columns=["Obs", "Control_mean", "Control_sd", "Difference_in_mean", "P_val"],
                          index=vars_to_analyze)

table1a_binary_df

Unnamed: 0,Obs,Control_mean,Control_sd,Difference_in_mean,P_val
spandana,1213,0.0,0.0,0.006568,0.192208
othermfi,1213,0.010717,0.10301,0.006524,0.450831
bank,1213,0.036274,0.187047,0.001493,0.858449
informal,1213,0.632317,0.482373,0.001509,0.958114
anyloan,1213,0.680132,0.466618,0.002134,0.941766


In [14]:
import statsmodels.api as sm

# Define full list of variables you want to analyze
# (This must be defined earlier in your script)
# Example:
# allvars = ['income', 'savings', 'education', 'spandana', 'othermfi', ...]

# Binary vars to exclude
vars_to_analyze = ['spandana', 'othermfi', 'bank', 'informal', 'anyloan']

# Filter to continuous variables only
non_binary_vars = [var for var in allvars if var not in vars_to_analyze]

# Prepare output list
table1a_nonbinary_results = []


for var in non_binary_vars:
    temp = baseline[[var, 'treatment_binary', 'areaid']].copy()

    # Control group stats
    control = temp[temp['treatment_binary'] == 0]
    N = control.shape[0]
    mean_c = control[var].mean()
    std_c = control[var].std()

    # Regression
    temp['const'] = 1.0
    model = sm.OLS(temp[var], temp[['const', 'treatment_binary']])
    results = model.fit(cov_type='cluster', cov_kwds={'groups': temp['areaid']})

    # Extract results
    diff = results.params['treatment_binary']
    p_val = results.pvalues['treatment_binary']

    table1a_nonbinary_results.append([N, mean_c, std_c, diff, p_val])

# Create final results table
table1a_nonbinary_df = pd.DataFrame(table1a_nonbinary_results,
                          columns=["Obs", "Control_mean", "Control_sd", "Difference_in_mean", "P_val"],
                          index=non_binary_vars)
table1a_nonbinary_df

Unnamed: 0,Obs,Control_mean,Control_sd,Difference_in_mean,P_val
hh_size,1220,5.037705,1.666212,0.095082,0.300973
adults,1220,3.438525,1.465599,-0.011475,0.872767
children,1220,1.59918,1.228256,0.104098,0.095204
male_head,1220,0.907072,0.29045,,
head_age,1220,41.149671,10.839448,,
head_noeduc,1220,0.370066,0.483021,,
spandana_amt,1220,0.0,0.0,,
othermfi_amt,1220,201.154163,2742.363893,,
bank_amt,1220,7438.169827,173268.343989,,
informal_amt,1220,28460.016488,65312.1557,,


In [15]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Assume allvars is already defined
# Example:
# allvars = list(baseline.columns)

# Binary vars to exclude from this loop
binary_vars = ['spandana', 'othermfi', 'bank', 'informal', 'anyloan']
non_binary_vars = [var for var in allvars if var not in binary_vars]

# Store regression results here
table1a_nonbinary_results = []

for var in non_binary_vars:
    temp = baseline[[var, 'treatment_binary', 'areaid']].copy()

    # Drop rows with missing values to avoid crash
    temp = temp.dropna()

    # Control group stats
    control = temp[temp['treatment_binary'] == 0]
    N = control.shape[0]
    mean_c = control[var].mean()
    std_c = control[var].std()

    # Default values if regression fails
    diff = np.nan
    p_val = np.nan

    # Check if both treatment and variable have variation
    if temp[var].std() > 0 and temp['treatment_binary'].std() > 0:
        try:
            temp['const'] = 1.0
            model = sm.OLS(temp[var], temp[['const', 'treatment_binary']])
            results = model.fit(cov_type='cluster', cov_kwds={'groups': temp['areaid']})
            diff = results.params['treatment_binary']
            p_val = results.pvalues['treatment_binary']
        except:
            # You can print/log here if needed
            diff = np.nan
            p_val = np.nan
    else:
        # No variation = no regression → set safe defaults
        diff = 0.0
        p_val = 1.0

    table1a_nonbinary_results.append([N, mean_c, std_c, diff, p_val])

# Build final DataFrame
table1a_nonbinary_df = pd.DataFrame(
    table1a_nonbinary_results,
    columns=["Obs", "Control_mean", "Control_sd", "Difference_in_mean", "P_val"],
    index=non_binary_vars
)

# Optional: sort or save
# table1a_nonbinary_df.to_csv("table1a.csv")

# View the table
table1a_nonbinary_df

Unnamed: 0,Obs,Control_mean,Control_sd,Difference_in_mean,P_val
hh_size,1220,5.037705,1.666212,0.095082,0.300973
adults,1220,3.438525,1.465599,-0.011475,0.872767
children,1220,1.59918,1.228256,0.104098,0.095204
male_head,1216,0.907072,0.29045,-0.012249,0.379004
head_age,1216,41.149671,10.839448,-0.243498,0.675317
head_noeduc,1216,0.370066,0.483021,-0.007699,0.78638
spandana_amt,1213,0.0,0.0,68.965517,0.188973
othermfi_amt,1213,201.154163,2742.363893,170.35651,0.567014
bank_amt,1213,7438.169827,173268.343989,-5419.69692,0.276039
informal_amt,1213,28460.016488,65312.1557,-570.459838,0.855498


In [16]:
# View the table
table1a_nonbinary_df

Unnamed: 0,Obs,Control_mean,Control_sd,Difference_in_mean,P_val
hh_size,1220,5.037705,1.666212,0.095082,0.300973
adults,1220,3.438525,1.465599,-0.011475,0.872767
children,1220,1.59918,1.228256,0.104098,0.095204
male_head,1216,0.907072,0.29045,-0.012249,0.379004
head_age,1216,41.149671,10.839448,-0.243498,0.675317
head_noeduc,1216,0.370066,0.483021,-0.007699,0.78638
spandana_amt,1213,0.0,0.0,68.965517,0.188973
othermfi_amt,1213,201.154163,2742.363893,170.35651,0.567014
bank_amt,1213,7438.169827,173268.343989,-5419.69692,0.276039
informal_amt,1213,28460.016488,65312.1557,-570.459838,0.855498


In [17]:
# Find index position of 'head_age'
insert_loc = table1a_nonbinary_df.index.get_loc('head_noeduc') + 1

# Split the nonbinary table into two parts: before and after insertion point
top = table1a_nonbinary_df.iloc[:insert_loc]
bottom = table1a_nonbinary_df.iloc[insert_loc:]

# Combine with the binary df inserted in the middle
table1a_combined_df = pd.concat([top, table1a_binary_df, bottom])

# Optional: reset the index name if you want to make it cleaner for output
table1a_combined_df.index.name = 'Variable'

In [18]:
# Round all float values to 3 decimal places
table1a_combined_df = table1a_combined_df.round(3)

# View the result
table1a_combined_df

Unnamed: 0_level_0,Obs,Control_mean,Control_sd,Difference_in_mean,P_val
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
hh_size,1220,5.038,1.666,0.095,0.301
adults,1220,3.439,1.466,-0.011,0.873
children,1220,1.599,1.228,0.104,0.095
male_head,1216,0.907,0.29,-0.012,0.379
head_age,1216,41.15,10.839,-0.243,0.675
head_noeduc,1216,0.37,0.483,-0.008,0.786
spandana,1213,0.0,0.0,0.007,0.192
othermfi,1213,0.011,0.103,0.007,0.451
bank,1213,0.036,0.187,0.001,0.858
informal,1213,0.632,0.482,0.002,0.958


## Table 1B

In [19]:
# Open .do files to understand how to replicate tables

with open(os.path.join(extract_to, '2013-0533_do_tables.do'), 'r') as f:
    content_tables = f.read()

print(content_tables[:6000])

/*******************************************************************************      
Program Name: 	2013-0533_do_tables  
Contact:  		Cynthia Kinnan (c-kinnan@northwestern.edu)
Last Modified: 	5 May 2014
Purpose: 		Replicates all tables from "The miracle of microfinance? Evidence
				from a randomized evaluation" (Banerjee et al.), AEJ, 2014
Files Used: 	2013-0533_data_baseline.dta
				2013-0533_data_endlines1and2.dta
				2013-0533_data_census.dta
				2013-0533_data_endline1businesstype.dta
Files Created:	table1a.txt
				table1b.txt
				table2.txt
				table3.txt
				table3b.txt
				table3c.txt
				table4.txt
				table5.txt
				table6.txt
				table7.txt
				table_index_pvals.txt
				tableA1.txt
				tableA2.txt
				tableA3.txt
				tableA4.txt
				tableA5.txt
*******************************************************************************/
cap log close
clear all
set more off
set mem 100m
pause on

*DATA DIRECTORY
global datadir "C:/Users/hreppst/Dropbox/Spandana/Paper/AEJ Final/Data/"



In [20]:
endlines = pd.read_stata(file_path)
len(endlines.columns)

187

In [21]:
hh_composition = ["hhsize", "adults", "children", "male_head", "head_age", "head_noeduc"]
credit_access = ["spandana", "othermfi", "anybank", "anyinformal", "anyloan"]
loan_amt = ["spandana_amt", "othermfi_amt", "bank_amt", "informal_amt", "anyloan_amt"]
self_emp_activ = ["total_biz", "female_biz_allHH", "female_biz_pct"]
businesses = ["bizrev", "bizexpense", "bizinvestment", "bizemployees", "hours_week_biz"]
businesses_allHH = ["bizrev_allHH", "bizexpense_allHH", "bizinvestment_allHH", "bizemployees_allHH", "hours_week_biz_allHH"]
consumption = ["total_exp_mo", "nondurable_exp_mo", "durables_exp_mo", "home_durable_index"]

# Combine all lists into a single list
allvars = (
    hh_composition
    + credit_access
    + loan_amt
    + self_emp_activ
    + businesses
    + businesses_allHH
    + consumption
)


In [22]:
# Define the variable lists
endlines_copy = endlines.copy()

businesses = ["bizrev", "bizexpense", "bizinvestment", "bizemployees", "hours_week_biz"]
additional_vars = ["female_biz"]
vars_to_process = businesses + additional_vars

# Loop through rounds (1 and 2)
for i in [1, 2]:
    for var in vars_to_process:
        var_col = f"{var}_{i}"
        allhh_col = f"{var}_allHH_{i}"
        total_biz_col = f"total_biz_{i}"
        
        # Create the _allHH_ version
        endlines_copy[allhh_col] = endlines_copy[var_col]
        
        # Set value to NaN if household did not run a business
        endlines_copy.loc[endlines_copy[total_biz_col] == 0, var_col] = np.nan


In [23]:
# Rename columns: varA_1 → varA1, varA_2 → varA2
for var in allvars:
    for suffix in ['1', '2']:
        old_name = f"{var}_{suffix}"
        new_name = f"{var}{suffix}"
        if old_name in endlines_copy.columns:
            endlines_copy.rename(columns={old_name: new_name}, inplace=True)

# Reshape to long format
# Assumes there's a unique household ID column like 'hhid'
df_long = pd.wide_to_long(endlines_copy, 
                          stubnames=allvars, 
                          i='hhid', 
                          j='endline', 
                          sep='', 
                          suffix='[12]').reset_index()

In [24]:
df_long = df_long[["hhid", "areaid", "endline", "treatment"] + allvars]
df_long_w_dummies = pd.get_dummies(df_long, columns=['endline'], prefix='endline')


endline_1 = df_long_w_dummies[df_long_w_dummies["endline_1"] == 1].copy()
endline_2 = df_long_w_dummies[df_long_w_dummies["endline_2"] == 1].copy()

endline_1["spandana"] = pd.to_numeric(endline_1["spandana"].map({"No":0, "Yes":1}))
endline_1["othermfi"] = pd.to_numeric(endline_1["othermfi"].map({"No":0, "Yes":1}))
endline_1["anybank"] = pd.to_numeric(endline_1["anybank"].map({"No":0, "Yes":1}))
endline_1["anyinformal"] = pd.to_numeric(endline_1["anyinformal"].map({"No":0, "Yes":1}))
endline_1["anyloan"] = pd.to_numeric(endline_1["anyloan"].map({"No":0, "Yes":1}))

endline_1_control =  endline_1[endline_1["treatment"] == "Control"]

endline_1_control.describe()

transpose_1 = endline_1_control.describe().T[["count", "mean", "std"]].reset_index()
transpose_1

Unnamed: 0,index,count,mean,std
0,hhid,3264.0,4083.6731,2101.773755
1,areaid,3264.0,56.95527,29.556583
2,hhsize,3264.0,5.645221,2.151706
3,adults,3264.0,3.886642,1.754296
4,children,3264.0,1.737745,1.309517
5,male_head,3261.0,0.894511,0.30723
6,head_age,3257.0,41.14891,10.221965
7,head_noeduc,3256.0,0.311425,0.463147
8,spandana,3247.0,0.050508,0.219025
9,othermfi,3183.0,0.148602,0.355752


In [25]:
endline_2 = df_long_w_dummies[df_long_w_dummies["endline_2"] == 1].copy()

endline_2["spandana"] = pd.to_numeric(endline_2["spandana"].map({"No":0, "Yes":1}))
endline_2["othermfi"] = pd.to_numeric(endline_2["othermfi"].map({"No":0, "Yes":1}))
endline_2["anybank"] = pd.to_numeric(endline_2["anybank"].map({"No":0, "Yes":1}))
endline_2["anyinformal"] = pd.to_numeric(endline_2["anyinformal"].map({"No":0, "Yes":1}))
endline_2["anyloan"] = pd.to_numeric(endline_2["anyloan"].map({"No":0, "Yes":1}))

endline_2_control =  endline_2[endline_2["treatment"] == "Control"]

endline_2_control.describe()

transpose_2 = endline_2_control.describe().T[["count", "mean", "std"]].reset_index()
transpose_2

Unnamed: 0,index,count,mean,std
0,hhid,3264.0,4083.6731,2101.773755
1,areaid,3264.0,56.95527,29.556583
2,hhsize,2943.0,6.269113,2.5479
3,adults,2943.0,4.038736,1.848216
4,children,2943.0,1.763846,1.321124
5,male_head,2938.0,0.811096,0.391499
6,head_age,2940.0,42.257823,10.153644
7,head_noeduc,2940.0,0.291837,0.454685
8,spandana,2943.0,0.111451,0.314743
9,othermfi,2943.0,0.268434,0.44322


In [26]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

# Combine endline 1 and endline 2 control groups
endline_1_control = endline_1_control.copy()
endline_2_control = endline_2_control.copy()

endline_1_control["endline2"] = 0
endline_2_control["endline2"] = 1

df_ctrl_combined = pd.concat([endline_1_control, endline_2_control])

# List of binary vars you processed earlier
# Container for results
results = []

for var in allvars:
    row = {'index': var}

    # Drop missing values
    df_sub = df_ctrl_combined[["areaid", "endline2", var]].dropna()

    # Summary stats for each endline
    el1_vals = df_sub[df_sub["endline2"] == 0][var]
    el2_vals = df_sub[df_sub["endline2"] == 1][var]

   

    # Run regression with clustered SEs
    try:
        model = smf.ols(f"{var} ~ endline2", data=df_sub).fit(
            cov_type='cluster',
            cov_kwds={'groups': df_sub['areaid']}
        )
        row['Diff'] = model.params['endline2']
        row['P_Value'] = model.pvalues[-1]
    except Exception as e:
        row['Diff'] = np.nan
        row['P_Value'] = np.nan

    results.append(row)

# Create results table
summary_df = pd.DataFrame(results)
summary_df = summary_df[[
    'index', 'Diff', 'P_Value'
]]


In [27]:
summary_df[['Diff', 'P_Value']] = summary_df[['Diff', 'P_Value']].round(3)


In [28]:
summarys = transpose_1.merge(transpose_2, on="index", how = "inner")
full = summarys.merge(summary_df, on = "index", how="inner")



In [29]:
full = full.round(3).rename({"count_x":"Obs. (1)", "count_y":"Obs. (4)", "mean_x":"Mean (2)", "mean_y":"Mean (5)", "std_x":"SD (3)", "std_y":"SD (6)", "Diff":"Coeff. (7)"}, axis = 1)

In [30]:
full.set_index("index")

Unnamed: 0_level_0,Obs. (1),Mean (2),SD (3),Obs. (4),Mean (5),SD (6),Coeff. (7),P_Value
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
hhsize,3264.0,5.645,2.152,2943.0,6.269,2.548,0.624,0.0
adults,3264.0,3.887,1.754,2943.0,4.039,1.848,0.152,0.0
children,3264.0,1.738,1.31,2943.0,1.764,1.321,0.026,0.242
male_head,3261.0,0.895,0.307,2938.0,0.811,0.391,-0.083,0.0
head_age,3257.0,41.149,10.222,2940.0,42.258,10.154,1.109,0.0
head_noeduc,3256.0,0.311,0.463,2940.0,0.292,0.455,-0.02,0.017
spandana,3247.0,0.051,0.219,2943.0,0.111,0.315,0.061,0.0
othermfi,3183.0,0.149,0.356,2943.0,0.268,0.443,0.12,0.0
anybank,3247.0,0.079,0.27,2943.0,0.073,0.26,-0.006,0.476
anyinformal,3247.0,0.761,0.427,2943.0,0.603,0.489,-0.158,0.0
