In [14]:
import networkx as nx
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

In [22]:
# Village-level table: its `village` column selects which villages are analysed.
cross_sectional = pd.read_excel('cross_sectional.xlsx')
# Household-level table: one row per household.
household = pd.read_excel('household.xlsx')

In [23]:
# One headerless microfinance-adoption CSV per village, keyed 'mf<village>'.
# The dict keeps the order in which villages appear in `cross_sectional`.
mf = {'mf'+str(num): 'MF Dummy/MF'+str(num)+'.csv' for num in cross_sectional['village']}

# Read every file first and concatenate once: growing a DataFrame with
# pd.concat inside a loop re-copies all previous rows on every iteration.
mf_frames = [pd.read_csv(file_name, header=None) for file_name in mf.values()]
if mf_frames:
    mf_list = pd.concat(mf_frames, axis=0, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the empty-frame result instead.
    mf_list = pd.DataFrame()

In [24]:
# Total number of adoption rows stacked across all villages.
mf_list.shape[0]

9598

In [25]:
# Keep only households from the villages listed in `cross_sectional`, stacked
# in the order the villages first appear there. That is the same order in which
# the per-village adoption files are stacked into `mf_list`, and the two tables
# are joined purely by row position below, so the orders must agree. (Iterating
# over a `set` gives no such ordering guarantee.)
village_order = pd.unique(cross_sectional['village'])
house_frames = [household[household.village == village_id] for village_id in village_order]
if house_frames:
    house_43 = pd.concat(house_frames, axis=0, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the empty-frame result instead.
    house_43 = pd.DataFrame()

# A positional join silently pads with NaN when the lengths differ, so fail loudly.
assert len(house_43) == len(mf_list), (
    "household rows (%d) and MF adoption rows (%d) differ; cannot join by position"
    % (len(house_43), len(mf_list))
)

#filtered household + MF
# Column 0 of the headerless adoption files becomes 'mf_adoption'.
house_mf_43 = pd.concat([house_43, mf_list], axis=1).rename(columns={0: 'mf_adoption'})

In [26]:
# Row count of the joined household + adoption table.
house_mf_43.shape[0]

9598

In [27]:
# Path of each village's headerless adjacency-matrix CSV, keyed 'vil_<village>'.
adj_data = {'vil_'+str(name):'Adjacency Matrices/adj_allVillageRelationships_HH_vilno_'+str(name)+'.csv' for name in cross_sectional['village']}

# networkx 3.0 removed `from_numpy_matrix`; `from_numpy_array` is its
# replacement. Fall back to the old name only on installations that lack it.
graph_from_adjacency = getattr(nx, 'from_numpy_array', None) or nx.from_numpy_matrix

# vil[<village id>] -> undirected graph built from that village's adjacency matrix.
vil = {}
for index in cross_sectional['village']:
    # Look the path up by key instead of zipping the dict against the column,
    # so a repeated village id cannot shift paths and ids out of step.
    file_name = adj_data['vil_'+str(index)]
    adj_mat = pd.read_csv(file_name, header=None).values
    vil[index] = graph_from_adjacency(adj_mat, create_using=nx.Graph())

## Eigenvector centrality of taking leaders

In [28]:
# Per village: mean eigenvector centrality of the leaders who adopted
# microfinance. Villages with no such leader get no entry.
eigen_centrality_taking = {}
for vil_ind in set(house_mf_43.village):
    is_taking_leader = (
        (house_mf_43['village'] == int(vil_ind))
        & (house_mf_43['leader'] == 1)
        & (house_mf_43['mf_adoption'] == 1)
    )
    leader_ind = house_mf_43.loc[is_taking_leader, 'adjmatrix_key']
    if len(leader_ind) == 0:
        continue

    # The centrality vector depends only on the village graph, so compute it
    # once per village rather than once per leader.
    centrality = nx.eigenvector_centrality(vil[int(vil_ind)], max_iter=400)
    # Graph nodes are addressed as adjmatrix_key - 1
    # (presumably the key is 1-based -- TODO confirm against the data dictionary).
    leader_scores = [centrality[int(lead_ind) - 1] for lead_ind in leader_ind]
    eigen_centrality_taking[vil_ind] = sum(leader_scores) / len(leader_scores)

In [29]:
# Turn the {village: score} mapping into a two-column table
# ('eig_cen_taking', 'village') with a fresh 0..n-1 row index.
eigen_centrality_taking_df = (
    pd.DataFrame.from_dict(data=eigen_centrality_taking, orient='index')
    .rename(columns={0: 'eig_cen_taking'})
    .assign(village=lambda frame: frame.index)
    .reset_index(drop=True)
)
eigen_centrality_taking_df.head()

Unnamed: 0,eig_cen_taking,village
0,0.077894,1
1,0.028603,2
2,0.047894,3
3,0.107557,4
4,0.133721,6


## Eigenvector centrality of leaders

In [30]:
# Per village: mean eigenvector centrality over all leader households.
# Villages with no leader get no entry.
eigen_centrality = {}
for vil_ind in set(house_mf_43.village):
    is_leader = (
        (house_mf_43['village'] == int(vil_ind))
        & (house_mf_43['leader'] == 1)
    )
    leader_ind = house_mf_43.loc[is_leader, 'adjmatrix_key']
    if len(leader_ind) == 0:
        continue

    # The centrality vector depends only on the village graph, so compute it
    # once per village rather than once per leader.
    centrality = nx.eigenvector_centrality(vil[int(vil_ind)], max_iter=400)
    # Graph nodes are addressed as adjmatrix_key - 1
    # (presumably the key is 1-based -- TODO confirm against the data dictionary).
    leader_scores = [centrality[int(lead_ind) - 1] for lead_ind in leader_ind]
    eigen_centrality[vil_ind] = sum(leader_scores) / len(leader_scores)

In [31]:
# Turn the {village: score} mapping into a two-column table
# ('eigen_cen_leader', 'village') with a fresh 0..n-1 row index.
eigen_centrality_df = (
    pd.DataFrame.from_dict(data=eigen_centrality, orient='index')
    .rename(columns={0: 'eigen_cen_leader'})
    .assign(village=lambda frame: frame.index)
    .reset_index(drop=True)
)
eigen_centrality_df.head()

Unnamed: 0,eigen_cen_leader,village
0,0.088717,1
1,0.07825,2
2,0.064053,3
3,0.066193,4
4,0.121904,6


## Degree of leaders

In [32]:
# Per village: mean graph degree of leader households that were surveyed.
# Villages with no such household get no entry.
# NOTE(review): unlike the eigenvector-centrality cells, this filter also
# requires hhSurveyed == 1 -- confirm the difference is intentional.
degree = {}
for vil_ind in set(house_mf_43.village):
    is_surveyed_leader = (
        (house_mf_43['village'] == int(vil_ind))
        & (house_mf_43['leader'] == 1)
        & (house_mf_43['hhSurveyed'] == 1)
    )
    leader_ind = house_mf_43.loc[is_surveyed_leader, 'adjmatrix_key']
    if len(leader_ind) == 0:
        continue

    # Build the degree lookup once per village instead of once per leader.
    node_degree = nx.degree(vil[int(vil_ind)])
    # Graph nodes are addressed as adjmatrix_key - 1
    # (presumably the key is 1-based -- TODO confirm against the data dictionary).
    leader_degrees = [node_degree[int(lead_ind) - 1] for lead_ind in leader_ind]
    degree[vil_ind] = sum(leader_degrees) / len(leader_degrees)

In [33]:
# Turn the {village: mean degree} mapping into a two-column table
# ('degree_leader', 'village') with a fresh 0..n-1 row index.
degree_df = (
    pd.DataFrame.from_dict(data=degree, orient='index')
    .rename(columns={0: 'degree_leader'})
    .assign(village=lambda frame: frame.index)
    .reset_index(drop=True)
)
degree_df.head()

Unnamed: 0,degree_leader,village
0,21.111111,1
1,15.727273,2
2,16.636364,3
3,16.625,4
4,13.6,6


## Number of households

In [34]:
# Households per village, taken as the count of non-null 'HHnum_in_village'
# entries in each village group; the village id is moved from the index to a column.
HHnum = (
    house_mf_43.groupby(['village'])['HHnum_in_village']
    .count()
    .to_frame()
    .assign(village=lambda frame: frame.index)
    .reset_index(drop=True)
)
HHnum.head()

Unnamed: 0,HHnum_in_village,village
0,182,1
1,195,2
2,292,3
3,239,4
4,114,6


## Fraction of taking leaders

In [35]:
# Row filters: leader households, and households that adopted microfinance.
is_leader_row = house_mf_43['leader'] == 1
took_mf_row = house_mf_43['mf_adoption'] == 1

# Per-village non-null counts of every column, first for adopting leaders only
# and then for all leaders.
leader_mf = house_mf_43[is_leader_row & took_mf_row].groupby(['village']).count()
total_leader = house_mf_43[is_leader_row].groupby(['village']).count()

In [36]:
# Share of each village's leaders who adopted microfinance.
# A village that has leaders but no adopting leader is missing from `leader_mf`;
# reindex onto the all-leaders index with 0 so its share is 0.0 instead of NaN.
frac_taking_leaders = (
    leader_mf['mf_adoption'].reindex(total_leader.index, fill_value=0)
    / total_leader['mf_adoption']
)

In [37]:
# Promote the per-village Series to a two-column table
# ('frac_taking_leaders', 'village') with a fresh 0..n-1 row index.
frac_taking_leaders = (
    frac_taking_leaders.to_frame(name='frac_taking_leaders')
    .assign(village=lambda frame: frame.index)
    .reset_index(drop=True)
)
frac_taking_leaders.head()

Unnamed: 0,frac_taking_leaders,village
0,0.321429,1
1,0.272727,2
2,0.057143,3
3,0.111111,4
4,0.136364,6


## Microfinance take-up rate among non-leader households (outcome variable)

In [38]:
# Per village: adopting non-leader households divided by all non-leader households.
take_up_rate = {}
for vil_ind in set(house_mf_43.village):
    non_leader_in_village = (
        (house_mf_43['village'] == int(vil_ind))
        & (house_mf_43['leader'] == 0)
    )
    non_leader_ttl = house_mf_43[non_leader_in_village]
    non_leader_taking = house_mf_43[non_leader_in_village & (house_mf_43['mf_adoption'] == 1)]
    rate = len(non_leader_taking) / len(non_leader_ttl)
    take_up_rate[vil_ind] = rate

In [39]:
# Turn the {village: rate} mapping into a two-column table
# ('take_up_rate', 'village') with a fresh 0..n-1 row index.
take_up_rate_df = (
    pd.DataFrame.from_dict(data=take_up_rate, orient='index')
    .rename(columns={0: 'take_up_rate'})
    .assign(village=lambda frame: frame.index)
    .reset_index(drop=True)
)
take_up_rate_df.head()

Unnamed: 0,take_up_rate,village
0,0.214286,1
1,0.138728,2
2,0.140078,3
3,0.067873,4
4,0.23913,6


In [40]:
# Combine the per-village feature tables by joining on the `village` key.
# Gluing them side by side by row position only works if every table lists the
# same villages in the same order, and it produces duplicate `village` columns.
# The outer join keeps a village even when one table has no row for it (the gap
# becomes NaN).
feature_tables = [eigen_centrality_df, HHnum, degree_df, frac_taking_leaders, take_up_rate_df, eigen_centrality_taking_df]
dataset = feature_tables[0]
for feature_table in feature_tables[1:]:
    dataset = dataset.merge(feature_table, on='village', how='outer')

# Fixed column order, one row per village, sorted by village id.
dataset = (
    dataset[['village', 'eigen_cen_leader', 'HHnum_in_village', 'degree_leader', 'frac_taking_leaders', 'eig_cen_taking', 'take_up_rate']]
    .sort_values('village')
    .reset_index(drop=True)
)
dataset.head()

Unnamed: 0,village,eigen_cen_leader,HHnum_in_village,degree_leader,frac_taking_leaders,eig_cen_taking,take_up_rate
0,1,0.088717,182,21.111111,0.321429,0.077894,0.214286
1,2,0.07825,195,15.727273,0.272727,0.028603,0.138728
2,3,0.064053,292,16.636364,0.057143,0.047894,0.140078
3,4,0.066193,239,16.625,0.111111,0.107557,0.067873
4,6,0.121904,114,13.6,0.136364,0.133721,0.23913


In [41]:
# Persist the per-village table; index=False keeps the row index out of the CSV.
dataset.to_csv('preprocessed_dataset.csv',index=False)

In [15]:
# Reload the per-village table from the CSV written by the preprocessing cells,
# so the regressions below can run without repeating the preprocessing.
preprocessed_dataset = pd.read_csv('preprocessed_dataset.csv')
preprocessed_dataset.head()

Unnamed: 0,village,eigen_cen_leader,HHnum_in_village,degree_leader,frac_taking_leaders,eig_cen_taking,take_up_rate
0,1,0.088717,182,21.111111,0.321429,0.077894,0.214286
1,2,0.07825,195,15.727273,0.272727,0.028603,0.138728
2,3,0.064053,292,16.636364,0.057143,0.047894,0.140078
3,4,0.066193,239,16.625,0.111111,0.107557,0.067873
4,6,0.121904,114,13.6,0.136364,0.133721,0.23913


In [24]:
# Summary statistics for every column of the per-village table.
preprocessed_dataset.describe()

Unnamed: 0,village,eigen_cen_leader,HHnum_in_village,degree_leader,frac_taking_leaders,eig_cen_taking,take_up_rate
count,43.0,43.0,43.0,43.0,43.0,43.0,43.0
mean,40.139535,0.073386,223.209302,18.101368,0.248356,0.066467,0.184856
std,22.721483,0.017232,56.169623,3.784275,0.125054,0.02984,0.084047
min,1.0,0.042678,114.0,11.888889,0.035714,0.015366,0.067873
25%,22.0,0.063209,182.0,15.533937,0.166209,0.042052,0.135643
50%,43.0,0.069682,207.0,17.555556,0.238095,0.070134,0.166667
75%,59.5,0.082711,257.0,19.890152,0.318609,0.083177,0.217312
max,75.0,0.122779,356.0,28.052632,0.555556,0.133721,0.4375


## Linear Regression

In [16]:
# Specification (1): take-up rate on leaders' eigenvector centrality and village size.
model_1 = smf.ols(
    formula='take_up_rate ~ eigen_cen_leader + HHnum_in_village',
    data=preprocessed_dataset,
).fit()
model_1_rsq = model_1.rsquared
model_1_coef = model_1.params
model_1_rbterr = model_1.HC1_se  # HC1 heteroskedasticity-robust standard errors

In [17]:
# Specification (2): take-up rate on village size and leaders' degree.
model_2 = smf.ols(
    formula='take_up_rate ~ HHnum_in_village + degree_leader',
    data=preprocessed_dataset,
).fit()
model_2_rsq = model_2.rsquared
model_2_coef = model_2.params
model_2_rbterr = model_2.HC1_se  # HC1 heteroskedasticity-robust standard errors

In [18]:
# Specification (3): leaders' eigenvector centrality, village size and leaders' degree.
model_3 = smf.ols(
    formula='take_up_rate ~ eigen_cen_leader+ HHnum_in_village + degree_leader',
    data=preprocessed_dataset,
).fit()
model_3_rsq = model_3.rsquared
model_3_coef = model_3.params
model_3_rbterr = model_3.HC1_se  # HC1 heteroskedasticity-robust standard errors

In [19]:
# Specification (5): adds the share of adopting leaders and the mean
# eigenvector centrality of adopting leaders to centrality and village size.
model_5 = smf.ols(
    formula='take_up_rate ~ eigen_cen_leader+ HHnum_in_village + frac_taking_leaders + eig_cen_taking',
    data=preprocessed_dataset,
).fit()
model_5_rsq = model_5.rsquared
model_5_coef = model_5.params
model_5_rbterr = model_5.HC1_se  # HC1 heteroskedasticity-robust standard errors

In [20]:
# Report coefficient, HC1-robust standard error and R-squared for each
# specification. Values are looked up by term name rather than by integer
# position: positional `series[1]` on a label-indexed Series is deprecated in
# pandas and silently depends on the order of terms in the formula.
print("Take-up Rate (1) \n- Eigenvector Centrality of Leaders: %.3f (Robust Standard Error: %.3f)\n- Number of HouseHolds: %.6f (Robust Standard Error: %.6f)\n- Constant: %.3f (Robust Standard Error: %.3f)\n- R-squared: %.3f" % (
    model_1_coef['eigen_cen_leader'], model_1_rbterr['eigen_cen_leader'],
    model_1_coef['HHnum_in_village'], model_1_rbterr['HHnum_in_village'],
    model_1_coef['Intercept'], model_1_rbterr['Intercept'],
    model_1_rsq,
))
print("\nTake-up Rate (2) \n- Number of HouseHolds: %.6f (Robust Standard Error: %.6f)\n- Degree of Leaders: %.5f (Robust Standard Error: %.5f)\n- Constant: %.3f (Robust Standard Error: %.4f)\n- R-squared: %.3f" % (
    model_2_coef['HHnum_in_village'], model_2_rbterr['HHnum_in_village'],
    model_2_coef['degree_leader'], model_2_rbterr['degree_leader'],
    model_2_coef['Intercept'], model_2_rbterr['Intercept'],
    model_2_rsq,
))
print("\nTake-up Rate (3) \n- Eigenvector Centrality of Leaders: %.3f (Robust Standard Error: %.3f)\n- Number of HouseHolds: %.6f (Robust Standard Error: %.6f)\n- Degree of Leaders: %.5f (Robust Standard Error: %.5f)\n- Constant: %.3f (Robust Standard Error: %.3f)\n- R-squared: %.3f" % (
    model_3_coef['eigen_cen_leader'], model_3_rbterr['eigen_cen_leader'],
    model_3_coef['HHnum_in_village'], model_3_rbterr['HHnum_in_village'],
    model_3_coef['degree_leader'], model_3_rbterr['degree_leader'],
    model_3_coef['Intercept'], model_3_rbterr['Intercept'],
    model_3_rsq,
))
print("\nTake-up Rate (5) \n- Eigenvector Centrality of Leaders: %.3f (Robust Standard Error: %.3f)\n- Number of HouseHolds: %.6f (Robust Standard Error: %.6f)\n- Fraction of Taking Leaders: %.3f (Robust Standard Error: %.3f)\n- Eigenvector Centrality of Taking Leaders: %.3f (Robust Standard Error: %.3f)\n- Constant: %.4f (Robust Standard Error: %.4f)\n- R-squared: %.3f" % (
    model_5_coef['eigen_cen_leader'], model_5_rbterr['eigen_cen_leader'],
    model_5_coef['HHnum_in_village'], model_5_rbterr['HHnum_in_village'],
    model_5_coef['frac_taking_leaders'], model_5_rbterr['frac_taking_leaders'],
    model_5_coef['eig_cen_taking'], model_5_rbterr['eig_cen_taking'],
    model_5_coef['Intercept'], model_5_rbterr['Intercept'],
    model_5_rsq,
))

Take-up Rate (1) 
- Eigenvector Centrality of Leaders: 1.634 (Robust Standard Error: 0.904)
- Number of HouseHolds: -0.000382 (Robust Standard Error: 0.000247)
- Constant: 0.150 (Robust Standard Error: 0.112)
- R-squared: 0.293

Take-up Rate (2) 
- Number of HouseHolds: -0.000704 (Robust Standard Error: 0.000188)
- Degree of Leaders: -0.00111 (Robust Standard Error: 0.00231)
- Constant: 0.362 (Robust Standard Error: 0.0573)
- R-squared: 0.235

Take-up Rate (3) 
- Eigenvector Centrality of Leaders: 1.934 (Robust Standard Error: 0.967)
- Number of HouseHolds: -0.000270 (Robust Standard Error: 0.000270)
- Degree of Leaders: -0.00324 (Robust Standard Error: 0.00259)
- Constant: 0.162 (Robust Standard Error: 0.107)
- R-squared: 0.311

Take-up Rate (5) 
- Eigenvector Centrality of Leaders: 1.254 (Robust Standard Error: 0.735)
- Number of HouseHolds: -0.000305 (Robust Standard Error: 0.000216)
- Fraction of Taking Leaders: 0.323 (Robust Standard Error: 0.101)
- Eigenvector Centrality of Takin

In [23]:
# Full OLS summary for specification (1). The model was fitted with a plain
# .fit(), so the standard errors in this table are the default non-robust ones,
# not the HC1 values printed above.
model_1.summary()

0,1,2,3
Dep. Variable:,take_up_rate,R-squared:,0.293
Model:,OLS,Adj. R-squared:,0.258
Method:,Least Squares,F-statistic:,8.292
Date:,"Thu, 14 Dec 2017",Prob (F-statistic):,0.000971
Time:,11:45:40,Log-Likelihood:,53.433
No. Observations:,43,AIC:,-100.9
Df Residuals:,40,BIC:,-95.58
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1503,0.115,1.309,0.198,-0.082,0.382
eigen_cen_leader,1.6336,0.880,1.856,0.071,-0.145,3.412
HHnum_in_village,-0.0004,0.000,-1.416,0.165,-0.001,0.000

0,1,2,3
Omnibus:,10.005,Durbin-Watson:,1.565
Prob(Omnibus):,0.007,Jarque-Bera (JB):,9.258
Skew:,1.002,Prob(JB):,0.00977
Kurtosis:,4.075,Cond. No.,18500.0


In [28]:
# Preview the first rows only rather than dumping the whole table into the output.
preprocessed_dataset.head(10)

Unnamed: 0,village,eigen_cen_leader,HHnum_in_village,degree_leader,frac_taking_leaders,eig_cen_taking,take_up_rate
0,1,0.088717,182,21.111111,0.321429,0.077894,0.214286
1,2,0.07825,195,15.727273,0.272727,0.028603,0.138728
2,3,0.064053,292,16.636364,0.057143,0.047894,0.140078
3,4,0.066193,239,16.625,0.111111,0.107557,0.067873
4,6,0.121904,114,13.6,0.136364,0.133721,0.23913
5,9,0.09711,207,24.058824,0.241379,0.093952,0.168539
6,12,0.09385,175,15.529412,0.185185,0.121728,0.331081
7,15,0.083333,171,15.368421,0.035714,0.08424,0.132867
8,19,0.082088,204,20.363636,0.185185,0.088203,0.20904
9,20,0.122779,156,17.0,0.5,0.118817,0.39726


In [29]:
from sklearn.linear_model import LinearRegression

In [30]:
# Design matrix: every column except the village id and the outcome.
X = preprocessed_dataset[[col for col in preprocessed_dataset.columns if col not in ('village', 'take_up_rate')]]

In [31]:
# Outcome: take-up rate among non-leader households.
y = preprocessed_dataset.loc[:, 'take_up_rate']

In [32]:
# Fit ordinary least squares with scikit-learn on all five regressors;
# `fit` returns the estimator itself, so it can be bound in one step.
model = LinearRegression().fit(X, y)
model.coef_

array([  1.84543644e+00,  -1.83946522e-04,  -3.45854612e-03,
         3.14205822e-01,  -3.96374937e-01])

In [33]:
# Full specification with all five village-level regressors.
# NOTE: this rebinds `model_2`, which an earlier cell assigned to the
# two-regressor specification (village size + leaders' degree).
model_2 = smf.ols(
    formula='take_up_rate ~ eigen_cen_leader+HHnum_in_village+degree_leader+frac_taking_leaders+eig_cen_taking',
    data=preprocessed_dataset,
).fit()

In [34]:
# Full OLS summary for the five-regressor specification currently bound to
# `model_2`; fitted with a plain .fit(), so standard errors are non-robust.
model_2.summary()

0,1,2,3
Dep. Variable:,take_up_rate,R-squared:,0.519
Model:,OLS,Adj. R-squared:,0.454
Method:,Least Squares,F-statistic:,7.975
Date:,"Thu, 14 Dec 2017",Prob (F-statistic):,3.53e-05
Time:,12:53:10,Log-Likelihood:,61.698
No. Observations:,43,AIC:,-111.4
Df Residuals:,37,BIC:,-100.8
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1014,0.100,1.012,0.318,-0.102,0.304
eigen_cen_leader,1.8454,1.047,1.762,0.086,-0.276,3.967
HHnum_in_village,-0.0002,0.000,-0.720,0.476,-0.001,0.000
degree_leader,-0.0035,0.003,-1.140,0.262,-0.010,0.003
frac_taking_leaders,0.3142,0.081,3.857,0.000,0.149,0.479
eig_cen_taking,-0.3964,0.463,-0.857,0.397,-1.334,0.541

0,1,2,3
Omnibus:,2.331,Durbin-Watson:,1.992
Prob(Omnibus):,0.312,Jarque-Bera (JB):,2.04
Skew:,0.524,Prob(JB):,0.361
Kurtosis:,2.799,Cond. No.,26600.0
