In [27]:
# you 1st need to right click on the shared folder and select 'Add to my Drive'
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [28]:
#paths to Datasets
ng_state_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/nga_adm_osgof_20190417/nga_admbnda_adm1_osgof_20190417.shp' 
zonal_stats_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/nga_lga_zonal_statistics_2016.csv'
pop_den_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/Nigeria_Population_Density_by_State_as_at_2016.csv'
dhs_survey_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/dhs_survey/NGPR7ADT/NGPR7AFL.DTA'
dhs_survey_dict_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/dhs_survey/NGPR7ADT/NGPR7AFL.DO'
lga_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/nigeria-lgas/new_lga_nigeria_2003.shp'
dhs_gps_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/NG_2018_DHS_07172021_114_165261_gps/NGGE7BFL/NGGE7BFL.shp'
fb_sett_img_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/hrsl_nga_v1_fb_settlement/hrsl_nga_settlement.tif'
fb_pop_img_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/hrsl_nga_v1_fb_settlement/hrsl_nga_pop.tif'
per_house_no_elect='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/DHSstatcompiler_Ng_%_households_with_no_electricity_2018.xlsx'
per_pop__no_elect='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/DHS_STATcompilerExport_%_Pop_without_electricity_2018.xlsx'

In [29]:


!pip install -q geopandas
import os
import glob

import numpy as np
import pandas as pd
from functools import reduce


In [30]:

import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline

In [31]:
#Helpers
def get_dhs_dict(dhs_dict_file):
  dhs_dict = dict()
  with open(dhs_dict_file, 'r', errors='replace') as file:
    line = file.readline()
    while line:
      line = file.readline()
      if 'label variable' in line:
        code = line.split()[2]
        colname = ' '.join([x.strip('"') for x in line.split()[3:]])
        dhs_dict[code] = colname
  return dhs_dict

def cleaning_txt(x):
  return x.str.replace("Region : ..".strip(), "").apply(lambda a:str(a).lower())

#function to change codes values to actual string values as described in the metadata
def dhs_preprocessing(df,df_survey_dict_path,prep_list):
  df_metadata=get_dhs_dict(dhs_survey_dict_path)
  df.columns = df.columns.map(df_metadata)
  #i=range(0,len(x))
  df=df[[prep_list[0]]]
  for i in range(1,len(x)):
    df.replace(prep_list[i],inplace=True)
  return df

 #I manually extract the important columns needed for the analysis
 #and converted to list
important_cols=list(
{'hhid': 'Case Identification',
'hv000': 'Country code and phase',
'hv002': 'Household number',
'hv004': 'Ultimate area unit',
 'hv023': 'Stratification used in sample design',
 'hv024': 'Region',
 'hv025': 'Type of place of residence',
 'hv040': 'Cluster altitude in meters',
'hv206': 'Has electricity',
 'hv208': 'Has television',
 'hv209': 'Has refrigerator',
'sh121m': 'Has electric iron',
'hv235': 'Location of source for water',
'hv243a': 'Has mobile telephone',
 'hv243e': 'Has a computer',
 'shstate': 'State',
 'sh121l': 'Has air conditioner',
'hv009': 'Number of household members'}.values())


#convert state codes to state strings
state_dict={10:"Sokoto",
    20:"Zamfara",
    30:"Katsina",
    40:"Jigawa",
    50:"Yobe",
    60:"Borno",
    70:"Adamawa",
    80:"Gombe",
    90:"Bauchi",
   100:"Kano",
   110:"Kaduna",
   120:"Kebbi",
   130:"Niger",
   140:"FCT Abuja",
   150:"Nasarawa",
   160:"Plateau",
   170:"Taraba",
   180:"Benue",
   190:"Kogi",
   200:"Kwara",
   210:"Oyo",
   220:"Osun",
   230:"Ekiti",
   240:"Ondo",
   250:"Edo",
   260:"Anambra",
   270:"Enugu",
   280:"Ebonyi",
   290:"Cross River",
   300:"Akwa Ibom",
   310:"Abia",
   320:"Imo",
   330:"Rivers",
   340:"Bayelsa",
   350:"Delta",
   360:"Lagos",
   370:"Ogun"}

#convert strata codes to strata strings
stratum_dict={1:"NC Benue Urban",
     2:"NC Benue Rural",
     3:"NC FCT Abuja Urban",
     4:"NC FCT Abuja Rural",
     5:"NC Kogi Urban",
     6:"NC Kogi Rural",
     7:"NC Kwara Urban",
     8:"NC Kwara Rural",
     9:"NC Nasarawa Urban",
    10:"NC Nasarawa Rural",
    11:"NC Niger Urban",
    12:"NC Niger Rural",
    13:"NC Plateau Urban",
    14:"NC Plateau Rural",
    15:"NE Adamawa Urban",
    16:"NE Adamawa Rural",
    17:"NE Bauchi Urban",
    18:"NE Bauchi Rural",
    19:"NE Borno Urban",
    20:"NE Borno Rural",
    21:"NE Gombe Urban",
    22:"NE Gombe Rural",
    23:"NE Taraba Urban",
    24:"NE Taraba Rural",
    25:"NE Yobe Urban",
    26:"NE Yobe Rural",
    27:"NW Jigawa Urban",
    28:"NW Jigawa Rural",
    29:"NW Kaduna Urban",
    30:"NW Kaduna Rural",
    31:"NW Kano Urban",
    32:"NW Kano Rural",
    33:"NW Katsina Urban",
    34:"NW Katsina Rural",
    35:"NW Kebbi Urban",
    36:"NW Kebbi Rural",
    37:"NW Sokoto Urban",
    38:"NW Sokoto Rural",
    39:"NW Zamfara Urban",
    40:"NW Zamfara Rural",
    41:"SE Abia Urban",
    42:"SE Abia Rural",
    43:"SE Anambra Urban",
    44:"SE Anambra Rural",
    45:"SE Ebonyi Urban",
    46:"SE Ebonyi Rural",
    47:"SE Enugu Urban",
    48:"SE Enugu Rural",
    49:"SE Imo Urban",
    50:"SE Imo Rural",
    51:"SS Akwa Ibom Urban",
    52:"SS Akwa Ibom Rural",
    53:"SS Bayelsa Urban",
    54:"SS Bayelsa Rural",
    55:"SS Cross River Urban",
    56:"SS Cross River Rural",
    57:"SS Delta Urban",
    58:"SS Delta Rural",
    59:"SS Edo Urban",
    60:"SS Edo Rural",
    61:"SS Rivers Urban",
    62:"SS Rivers Rural",
    63:"SW Ekiti Urban",
    64:"SW Ekiti Rural",
    65:"SW Lagos Urban",
    66:"SW Lagos Rural",
    67:"SW Ogun Urban",
    68:"SW Ogun Rural",
    69:"SW Ondo Urban",
    70:"SW Ondo Rural",
    71:"SW Osun Urban",
    72:"SW Osun Rural",
    73:"SW Oyo Urban",
    74:"SW Oyo Rural"}

#convert region codes to region strings
region_dict={1:"North Central",
     2:"North East",
     3:"North West",
     4:"South East",
     5:"South South",
     6:"South West"}
#place codes to place strings
place_type_dict={1:"Urban",2:"Rural"}

#prep_list=[important_cols,{"State":  state_dict},{'Stratification used in sample design':stratum_dict},{'Region':region_dict},{'Type of place of residence':place_type_dict}]

In [32]:
#Loading DHS stata files
dhs_survey_data =pd.read_stata(dhs_survey_path,convert_categoricals=False)
print('Data Dimensions: {}'.format(dhs_survey_data.shape))

Data Dimensions: (188010, 357)


In [33]:
#preprocessing Dhs_data  
#look into this later
#dhs_analysis_data=dhs_preprocessing(dhs_survey_data,dhs_survey_dict_path,prep_list)


In [34]:
#changing the names of columns
dhs_survey_dict=get_dhs_dict(dhs_survey_dict_path)
dhs_survey_data.columns = dhs_survey_data.columns.map(dhs_survey_dict)

In [35]:
dhs_survey_data.head(5)

Unnamed: 0,Case Identification,Line number,Country code and phase,Cluster number,Household number,Respondent's line number (answering Household questionnaire),Ultimate area unit,Household sample weight (6 decimals),Month of interview,Year of interview,Date of interview (CMC),Date of interview Century Day Code (CDC),Number of household members,Number of eligible women in household,Number of eligible men in household,Number of de jure members,Number of de facto members,Number of children 5 and under (de jure),Result of household interview,Day of interview,Number of visits,Interviewer identification,NA - Keyer identification,Ever-married sample,Primary sampling unit,Sample strata for sampling errors,Stratification used in sample design,Region,Type of place of residence,NA - Place of residence,Household selected for male interview,Household weight for male subsample (6 decimals),Field supervisor,NA - Field editor,NA - Office editor,Number of eligible children for height and weight,Cluster altitude in meters,Total adults measured,Household selected for hemoglobin,Household selected for Domestic Violence module,...,NA - Line number of person who slept in this net,"Obtained net from campaign, antenatal or immunization visit",Place where net was obtained,Shown Net,Reason not sleeping inside the net,Index to Household Schedule,Type of Mosquito Bed Net(s) person slept under last night,Net Designation Number (HMLIDX) for 1st net person slept under last night,Net Designation Number (HMLIDX) for 2nd net person slept under last night,Net Designation Number (HMLIDX) for 3rd net person slept under last night,Corrected age from Individual file,Age in months (for children),Flag for age from Individual file,Pregnancy status from Individual file,Person slept under an ever-treated net,Person slept under an LLIN net,Line number of parent/caretaker (for malaria testing),Read consent statement for malaria,Final result of malaria from blood smear test,NA - Presence of species: falciparum (Pf),NA - Presence of species: malariae (Pm),NA - Presence of species: ovale (Po),NA - Presence of species: vivax (Pv),NA - Presence of species: CS,NA - Presence of species: CS.1,NA - Presence of species: CS.2,Result of malaria measurement,Bar code for blood smear sample,Result of malaria rapid test,Fieldworker measurer code for malaria,Index to household schedule,Wear glasses or contact lenses,Have difficulty seeing,Wear a hearing aid,Have difficulty hearing,Have difficulty communicating using usual language,Have difficulty remembering or concentrating,Have difficulty walking or climbing steps,Have difficulty washing all over or dressing,Highest degree of difficulty for any of the impairments
0,1 1,1,NG7,1,1,1,1,1368354,9,2018,1425,43345,5,1,0,5,5,1,1,2,1,702,,0,1,1,1,1,1,,0,0,701,,,0,100,0,0,0,...,,2.0,,1.0,,1,1,2,,,82,,0,,0,0,,,,,,,,,,,,,,,1,1.0,2.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0
1,1 1,2,NG7,1,1,1,1,1368354,9,2018,1425,43345,5,1,0,5,5,1,1,2,1,702,,0,1,1,1,1,1,,0,0,701,,,0,100,0,0,0,...,,2.0,,1.0,,2,1,1,,,40,,1,0.0,0,0,,,,,,,,,,,,,,,2,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1 1,3,NG7,1,1,1,1,1368354,9,2018,1425,43345,5,1,0,5,5,1,1,2,1,702,,0,1,1,1,1,1,,0,0,701,,,0,100,0,0,0,...,,,,,,3,0,0,,,18,,0,,0,0,,,,,,,,,,,,,,,3,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1 1,4,NG7,1,1,1,1,1368354,9,2018,1425,43345,5,1,0,5,5,1,1,2,1,702,,0,1,1,1,1,1,,0,0,701,,,0,100,0,0,0,...,,,,,,4,0,0,,,11,,0,,0,0,,,,,,,,,,,,,,,4,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1 1,5,NG7,1,1,1,1,1368354,9,2018,1425,43345,5,1,0,5,5,1,1,2,1,702,,0,1,1,1,1,1,,0,0,701,,,0,100,0,0,0,...,,2.0,,1.0,,5,1,1,,,1,,0,,0,0,,,,,,,,,,,,,,,5,,,,,,,,,


In [36]:
#creating a subset from the main dataset
dhs_analysis_data=dhs_survey_data[important_cols]
# Remap the values of the dataframe
dhs_analysis_data.replace({"State":  state_dict},inplace=True)
dhs_analysis_data.replace({'Stratification used in sample design':  stratum_dict},inplace=True)
dhs_analysis_data.replace({'Region':region_dict},inplace=True)
dhs_analysis_data.replace({'Type of place of residence':place_type_dict},inplace=True)
#changing some values to regular values
dhs_analysis_data.loc[dhs_analysis_data.State == 'FCT Abuja', 'State'] = 'Abuja'
dhs_analysis_data.loc[dhs_analysis_data.State == 'Nasarawa', 'State'] = 'Nassarawa'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [37]:
# Define the aggregation procedure outside of the groupby operation

'''
households_total_per_state is same as htps
percent_sample_households_with_no_electricity is same as pshwne per state
percent_sample_households_with_television is same as pshwt
percent_sample_households_with_refegerator is same as pshwr
percent_sample_households_with_iron is same as pshwi
percent_sample_households_with_phone is same as pshwp
percent_sample_households_with_computer is same as pshwc
percent_sample_households_with_air_conditioner is same as pshwa
Sample_pop: 

'''


dhs_state_data=dhs_analysis_data.groupby('State').agg(
  
  htps=('Has electricity','sum'),
  pshwe=('Has electricity' , 'mean'),
  pshwt= ('Has television', 'mean'),
  pshwr= ('Has refrigerator', 'mean'),
  pshwi= ('Has electric iron', 'mean'),
  pshwp= ('Has mobile telephone', 'mean'),
  pshwc= ('Has a computer', 'mean'),
  pshwac= ('Has air conditioner', 'mean'),
  Sample_pop=('Number of household members', 'sum')
)


In [38]:
# overwrite index (state names) to match nigeria-states
dhs_state_data.reset_index(inplace=True)
dhs_state_data.State =dhs_state_data.State.astype(str)
dhs_state_data.set_index('State', drop=True, inplace=True)
dhs_state_data.index =dhs_state_data.index.str.lower()


In [39]:
#loading of excel files
statcompiler_house_no_elect = pd.read_excel(per_house_no_elect,
                   sheet_name="Indicator Data",
                   skiprows=[i for i in range(1,7)])

statcompiler_pop_no_elect = pd.read_excel(per_pop__no_elect,
                   sheet_name="Indicator Data",
                   skiprows=[i for i in range(1,8)])

In [40]:
"""
Characteristics is same as STATE as describe in the dataset dictioanry
sites:
This statcompiler_pop_no_elect conotes percentage of population without electricity
"""
statcompiler_pop_no_elect['Characteristic']=cleaning_txt(statcompiler_pop_no_elect['Characteristic'])
statcompiler_pop_no_elect=statcompiler_pop_no_elect[['Characteristic','Population with no electricity']]
statcompiler_pop_no_elect.rename(columns={'Characteristic':'STATE','Population with no electricity':'%pop_with_no_elect'}, inplace = True)
#statcompiler_pop_no_elect.loc[statcompiler_pop_no_elect.STATE == 'fct abuja', 'STATE'] = 'abuja'
#statcompiler_pop_no_elect.loc[statcompiler_pop_no_elect.STATE == 'Nasarawa', 'STATE'] = 'nassarawa'
#statcompiler_pop_no_elect.STATE.value_counts()
statcompiler_pop_no_elect.reset_index(inplace=True)
statcompiler_pop_no_elect.STATE =statcompiler_pop_no_elect.STATE.astype(str)
statcompiler_pop_no_elect.set_index('STATE', drop=True, inplace=True)
statcompiler_pop_no_elect.index =statcompiler_pop_no_elect.index.str.lower()
statcompiler_pop_no_elect.rename(index={'fct abuja':'abuja','nasarawa':'nassarawa'},inplace=True)






In [41]:
statcompiler_house_no_elect['Characteristic']=cleaning_txt(statcompiler_house_no_elect['Characteristic'])

In [42]:
"""
Characteristics is same as "STATE" as describe in the dataset dictioanry from Dhs statcompiller sites
sites:
This statcompiler_house_no_elect conotes percentage of households without electricity
"""
statcompiler_house_no_elect=statcompiler_house_no_elect[['Characteristic','Households with no electricity']]
statcompiler_house_no_elect.rename(columns={'Characteristic':'STATE','Households with no electricity':'%_with_no_elect'}, inplace = True)
#statcompiler_house_no_elect.loc[statcompiler_house_no_elect.STATE =='fct abuja', 'STATE'] = 'abuja'
#statcompiler_house_no_elect.loc[statcompiler_house_no_elect.STATE == 'nasarawa', 'STATE'] = 'nassarawa'
#statcompiler_house_no_elect.STATE.value_counts()
statcompiler_house_no_elect.reset_index(inplace=True)
statcompiler_house_no_elect.STATE =statcompiler_house_no_elect.STATE.astype(str)
statcompiler_house_no_elect.set_index('STATE', drop=True, inplace=True)
statcompiler_house_no_elect.index =statcompiler_house_no_elect.index.str.lower()
statcompiler_house_no_elect.rename(index={'fct abuja':'abuja','nasarawa':'nassarawa'},inplace=True)




In [43]:
# zonal statistics  dataset from night time dataset

zonal_stats_data=pd.read_csv(zonal_stats_path,usecols=['mean','STATE'])
#zonal_stats_data.STATE.value_counts()
#zonal_stats_data.info()
#zonal_stats_data.columns=['Mean_Avg_rad','State']
#this sums up all avg_rad for all LGA per states
zonal_stats_data=zonal_stats_data.groupby('STATE').sum()
zonal_stats_data.reset_index(inplace=True)
zonal_stats_data.STATE =zonal_stats_data.STATE.astype(str)
zonal_stats_data.set_index('STATE', drop=True, inplace=True)
zonal_stats_data.index =zonal_stats_data.index.str.lower()


In [44]:
pop_den_data=pd.read_csv(pop_den_path,usecols=['NAME_1','nga_pop__1'])
pop_den_data.NAME_1 =pop_den_data.NAME_1.astype(str)
pop_den_data.columns=['STATE','POPULATION']
pop_den_data.reset_index(inplace=True)
pop_den_data.set_index('STATE', drop=True, inplace=True)
pop_den_data.index =pop_den_data.index.str.lower()
pop_den_data.rename(index={'federal capital territory':'abuja',},inplace=True)



In [45]:
#loading the nigeria lga dataset
ng = gpd.read_file(ng_state_path)
#dhs_gps_data = gpd.read_file(dhs_gps_path)
ng_data=ng[['ADM1_REF','Shape_Leng','Shape_Area','geometry']].rename(columns={'ADM1_REF': 'STATE', 'Shape_Leng': 'Shape_Leng','Shape_Area':'Shape_Area','geometry':'Geometry'},inplace=False)
ng_data.reset_index(inplace=True)
ng_data.set_index('STATE', drop=True, inplace=True)
ng_data.index =ng_data.index.str.lower()
ng_data.rename(index={'federal capital territory':'abuja','nasarawa':'nassarawa'},inplace=True)



In [46]:
#solution 1
dfs1=[ng_data,pop_den_data,zonal_stats_data,statcompiler_house_no_elect,statcompiler_pop_no_elect,dhs_state_data]
combined1=dfs1[0].join(dfs1[1:])
nan_value = 0
dfs2=[ng_data,pop_den_data,zonal_stats_data,statcompiler_house_no_elect,statcompiler_pop_no_elect,dhs_state_data]
combined2 = reduce(lambda df_left,df_right: pd.merge(df_left, df_right, 
                                              left_index=True, right_index=True, 
                                              how='outer'), dfs2)


In [47]:
combined1

Unnamed: 0_level_0,index_x,Shape_Leng,Shape_Area,Geometry,index_y,POPULATION,mean,index_x,%_with_no_elect,index_y,%pop_with_no_elect,htps,pshwe,pshwt,pshwr,pshwi,pshwp,pshwc,pshwac,Sample_pop
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
abia,0,4.695135,0.396543,"POLYGON ((7.38681 6.03667, 7.38729 6.03605, 7....",0,3644714,9.893536,20,10.5,20,8.9,3677.0,0.891178,0.771207,0.411537,0.571983,0.956617,0.078769,0.046534,22232.0
abuja,14,3.498412,0.607222,"POLYGON ((7.67239 9.41128, 7.71959 9.34635, 7....",14,2996670,4.913411,0,19.2,0,22.7,3440.0,0.727119,0.703868,0.406679,0.506024,0.928979,0.164236,0.10019,33251.0
adamawa,1,11.525443,3.113007,"POLYGON ((13.62129 10.94823, 13.62592 10.94822...",1,4145684,0.740757,7,55.2,7,57.3,1737.0,0.3653,0.326814,0.100736,0.13123,0.784648,0.052787,0.02776,33423.0
akwa ibom,2,5.26383,0.549476,"MULTIPOLYGON (((8.34482 4.61140, 8.34496 4.609...",2,5353609,41.608243,31,26.3,31,25.3,2893.0,0.717688,0.600595,0.276358,0.398412,0.888613,0.067229,0.0129,20921.0
anambra,3,3.59596,0.392661,"POLYGON ((6.93254 6.71090, 6.93167 6.69870, 6....",3,5425334,12.464056,21,18.5,21,16.1,4127.0,0.83695,0.750355,0.412898,0.467045,0.982357,0.061651,0.012371,26223.0
bauchi,4,13.952005,4.011018,"POLYGON ((10.75125 12.46148, 10.75615 12.39191...",4,6386388,0.052741,8,65.6,8,63.6,2514.0,0.350774,0.205386,0.07409,0.117622,0.783452,0.039626,0.015627,61599.0
bayelsa,5,5.046708,0.776768,"POLYGON ((6.55283 5.37988, 6.56198 5.37925, 6....",5,2228965,21.421858,32,52.9,32,51.2,1676.0,0.4421,0.553944,0.27961,0.421525,0.927987,0.047745,0.03139,21605.0
benue,6,9.40808,2.578363,"POLYGON ((8.52442 8.15727, 8.54240 8.14873, 8....",6,5568946,0.775961,1,56.1,1,58.7,1943.0,0.393639,0.436588,0.129862,0.199959,0.889587,0.048825,0.006078,27990.0
borno,7,13.714364,5.987849,"POLYGON ((13.35885 13.71261, 13.36207 13.71153...",7,5669054,0.980581,9,57.4,9,52.9,2089.0,0.354248,0.299644,0.153637,0.22079,0.852128,0.060878,0.032559,43997.0
cross river,8,8.779796,1.711218,"MULTIPOLYGON (((8.56068 4.79847, 8.55944 4.798...",8,3780419,2.591886,33,46.5,33,46.6,1620.0,0.512821,0.604622,0.256094,0.328268,0.813865,0.062362,0.033238,14401.0


In [48]:
combined2.head()

Unnamed: 0,index_x,Shape_Leng,Shape_Area,Geometry,index_y,POPULATION,mean,index_x.1,%_with_no_elect,index_y.1,%pop_with_no_elect,htps,pshwe,pshwt,pshwr,pshwi,pshwp,pshwc,pshwac,Sample_pop
abia,0.0,4.695135,0.396543,"POLYGON ((7.38681 6.03667, 7.38729 6.03605, 7....",0.0,3644714.0,9.893536,20.0,10.5,20.0,8.9,3677.0,0.891178,0.771207,0.411537,0.571983,0.956617,0.078769,0.046534,22232.0
abuja,14.0,3.498412,0.607222,"POLYGON ((7.67239 9.41128, 7.71959 9.34635, 7....",14.0,2996670.0,4.913411,0.0,19.2,0.0,22.7,3440.0,0.727119,0.703868,0.406679,0.506024,0.928979,0.164236,0.10019,33251.0
adamawa,1.0,11.525443,3.113007,"POLYGON ((13.62129 10.94823, 13.62592 10.94822...",1.0,4145684.0,0.740757,7.0,55.2,7.0,57.3,1737.0,0.3653,0.326814,0.100736,0.13123,0.784648,0.052787,0.02776,33423.0
akwa ibom,2.0,5.26383,0.549476,"MULTIPOLYGON (((8.34482 4.61140, 8.34496 4.609...",2.0,5353609.0,41.608243,31.0,26.3,31.0,25.3,2893.0,0.717688,0.600595,0.276358,0.398412,0.888613,0.067229,0.0129,20921.0
anambra,3.0,3.59596,0.392661,"POLYGON ((6.93254 6.71090, 6.93167 6.69870, 6....",3.0,5425334.0,12.464056,21.0,18.5,21.0,16.1,4127.0,0.83695,0.750355,0.412898,0.467045,0.982357,0.061651,0.012371,26223.0


In [49]:
"""Column description 
'Shape_Leng': Length of state polygon from map
'Shape_Area': Area coverage of state from shape file
'Geometry': points of state polygon  coverage
'POPULATION' : Population of state
'mean' : avg_rad per state from(night time zonal statistics)
'%_with_no_elect' : % of house sample households without electricity
'%pop_with_no_elect' : % of pop without electricity
households_total_per_state is same as htps
percent_sample_households_with_no_electricity : is same as pshwne per state
percent_sample_households_with_television : is same as pshwt
percent_sample_households_with_refegerator : is same as pshwr
percent_sample_households_with_iron : is same as pshwi
percent_sample_households_with_phone : is same as pshwp
percent_sample_households_with_computer : is same as pshwc
percent_sample_households_with_air_conditioner : is same as pshwa
'Sample_pop':
"""
combined1=combined1[['Shape_Leng', 'Shape_Area', 'Geometry',
       'POPULATION', 'mean', '%_with_no_elect',
       '%pop_with_no_elect', 'htps', 'pshwe', 'pshwt', 'pshwr', 'pshwi',
       'pshwp', 'pshwc', 'pshwac', 'Sample_pop']]

In [50]:
combined1.to_csv('state_electricity.csv', index=False)