In [172]:
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely
from tqdm import tqdm

pd.set_option('display.max_rows',500)

# Import Org demand

In [2]:
df = gpd.read_file('OrgDemand.csv')

## Get geometry

In [3]:
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.lon.astype('float64'), df.lat.astype('float64')))

In [4]:
gdf.head()

Unnamed: 0,type,name,state,lat,lon,geometry
0,healthcare,State Fire service,Abia,5.1137,7.3761,POINT (7.37610 5.11370)
1,healthcare,Still Waters Ambulance Services,Abia,5.4558,7.8828,POINT (7.88280 5.45580)
2,healthcare,NASA Ambulance Services,Abia,5.7643,7.6726,POINT (7.67260 5.76430)
3,healthcare,Allied Mortuary and Funeral Services,Abia,5.1613,7.3294,POINT (7.32940 5.16130)
4,healthcare,Paradise Ambulance,Abia,5.0286,7.3155,POINT (7.31550 5.02860)


In [5]:
vc = gdf['geometry'].astype('str').value_counts()
dupl = vc[vc>1].index.tolist()

In [6]:
gdf['geometry_str'] = gdf['geometry'].astype('str')

In [7]:
vc.shape

(148975,)

We have ca. 150k unique points for ca 260k organisations.

In [8]:
gdf.shape

(259345, 7)

In [10]:
# for every point
df_unique = pd.DataFrame()
for point in tqdm(vc.index):
    # get all data with the same point coordinates
    filter_duplicate = gdf['geometry_str'] == point
    temp = gdf.loc[filter_duplicate,:].drop_duplicates(keep='first',subset = ['name'])
    df_unique = pd.concat([df_unique,temp],axis=0,sort=False)

100%|████████████████████████████████████████████████████████████████████████| 148975/148975 [5:34:55<00:00,  7.41it/s]


In [13]:
df_unique.to_csv('OrgDemand_unique.csv')

# df_unique to gdf

In [14]:
df_unique.columns.tolist()

['type', 'name', 'state', 'lat', 'lon', 'geometry', 'geometry_str']

In [20]:
gdf = gpd.GeoDataFrame(
    df_unique.drop(labels = ['geometry','geometry_str'],axis=1), geometry=gpd.points_from_xy(df_unique.lon.astype('float64'), df_unique.lat.astype('float64')))

In [21]:
gdf.sample()

Unnamed: 0,type,name,state,lat,lon,geometry
171366,school,Delsit Nursery and Primary School,Lagos,6.5187,3.3342,POINT (3.33420 6.51870)


# 1 km clusters

## Load files

In [48]:
clust = pd.read_excel('D:/Google Drive/01 Data/12 Omdena/Repo/clusters_1km/clusters.xlsx')

In [49]:
clust.sample()

Unnamed: 0,label,population,area,density,address,address0,geolocator,label_out,geometry,centroid,ongrid,adm1,adm2,score,pred_solar,org,No. healthcare units,No. school units
171,863,4081,0,6887,"Tunga Bombo, 871, Kebbi, Nigeria",Tunga Bombo,Photon,"863-Tunga Bombo=4,081","POLYGON ((4.180833331248492 11.35250015587251,...",POINT (4.181414139326963 11.35675520636046),0,Kebbi,Bagudo,6887,5.839046,"['Tuga Health Clinic', 'Tuga Primary School', ...",1,2


In [50]:
def get_num_schools_healthcare(row,gdf):
    """get number of schools/healthcare units per cluster"""
    
    poly = poly = shapely.wkt.loads(row['geometry'])
    
    row['No. healthcare units'] = gdf.loc[gdf['type']=='healthcare',:].within(poly).sum()
    row['No. school units'] = gdf.loc[gdf['type']=='school',:].within(poly).sum()
    
    return row

# Apply per row

In [51]:
tqdm.pandas() # to be able to see the progress of the apply function
clust_new = clust.progress_apply(get_num_schools_healthcare,gdf = gdf, axis=1)

  from pandas import Panel
100%|██████████████████████████████████████████████████████████████████████████████| 2119/2119 [52:04<00:00,  1.47s/it]


In [52]:
clust_new.sample()

Unnamed: 0,label,population,area,density,address,address0,geolocator,label_out,geometry,centroid,ongrid,adm1,adm2,score,pred_solar,org,No. healthcare units,No. school units
1575,471,5832,0,14450,"Gumawa Health Post, Kano-Gumel Road, 187, Kano...",Gumawa Health Post,Photon,"471-Gumawa Health Post=5,832","POLYGON ((8.862499979075452 12.13500015272787,...",POINT (8.862964722663317 12.13839758861167),1,Kano,Gabasawa,0,5.826171,"['Gumawa Health Post', 'Madaki Gumawa Islamiyy...",1,2


## Check results

### Clusters without organisation names, but no. schools or healthcare > 0

In [71]:
# Check clusters without organisation names, but with positive no. schools or healthcare
filter_no_org = (clust_new.loc[clust_new['org'].isna(),:][['No. healthcare units','No. school units']].sum(axis=1)>1)
no_org_index = filter_no_org.replace(False,np.nan).dropna().index

In [95]:
def get_org_names(row,gdf):
    poly = shapely.wkt.loads(row['geometry'])

    row['org names'] = gdf.loc[gdf.within(poly),'name'].tolist()
    return row

In [101]:
clust_new.loc[no_org_index[:5],:].apply(get_org_names,gdf=gdf,axis=1)

Unnamed: 0,label,population,area,density,address,address0,geolocator,label_out,geometry,centroid,ongrid,adm1,adm2,score,pred_solar,org,No. healthcare units,No. school units,org names
39,306,4419,0,11435,"Jerau, Jigawa, Nigeria",Jerau,Photon,"306-Jerau=4,419","POLYGON ((10.31916663988028 12.45500015144188,...",POINT (10.31601446597992 12.45744580360597),0,Bauchi,Zaki,11435,5.486398,,0,3,"[Baturiya Primary School, Isilamic Primary Sch..."
57,1361,16071,1,10397,"Dobi, Gwagwalada, Federal Capital Territory, N...",Dobi,Nominatim,"1361-Dobi=16,071","POLYGON ((6.990833319940643 9.055833498435476,...",POINT (6.996237046626966 9.061884223048828),0,Federal Capital Territory,Gwagwalada,10397,,,6,3,"[Fad Victory Basic Accademy, LEA Primary Schoo..."
91,287,6925,0,9010,"Goronmaje, Kano, Nigeria",Goronmaje,Photon,"287-Goronmaje=6,925","POLYGON ((8.690833313099597 12.52250015117062,...",POINT (8.690712345358138 12.52586036621089),0,Jigawa,Babura,9010,6.28171,,1,3,"[Goronmaje Junior Secondary School, Goronmaje ..."
132,1453,10554,1,7853,"Wako, Federal Capital Territory, Nigeria",Wako,Photon,"1453-Wako=10,554","POLYGON ((6.913333320252514 8.585833500324267,...",POINT (6.913862420779471 8.59841947910968),0,Federal Capital Territory,Kwali,7853,,,1,2,"[Junior Secondary School Wako, LEA Primary Sch..."
136,808,7739,1,7670,"Maruta, Jigawa, Nigeria",Maruta,Photon,"808-Maruta=7,739","POLYGON ((9.862499975051307 11.45333348880062,...",POINT (9.864477886689922 11.45836360926234),0,Bauchi,Shira,7670,5.486398,,1,2,"[Maruta Primary School, Government Day Junior ..."


In [97]:
gdf.loc[gdf['name'] == 'Baturiya Primary School',:]

Unnamed: 0,type,name,state,lat,lon,geometry
29556,school,Baturiya Primary School,Bauchi,11.2875,10.2073,POINT (10.20730 11.28750)
107283,school,Baturiya Primary School,Jigawa,12.4558,10.3136,POINT (10.31360 12.45580)


So this is due to similar school names on different states.

### Clusters with exceptionaly large no. of schools / healthcare

In [149]:
clust_new.sample()

Unnamed: 0,label,population,area,density,address,address0,geolocator,label_out,geometry,centroid,ongrid,adm1,adm2,score,pred_solar,org,No. healthcare units,No. school units
1111,1990,4055,2,1988,"Obeagu, Enugu, Nigeria",Obeagu,Photon,"1990-Obeagu=4,055","POLYGON ((7.667499983884305 5.945000177603672,...",POINT (7.670822900537607 5.951786635909724),0,Enugu,Aninri,1988,3.781085,"['Cottage Hospital Okpanku', 'Rev Fr Cletus Ok...",1,7


In [151]:
clust_new['sum'] = clust_new[['No. healthcare units','No. school units']].sum(axis=1)
clust_new['Orgs/population'] = clust_new['sum'] / clust_new ['population']
clust_new.sort_values(by = 'Orgs/population',ascending = False)

Unnamed: 0,label,population,area,density,address,address0,geolocator,label_out,geometry,centroid,ongrid,adm1,adm2,score,pred_solar,org,No. healthcare units,No. school units,sum,Orgs/population
1179,2080,4442,2,1870,"Ndoro, Abia, Nigeria",Ndoro,Photon,"2080-Ndoro=4,442","POLYGON ((7.571666650936619 5.428333513013337,...",POINT (7.566346785638176 5.439469876604948),0,Abia,Ikwuano,1870,1.670673,"['Faith Clinic Maternity', 'Ndoro Health Cente...",3,12,15,0.003377
1397,1858,8176,5,1412,"Effium, Ebonyi, Nigeria",Effium,Photon,"1858-Effium=8,176","POLYGON ((8.062499982294769 6.61916684156106, ...",POINT (8.058824271715256 6.631372913863443),0,Ebonyi,Ohaukwu,1412,2.622899,"['Sudan United Missions Hospital Effium', 'Vic...",5,22,27,0.003302
1482,1793,6654,6,1028,"Benue, Nigeria",Benue,Photon,"1793-Benue=6,654","POLYGON ((9.258333310815894 6.933333506965182,...",POINT (9.257578277261883 6.947313745536879),0,Benue,Ushongo,1028,5.002134,"['Redeem Clinic And Maternity', 'Faith Clinic ...",7,14,21,0.003156
1470,1738,5756,5,1117,"Benue, Nigeria",Benue,Photon,"1738-Benue=5,756","POLYGON ((9.204166644367202 7.23333350575957, ...",POINT (9.202831332799864 7.244532813330781),0,Benue,Buruku,1117,5.002134,"['Atii Clinic', 'Torkwembe Clinic', 'Tyowanye ...",8,10,18,0.003127
1879,1232,6507,5,1292,"Tula, Kaltungo, Gombe, Nigeria",Tula,Nominatim,"1232-Tula=6,507","POLYGON ((11.47083330191247 9.830833495320977,...",POINT (11.47397432754086 9.842247125504517),1,Gombe,Kaltungo,0,5.162851,"['Tantan Cottage Hospital', 'Wange Maternity',...",4,14,18,0.002766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1632,445,4724,1,2941,"Geza Ketere, Kano, Nigeria",Geza Ketere,Photon,"445-Geza Ketere=4,724","POLYGON ((8.509999980493964 12.17333348590715,...",POINT (8.513686849165991 12.18054308183778),1,Kano,Minjibir,0,5.826171,,0,0,0,0.000000
1858,1170,4247,0,8527,"Hardo Deba, Gombe, Nigeria",Hardo Deba,Photon,"1170-Hardo Deba=4,247","POLYGON ((11.29833330260664 10.13500016076529,...",POINT (11.30096488154342 10.14111127185184),1,Gombe,Yamaltu/Deba,0,5.162851,,0,0,0,0.000000
384,1223,4558,1,4280,"Mashegu, Niger, Nigeria",Mashegu,Nominatim,"1223-Mashegu=4,558","POLYGON ((4.832499995292757 9.915000161649402,...",POINT (4.838342798299555 9.920094858598647),0,Niger,Mashegu,4280,5.101130,,0,0,0,0.000000
100,214,4092,0,8824,"Tunbun Buhari, Borno, Nigeria",Tunbun Buhari,Photon,"214-Tunbun Buhari=4,092","POLYGON ((13.97249995851207 12.71250015040707,...",POINT (13.97254625480819 12.71594150841794),0,Borno,Monguno,8824,6.038863,,0,0,0,0.000000


In [153]:
clust_new.drop(['sum','Orgs/population'],axis=1,inplace=True)

In [154]:
clust_new.to_excel('D:/Google Drive/01 Data/12 Omdena/Repo/clusters_1km/clusters_2.0.xlsx',index=False)

# 500m clusters

## Load files

In [155]:
clust = pd.read_excel('D:/Google Drive/01 Data/12 Omdena/Repo/clusters_500m/clusters.xlsx')

In [158]:
clust.sample()

Unnamed: 0,label,population,area,density,address0,geometry,centroid,ongrid,adm1,adm2,score,pred_solar,name,healthcare,school,pvout,max_relative_pv_output,min_relative_pv_output,mean_relative_pv_output
566,805,6995,1,6845,Tungan Ali,"POLYGON ((5.801666658059355 9.976666828068248,...",POINT (5.803629274873893 9.981472123999076),0,Niger,Mashegu,6845,5.10113,,0,0,4.165,0.857721,0.847448,0.856959


# Apply per row

In [157]:
tqdm.pandas() # to be able to see the progress of the apply function
clust_new_500 = clust.progress_apply(get_num_schools_healthcare,gdf = gdf, axis=1)

100%|██████████████████████████████████████████████████████████████████████████████| 1245/1245 [29:51<00:00,  1.44s/it]


In [159]:
clust_new_500.sample()

Unnamed: 0,label,population,area,density,address0,geometry,centroid,ongrid,adm1,adm2,...,pred_solar,name,healthcare,school,pvout,max_relative_pv_output,min_relative_pv_output,mean_relative_pv_output,No. healthcare units,No. school units
938,1094,9354,1,5501,Ikeji,"POLYGON ((4.94583332817002 7.422500171666031, ...",POINT (4.949807286487352 7.429547046637728),1,Osun,Oriade,...,4.763178,"['Ikeji Arakeji Health Center', 'Nawair Ud Dee...",1,7,7.508,0.774982,0.762357,0.773275,1,7


## Check differences

### Check difference in healthcare

In [189]:
filter_diff_health = clust_new_500['healthcare'] != clust_new_500['No. healthcare units']
clust_new_500.loc[filter_diff_health,:].shape

(20, 19)

### Check difference in schools

In [190]:
filter_diff_school = clust_new_500['school'] != clust_new_500['No. school units']
clust_new_500.loc[filter_diff_school,:].shape

(29, 19)

### Check Kafin Madaki

In [174]:
clust_new_500.loc[clust_new_500['address0'].str.contains('Kafin Madaki'),:]
indx = clust_new_500.loc[clust_new_500['address0'].str.contains('Kafin Madaki'),:].index

In [180]:
clust_new_500.loc[indx,:].apply(get_org_names,gdf=gdf,axis=1)['org names'].values.tolist()

[['ECCDE Kofar Buri',
  'Kefin',
  'Madarasatul Miftahul Islam',
  'Kafin A',
  'Wali Islamaiyya',
  'Madarasatul Tahfizul Quran Wattartil Badau',
  'Formwan',
  'Badau Primary School',
  'Unguwa Kwari Primary School',
  'Haka Tafi Primary School',
  'ECCDE Unguwar Kwari',
  'Government Day Junior Technical School K Madaki',
  'Kofar Buri Primary School',
  'GDST',
  'Government Junior Technical Secondary School',
  'ECCDE Badau',
  'Tahfizul QuraaAn',
  'Government Junior Secondary School',
  'ECCDE Kafin Madaki',
  'Kalasu Primary School',
  'Yayaji Islamiyya',
  'Government Day Junior Secondary School',
  'Central Primary School K Madaki',
  'Fudiyya Islamiyya',
  'Sheikh Gumi',
  'Madarasatul Iqamatus Sunna',
  'Nurul Islam',
  'Madarasatul Ihyaus Sunna',
  'C O A and Islamic Studies',
  'Madarasatul Tahfiz Tarhrib',
  'Abu Furera Islammiya',
  'Kafin Madaki Maternity']]

checking online: https://myspotfinder.com/page/3/?s=kafin+madaki the region does seem to have many schools.

In [181]:
clust_new_500['healthcare'] = clust_new_500['No. healthcare units']
clust_new_500['school'] = clust_new_500['No. school units']

In [183]:
clust_new_500.drop(['No. healthcare units', 'No. school units'],axis=1,inplace=True)

In [186]:
clust_new_500.to_excel('D:/Google Drive/01 Data/12 Omdena/Repo/clusters_500m/clusters_2.0.xlsx',index = False)