This notebook uses `pisa<=1.0.0`

https://www.elsevier.com/authors/policies-and-guidelines/documents/elsdoc-1.pdf

In [1]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, plotly.express as px, geopandas as gpd, folium, sys, os, itertools
from matplotlib.colors import to_rgba
from functools import reduce
from shapely.ops import unary_union
from tqdm.notebook import tqdm

%load_ext autoreload
%autoreload 2

sys.path.append('../../optimization/')
import maxcovering as mc

sys.path.append('./JG/')
import map_graphics as mg

# Definitions

In [2]:
# Relative folder holding the pickled inputs that are loaded further down.
path_data = './Data/'
# NOTE(review): the three paths below are absolute and specific to one machine;
# they must be edited before the notebook can run anywhere else.
overleaf_figures = r'D:\joaquimg\Dropbox\Apps\Overleaf\WHO Nepal Lancet\Figures/'
old_data_path_in_git_hub = r'D:\joaquimg\Documents\GitHub\clones\WHO_Nepal_Laboratory_Analytics\Data/'
old_data_path_out_git_hub = r'D:\joaquimg\Documents\GitHub\results\Nepal\Data\Travel Time/'

In [3]:
# Hex codes of seaborn's 'muted' palette; used below for the province colours.
paper_colors = sns.color_palette('muted').as_hex()
paper_colors

# Some old data still needed

In [4]:
# Lab ownership table and district polygons, both read from the legacy data folder.
df_lab_owner = pd.read_excel(f'{old_data_path_in_git_hub}Nepal_Lab_Types.xlsx')
shapefile = gpd.read_file(f'{old_data_path_in_git_hub}shapefile_nepal_districts.geojson')

# Prefer the '_new' pickle; when it is missing, load the original pickle and
# write it back under the '_new' name so the next run takes the first branch.
# NOTE(review): unpickling executes code stored in the file - only load trusted pickles.
if os.path.exists(f'{old_data_path_out_git_hub}current_hospitals_new.pkl'):
    current_hospitals = pd.read_pickle(f'{old_data_path_out_git_hub}current_hospitals_new.pkl')
else:
    current_hospitals = pd.read_pickle(f'{old_data_path_out_git_hub}current_hospitals.pkl')
    current_hospitals.to_pickle(f'{old_data_path_out_git_hub}current_hospitals_new.pkl')


In [None]:
# Per-district table built by the project helper; later cells read its
# 'Province', 'District' and 'Median RWI' columns.
# presumably RWI = relative wealth index - TODO confirm in map_graphics
rwi_district = mg.get_rwi_district(old_data_path_in_git_hub,shapefile)

# New data from GitHub

In [None]:
# Pickled inputs from `path_data`: population points, existing laboratories and
# three alternative sets of candidate locations.
# NOTE(review): unpickling executes code stored in the file - only load trusted pickles.
population = pd.read_pickle(f'{path_data}population.pkl')
existing_labs = pd.read_pickle(f'{path_data}existing_labs.pkl')
potential_locs_hospitals = pd.read_pickle(f'{path_data}potential_locs_hospitals.pkl')     # first set, from OSM
potential_locs_healthsites = pd.read_pickle(f'{path_data}potential_locs_healthsites.pkl') # richer set
potential_locs_10kmgrid = pd.read_pickle(f'{path_data}potential_locs_10kmgrid.pkl')       # generated as a regular grid

# Ensure consistency of the laboratory names

In [None]:
# The three sources must name exactly the same laboratories; on failure the
# assertion message is the symmetric difference, i.e. the offending names.
# Note: comparing sets does not detect duplicated names within one source.
assert set(existing_labs.L_NAME) == set(current_hospitals.L_NAME), set(existing_labs.L_NAME) ^ set(current_hospitals.L_NAME)
assert set(existing_labs.L_NAME) == set(df_lab_owner.Laboratory), set(existing_labs.L_NAME) ^ set(df_lab_owner.Laboratory)

# Enforce consistency in province naming

In [None]:
# Maps the spellings found in the inputs to one canonical province name:
# '1', '2', '5' -> 'Province 1/2/5'; 'Lumbini' is also mapped to 'Province 5'.
# The dict union operator `|` requires Python 3.9+.
rename_provinces = { str(p) : f'Province {p}' for p in [1,2,5] } | { 'Sudur Pashchim' : 'Sudurpashchim', 'Lumbini' : 'Province 5' }

In [None]:
# One row per laboratory: the existing-lab record joined with its ownership record.
lab_descriptions = pd.merge( existing_labs, df_lab_owner, left_on='L_NAME', right_on='Laboratory' )
# 'Owner' is the first character of the 'Type' column.
lab_descriptions['Owner'] = lab_descriptions['Type'].str[:1]
# Derive the remaining columns from the merged frame itself. Assigning a Series
# taken from `existing_labs` relies on index alignment, which silently produces
# wrong or missing values whenever the merge reorders, drops or duplicates rows
# or `existing_labs` does not carry a default RangeIndex.
lab_descriptions['Latitude'] = lab_descriptions['geometry'].apply(lambda point: point.y)
lab_descriptions['Longitude'] = lab_descriptions['geometry'].apply(lambda point: point.x)
lab_descriptions['Laboratory'] = lab_descriptions['L_NAME']

In [None]:
# Canonical province names on every frame that carries a 'Province' column.
lab_descriptions['Province Name'] = [ rename_provinces.get(p,p) for p in lab_descriptions['Province'] ]
shapefile['Province Name'] = [ rename_provinces.get(p,p) for p in shapefile.Province ]
rwi_district['Province Name'] = [ rename_provinces.get(p,p) for p in rwi_district.Province ]

# Pair provinces with palette colours in sorted order. Iterating a bare set of
# strings follows hash order, which changes between interpreter sessions, so
# the colour of each province (and every exported figure and legend) would
# differ from one kernel restart to the next.
province_names = sorted( set(lab_descriptions['Province Name'].values), key=str )
province_colors = { c : nc for c,nc in zip(province_names,paper_colors) }
# 'Nepal' (country-wide series) takes the next unused palette colour.
province_colors['Nepal'] = paper_colors[len(province_colors)]
# Raises KeyError if the shapefile contains a province without any laboratory.
shapefile['COLOR'] = [ province_colors[c] for c in shapefile['Province Name'].values ]

In [None]:
# Quick visual check: districts filled with their province colour.
shapefile.plot(color=shapefile['COLOR'])

# Descriptive

# Fixing 'missing province' from [the data preparation](https://github.com/Analytics-for-a-Better-World/Public-Infrastructure-Service-Access/blob/main/publications/WHO%20Nepal%20Paper/Nepal%20Paper%20Results.ipynb)

We follow the strategy described in https://gis.stackexchange.com/questions/413715/assign-point-to-closest-polygon-using-sjoin

In [None]:
# Map all population points; those whose 'Province' is missing are passed under
# the 'red' key.
mg.ShowPoints( locations = population[['ID','ycoord','xcoord','DISTRICT']], 
               choices = { 'red' : population[ population.Province.isna() ].index }, 
               icon_style ='font-size:12px', tiles='OpenStreetMap' )

In [None]:
# Slim copy of the population points, declared as EPSG:4326.
aux = population[['ID','xcoord','ycoord','population','geometry']].copy()
aux.crs = 'EPSG:4326'
# Buffer every district polygon by 1500 units in EPSG:3857, keeping the
# unbuffered shape in 'original', so that points just outside a border still
# fall within some (buffered) district.
adm = shapefile.copy().to_crs(crs=3857)
adm['original'] = adm.geometry.copy()
adm['buffered'] = adm.geometry.buffer(1500)
adm = adm.set_geometry('buffered').to_crs(crs=4326)
# Left join keeps every point; a point inside several buffers yields several rows.
adjusted_population = gpd.sjoin(aux, adm, how="left", predicate="within")
# Distance between each point and the unbuffered polygon of its candidate district.
adjusted_population['dist'] = gpd.GeoSeries(adjusted_population['geometry_left']).to_crs(crs=3857).distance(gpd.GeoSeries(adjusted_population['original']).to_crs(crs=3857))
# Keep one row per ID, preferring the smallest distance.
# NOTE(review): GroupBy.first() returns the first non-null entry per column, so a
# null in the closest row would be filled from a farther one - confirm this cannot occur.
adjusted_population = adjusted_population.sort_values("dist", ascending=True).groupby(["ID"]).first().reset_index(drop=False)
# Sanity checks: the point data is unchanged, and provinces that were already
# known are not altered by the spatial assignment.
assert adjusted_population[['ID','xcoord','ycoord','population']].equals(population[['ID','xcoord','ycoord','population']]), 'ID mismatch after adjusting population'
assert adjusted_population[~population.Province.isna()][['ID','Province']].equals(population[~population.Province.isna()][['ID','Province']]), 'Province mismatch after adjusting population'

In [None]:
# Same map as before, but the points that had no province are now grouped under
# the colour of the province assigned to them in `adjusted_population`.
mg.ShowPoints( locations = population[['ID','ycoord','xcoord','DISTRICT']], 
               choices = { province_colors[p] : idx for p,idx in adjusted_population[population.Province.isna()].groupby(['Province Name'])['ID'].unique().reset_index().set_index('Province Name').to_dict()['ID'].items() }, 
               icon_style ='font-size:12px', tiles='OpenStreetMap' )

# Create legends for pictures in the paper

In [None]:
def CreateLegend( colors, labels, alpha, filename, dpi ):
    """Draw a stand-alone legend of coloured swatches and save only the legend.

    Parameters
    ----------
    colors : sequence of matplotlib colours, one swatch per entry.
    labels : sequence of legend labels, paired with `colors` by position.
    alpha : opacity applied to every swatch face.
    filename : path handed to `Figure.savefig`.
    dpi : resolution handed to `Figure.savefig`.

    The legend is attached to the current pyplot axes via `plt.legend`, and the
    saved area is cropped to the legend's bounding box.
    """

    class _SwatchHandler(object):
        """Legend handler that draws a black-edged rectangle in a fixed colour."""

        def __init__(self, face, opacity):
            self.face = face
            self.opacity = opacity

        def legend_artist(self, legend, orig_handle, fontsize, handlebox):
            swatch = plt.Rectangle(
                [handlebox.xdescent, handlebox.ydescent],
                handlebox.width,
                handlebox.height,
                facecolor=self.face,
                alpha=self.opacity,
                edgecolor='k',
                transform=handlebox.get_transform(),
            )
            handlebox.add_artist(swatch)
            return swatch

    # One placeholder artist per colour; the handler map decides how each is drawn.
    placeholders = []
    handler_by_placeholder = {}
    for color in colors:
        placeholder = plt.Rectangle((0,0),1,1)
        placeholders.append(placeholder)
        handler_by_placeholder[placeholder] = _SwatchHandler(color, alpha)

    legend = plt.legend(handles=placeholders, labels=labels, handler_map=handler_by_placeholder)

    # Render once so the legend extent is known, then save just that region.
    figure = legend.figure
    figure.canvas.draw()
    extent = legend.get_window_extent().transformed(figure.dpi_scale_trans.inverted())
    figure.savefig(filename, dpi=dpi, bbox_inches=extent)

In [None]:
# Four legend files are written, each extending the previous entry list.
labels = sorted(province_colors.keys())

# 1) provinces only
labels.remove('Nepal')
colors = [ province_colors[p] for p in labels ]

CreateLegend( colors, labels, .8, overleaf_figures+'legend_provinces.pdf', dpi='figure' )

# 2) provinces followed by the country-wide 'Nepal' entry, fully opaque
labels = labels + ['Nepal']
colors = [ province_colors[p] for p in labels ]

CreateLegend( colors, labels, 1, overleaf_figures+'legend_nepal.pdf', dpi='figure' )

# 3) provinces plus the 'New choice' marker colour
labels.remove('Nepal')
colors = [ province_colors[p] for p in labels ] + ['lightpink']
labels = labels + ['New choice']

CreateLegend( colors, labels, .8, overleaf_figures+'legend_opt_only_new.pdf', dpi='figure' )

# 4) as 3) plus the 'Already chosen' marker colour
colors = colors + ['lightcyan']
labels = labels + ['Already chosen']

CreateLegend( colors, labels, .8, overleaf_figures+'legend_opt.pdf', dpi='figure' )

# Descriptive analysis of the current situation in Nepal

In [None]:
# Laboratory name -> province name.
lab_provinces = { lab : province for lab,province in lab_descriptions[['L_NAME','Province Name']].values }
# Province name -> single geometry obtained by merging its district polygons.
province_geometries = { province : unary_union( shapefile[shapefile['Province Name']==province].geometry ) for province in set(shapefile['Province Name']) }

In [None]:
def style_provinces(x):
    """Folium style callback: fill a feature with its own 'COLOR' property.

    `x` is a GeoJSON-like feature mapping; the fill colour is read from
    ``x['properties']['COLOR']``, the outline is a thin black line.
    """
    return {
        'fillColor': x['properties']['COLOR'],
        'color': 'black',
        'weight': .5,
        'fillOpacity': .25,
    }

def style_isochrones(x):
    """Folium style callback: the same green fill / red outline for every feature.

    The feature `x` is ignored.
    """
    return {
        'fillColor': 'green',
        'fillOpacity': .5,
        'color': 'red',
        'weight': 1,
    }

# Laboratories as points, grouped by the colour of their province.
folium_map = mg.ShowPoints( locations = lab_descriptions[['Owner','Latitude','Longitude','Laboratory']], 
               choices = { province_colors[p] : idx for p,idx in lab_descriptions.groupby(['Province Name'])['Hosp_ID'].unique().reset_index().set_index('Province Name').to_dict()['Hosp_ID'].items() }, 
               icon_style ='font-size:12px', tiles='OpenStreetMap' )
# District polygons, styled by province colour.
adm = folium.GeoJson(data=shapefile, style_function=style_provinces)
adm.add_to(folium_map)

# Overlay each lab's '60min_driving' geometry, clipped to the lab's own province.
for name, data in current_hospitals[['L_NAME','60min_driving']].values:
    geo_j = folium.GeoJson(data=province_geometries[lab_provinces[name]].intersection(data),style_function=style_isochrones)
    geo_j.add_to(folium_map)

# Export for the paper; the crop tuple is hard-coded.
mg.FoliumToPng( folium_map=folium_map, crop=(300, 113, 1068, 627), file_name=overleaf_figures+'labs' )
folium_map

In [None]:
# Count distinct laboratories per province (rows) and 'Type' (columns).
existing_labs_summary = lab_descriptions[['Laboratory','Type','Province Name']].drop_duplicates().reset_index(drop=True).groupby(['Province Name','Type'],as_index=True).size().unstack(level=1).fillna(0).astype(int).sort_index()
# Country-wide totals as an extra 'Nepal' row, then a per-row total column.
existing_labs_summary.loc['Nepal'] = existing_labs_summary.sum(numeric_only=True, axis=0)
existing_labs_summary.loc[:,'Laboratories'] = existing_labs_summary.sum(numeric_only=True, axis=1)
# Fixed column order; requires 'Government' and 'Private' to be values of 'Type'.
existing_labs_summary = existing_labs_summary[['Laboratories','Government','Private']]

In [None]:
# Display the table built in the previous cell.
existing_labs_summary

In [None]:
# Inhabitants per province. Only the 'population' column is summed (instead of
# summing every column and discarding the rest), and the province names are
# canonicalised once (the second, identical rename was a no-op).
head_count_per_province = (
    adjusted_population
    .groupby('Province')['population'].sum()
    .to_frame()
    .astype(int)
    .rename(index=rename_provinces)
    .sort_index()
)

# Join lab counts with head counts on the province index, add the country-wide
# 'Nepal' row and the number of laboratories per million inhabitants.
adm_stats = existing_labs_summary.merge(head_count_per_province,left_index=True,right_index=True).rename(columns={'population' : 'Inhabitants' })
adm_stats.loc['Nepal'] = adm_stats.sum(numeric_only=True, axis=0).round(2)
adm_stats['Labs P.M.'] = (adm_stats.Laboratories / adm_stats.Inhabitants * 1e6).round(2)

# Later cells also use `pyperclip`, so the import has to stay reachable from here.
import pyperclip
# Put the LaTeX rendering of the table on the clipboard for pasting into the paper.
pyperclip.copy(adm_stats.style.format(decimal='.', thousands=',', precision=2).to_latex())

adm_stats

# Growth over the months

In [None]:
def AdjustColumnsForLegendAndColorsForLinePlots( df, colors, n=2, d=0, what='' ):
    """Relabel columns as '<min> to <max><what><br><name>' for use as legend entries.

    Parameters
    ----------
    df : DataFrame with numeric columns and string column names.
    colors : mapping from the ORIGINAL column names to colours.
    n, d : width and number of decimals used to format the min / max values.
    what : suffix appended after the max value (e.g. ' labs', '%').

    Returns
    -------
    (DataFrame, dict) : a relabelled COPY of `df` and a mapping from each new
    column label to the colour of the column it was derived from.

    The caller's frame is left untouched; previously its columns were renamed in
    place, which made the call non-idempotent and forced callers to pass copies.
    """
    _min = df.min().to_dict()
    _max = df.max().to_dict()
    original = df.columns.to_list()
    # Pad every name to the longest one; `default=0` keeps a frame without columns valid.
    max_c = max( (len(o) for o in original), default=0 )
    renamed = df.copy()
    renamed.columns = [f'{_min[c]:{n}.{d}f} to {_max[c]:{n}.{d}f}{what}<br>{c:<{max_c}}' for c in original]
    return renamed, { c : colors[o] for c,o in zip(renamed.columns,original) }

# Lab counts

In [None]:
# Number of laboratories per opening date (rows) and province (columns), with a
# 'Nepal' margin holding the totals.
lab_counts = lab_descriptions[['Date Open','L_NAME','Province Name']].pivot_table(index='Date Open',columns='Province Name',aggfunc=len,fill_value=0,margins=True, margins_name='Nepal').reset_index().set_index('Date Open')
# Drop the last row (the margin row appended by pivot_table).
lab_counts.drop(lab_counts.tail(1).index,inplace=True)
# Keep only the second element of each column tuple as the column label.
lab_counts.columns = list(zip(*lab_counts.columns.values))[1]

In [None]:
# Cumulative number of laboratories over time, one line per province plus 'Nepal'.
aux, colors = AdjustColumnsForLegendAndColorsForLinePlots( lab_counts.cumsum(), province_colors, what=' labs' )
mg.draw_lines( aux,
              x=aux.index,y=aux.columns,
              x_title=None, 
              y_title='Number of Laboratories',
              width=600,height=350,
              colors=colors,
              line_width=5, horizontal_legend=False,
              file_name=overleaf_figures+'nof_labs_2021.pdf')

# Accessibility 

In [None]:
def is_chrone(c):
    """Return True for column names that start with 'ID' and contain 'min'."""
    return 'min' in c and c.startswith('ID')

In [None]:
# The reach columns must be the same, in the same order, in the existing labs
# and in both candidate sets used below.
assert list(filter(is_chrone, existing_labs.columns)) == list(filter(is_chrone, potential_locs_healthsites.columns))
assert list(filter(is_chrone, existing_labs.columns)) == list(filter(is_chrone, potential_locs_10kmgrid.columns))

In [None]:
# Names of the reach columns (those accepted by `is_chrone`).
chrone_columns = list(filter(is_chrone, existing_labs.columns))

In [None]:
# All facilities in one frame with a fresh 0..n-1 index: existing labs first,
# then the healthsite candidates.
all_facs = pd.concat([existing_labs[chrone_columns],potential_locs_healthsites[chrone_columns]],ignore_index=True)
# Population per point as an unsigned-integer array, in `adjusted_population` row order.
household = adjusted_population.population.astype(np.uint).values
# Index values of the existing labs inside `all_facs` (the first len(existing_labs) rows).
already_open = all_facs.index[:len(existing_labs)].values

In [None]:
def purify( lol ):
    """Return the truthy items of `lol` as a list, preserving their order."""
    kept = []
    for item in lol:
        if item:
            kept.append(item)
    return kept

In [None]:
# Hosp_IDs grouped by opening date; the cumulative sum of the lists gives, for
# each date, every lab opened up to and including that date.
coverage_obtained_in_nepal = lab_descriptions.groupby(['Date Open'])['Hosp_ID'].unique().reset_index().set_index('Date Open')
coverage_obtained_in_nepal['sol'] = coverage_obtained_in_nepal['Hosp_ID'].apply(list).cumsum()
# One column per reach definition: share (in %) of the total population found at
# the household positions returned by mc.all_in for the labs open at that date.
# presumably mc.all_in returns the union of the given ID lists - TODO confirm
for c in chrone_columns:
    coverage_obtained_in_nepal[' '.join(c.split('_')[1:])] = coverage_obtained_in_nepal['sol'].apply( lambda s : household[mc.all_in(purify(lab_descriptions[lab_descriptions['Hosp_ID'].isin(s)][c].values))].sum()/household.sum()*100 )
# NOTE(review): the slice takes the last FOUR columns, i.e. it assumes exactly
# four entries in chrone_columns - confirm.
done = coverage_obtained_in_nepal[coverage_obtained_in_nepal.columns[-4:]]
done, colors = AdjustColumnsForLegendAndColorsForLinePlots(done, dict(zip(done.columns,sns.color_palette('muted').as_hex())), n=4, d=1, what='%' )
mg.draw_lines(done, 
              y=done.columns, 
              x_title=None, 
              y_title='% population with access', 
              width=600,height=200,line_width=5,
              horizontal_legend=False,
              colors=colors,
              file_name=overleaf_figures+'unrestricted_coverage_2021.pdf')

In [None]:
# Province names present in only one of the two frames (empty set when they agree).
set( adjusted_population['Province Name'] ) ^ set( lab_descriptions['Province Name'] )

In [None]:
# Province name -> set of population point IDs located in that province.
households_per_province = { p : set(idx) for p,idx in adjusted_population.groupby(['Province Name'])['ID'].unique().reset_index().set_index('Province Name').to_dict()['ID'].items() }

In [None]:
# The per-province ID sets must form a partition of all IDs:
# pairwise disjoint ...
for combination in itertools.combinations(households_per_province.values(), 2):
    assert len( reduce(lambda x, y: x.intersection(y),combination) ) == 0
# ... and jointly covering every ID.
assert reduce(lambda x, y: x.union(y),households_per_province.values()) == set(adjusted_population.ID.values)

In [None]:
# For every reach column 'ID_*' add:
#   'R<col>'       - the reach restricted to households of the lab's own province;
#   'R minus <col>' - the households of the reach that lie outside that province.
# The list of columns is built before the loop, so the columns added here are
# not revisited.
for c in [c for c in lab_descriptions.columns if c.lower().startswith('id_') ]:
    lab_descriptions['R'+c] = lab_descriptions.apply(lambda row: list(set(row[c]).intersection(households_per_province.get(row['Province Name'], set()))), axis=1)
    lab_descriptions['R minus '+c] = lab_descriptions.apply(lambda row: set(row['R'+c]).symmetric_difference(set(row[c])), axis=1)

In [None]:
# Lab whose 60-minute driving reach loses the most households when restricted to
# its own province. `Series.argmax()` returns a POSITION, so the row has to be
# fetched with `.iloc`; `.loc` only worked while the index labels happened to be
# equal to the row positions.
boundary_lab = lab_descriptions.iloc[lab_descriptions['R minus ID_60min_driving'].apply(len).argmax()].to_dict()
name = boundary_lab['L_NAME']
own_province = boundary_lab['Province Name']
# Population of the households cut off by the province border (IDs index `household`).
left_out = household[list(boundary_lab['R minus ID_60min_driving'])].sum()
province_to_the_left = lab_descriptions[lab_descriptions.L_NAME.str.contains('Universal College')]['Province Name'].values[0] # name from tooltip
pyperclip.copy( f'{name} at {own_province} could serve {left_out} from {province_to_the_left}' )
# Same base map as the overview of laboratories ...
folium_map = mg.ShowPoints( locations = lab_descriptions[['Owner','Latitude','Longitude','Laboratory']], 
               choices = { province_colors[p] : idx for p,idx in lab_descriptions.groupby(['Province Name'])['Hosp_ID'].unique().reset_index().set_index('Province Name').to_dict()['Hosp_ID'].items() }, 
               icon_style ='font-size:12px', tiles='OpenStreetMap' )
folium.GeoJson(data=shapefile, style_function=style_provinces).add_to(folium_map)
# ... zoomed on the selected lab.
folium_map = mg.FitAround( folium_map, boundary_lab['geometry'].y, boundary_lab['geometry'].x, delta_lat=.3, delta_lon=.3 )
data = current_hospitals[current_hospitals['L_NAME']==name]['60min_driving'].values[0]
# Full isochrone in red, with the part inside the lab's own province drawn on top.
folium.GeoJson(data=data,style_function=lambda _ : dict(fillColor='red',fillOpacity=.3,color='red',weight=1)).add_to(folium_map)
folium.GeoJson(data=province_geometries[lab_provinces[name]].intersection(data),style_function=style_isochrones).add_to(folium_map)
mg.FoliumToPng( folium_map=folium_map, crop=(350, 200, 1000, 600), file_name=overleaf_figures+'restricted_or_not' )
folium_map

In [None]:
# Same computation as the unrestricted coverage cell, but based on the
# province-restricted reach columns ('R' + column name). This rebinds
# `coverage_obtained_in_nepal`, whose 'sol' column is read again further down.
coverage_obtained_in_nepal = lab_descriptions.groupby(['Date Open'])['Hosp_ID'].unique().reset_index().set_index('Date Open')
coverage_obtained_in_nepal['sol'] = coverage_obtained_in_nepal['Hosp_ID'].apply(list).cumsum()
for c in chrone_columns:
    coverage_obtained_in_nepal[' '.join(c.split('_')[1:])] = coverage_obtained_in_nepal['sol'].apply( lambda s : household[mc.all_in(purify(lab_descriptions[lab_descriptions['Hosp_ID'].isin(s)]['R'+c].values))].sum()/household.sum()*100 )
# NOTE(review): the slice takes the last FOUR columns, i.e. it assumes exactly
# four entries in chrone_columns - confirm.
done = coverage_obtained_in_nepal[coverage_obtained_in_nepal.columns[-4:]]
done, colors = AdjustColumnsForLegendAndColorsForLinePlots(done, dict(zip(done.columns,sns.color_palette('muted').as_hex())), n=4, d=1, what='%' )
mg.draw_lines(done, 
              y=done.columns, 
              x_title=None, 
              y_title='% population with access', 
              width=600,height=200,line_width=5,
              horizontal_legend=False,
              colors=colors,
              file_name=overleaf_figures+'restricted_coverage_2021.pdf')

## At district level

In [None]:
# Population per district (rows) and province (columns), with 'All' margins.
# NOTE(review): a later cell rebinds `pop_per_district` to a plain dict; cells
# relying on this DataFrame must run before that one.
# NOTE(review): recent pandas versions warn when the builtin `sum` is passed as
# aggfunc; the string 'sum' is the supported spelling.
pop_per_district = adjusted_population.pivot_table( columns='Province Name', index='DISTRICT', values='population', aggfunc=sum, fill_value=0, margins=True ).astype(int)

In [None]:
def GetCoveragePerDistrict( data, column, pop_with_district=adjusted_population ):
    """Fraction of each district's population reached by the facilities in `data`.

    Parameters
    ----------
    data : frame whose `column` holds, per facility, the IDs of the population
        points it reaches.
    column : name of the reach column to evaluate.
    pop_with_district : population points with 'ID', 'DISTRICT', 'Province Name'
        and 'population' columns.

    Returns
    -------
    dict mapping each district (plus the 'All' margin) to covered population
    divided by total population; districts without any covered point map to 0.

    The totals are derived from `pop_with_district` itself. Previously the
    denominator was the notebook-level `pop_per_district`, which ignored the
    `pop_with_district` argument and which a later cell rebinds to a dict.
    """
    def _population_pivot( frame ):
        # 'sum' (string) instead of the builtin: passing builtins is deprecated in pandas.
        return frame.pivot_table( columns='Province Name', index='DISTRICT', values='population', aggfunc='sum', fill_value=0, margins=True )

    covered_ids = mg.UnionListOfLists( data[column].values )
    cov = pop_with_district[ pop_with_district.ID.isin( covered_ids ) ]
    totals = _population_pivot( pop_with_district ).astype(int)
    # Division aligns on district and province; districts absent from `cov` become NaN -> 0.
    return ( _population_pivot( cov ) / totals ).fillna(0).All.to_dict()

In [None]:
def ColorCoveragePerDistrict( data, column, file_name=None, pop_with_district=adjusted_population, dpi=150 ):
    """Draw the districts with opacity proportional to their coverage.

    Each district takes its province colour with alpha equal to the covered
    fraction returned by `GetCoveragePerDistrict`; districts whose coverage is
    not positive are painted 'red'. The colours are stored in a new
    `shapefile[column]` column (the notebook-level frame is modified) and the
    result of `mg.DrawAdministrative` is returned.
    """
    coverage = GetCoveragePerDistrict( data, column, pop_with_district )
    district_colors = []
    for base_color, district in shapefile[['COLOR','DISTRICT']].values:
        share = coverage[district]
        if share > 0:
            district_colors.append( to_rgba( base_color, alpha=share ) )
        else:
            district_colors.append( 'red' )
    shapefile[column] = district_colors
    return mg.DrawAdministrative(shapefile,file_name=file_name,color=column, dpi=dpi )

In [None]:
# One district coverage map per unrestricted reach column ('ID_*').
for c in filter( lambda c : c.startswith('ID_'), lab_descriptions.columns ):
    ColorCoveragePerDistrict( lab_descriptions, c, file_name=overleaf_figures+f'District_{c}.png' )

In [None]:
# One district coverage map per province-restricted reach column ('RID_*').
for c in filter( lambda c : c.startswith('RID_'), lab_descriptions.columns ):
    ColorCoveragePerDistrict( lab_descriptions, c, file_name=overleaf_figures+f'District_{c}.png' )

In [None]:
# Number of distinct districts among the population points.
nof_districts = adjusted_population.DISTRICT.nunique()

In [None]:
def GetCoveragePerProvince( data, column, pop_with_district=adjusted_population ):
    """Covered population per province (rows) and district (columns).

    Parameters
    ----------
    data : frame whose `column` holds, per facility, the IDs of the population
        points it reaches.
    column : name of the reach column to evaluate.
    pop_with_district : population points with 'ID', 'DISTRICT', 'Province Name'
        and 'population' columns.

    Returns
    -------
    DataFrame of summed population of the reached points, including the 'All'
    margins added by `pivot_table`.
    """
    covered_ids = mg.UnionListOfLists(data[column].values)
    cov = pop_with_district[ pop_with_district.ID.isin( covered_ids ) ]
    # 'sum' (string) instead of the builtin: passing builtins as aggfunc is
    # deprecated in pandas and emits a FutureWarning.
    return cov.pivot_table( columns='DISTRICT', index='Province Name', values='population', aggfunc='sum', fill_value=0, margins=True )

In [None]:
# Head count per province plus a country-wide 'All' entry.
# Work on a copy: enlarging the Series returned by column access can leak the
# extra 'All' entry back into `head_count_per_province`, so re-running the cell
# would count the total twice.
head_count_nepal_and_per_province = head_count_per_province.population.copy()
head_count_nepal_and_per_province.loc['All'] = head_count_per_province.population.sum()
head_count_nepal_and_per_province

In [None]:
# Total population per province, as integers.
pop_per_province = adjusted_population.groupby('Province Name')['population'].sum().astype(int)

In [None]:
# District -> total population, plus an 'All' entry with the grand total.
# NOTE(review): this rebinds `pop_per_district`, which an earlier cell defined as
# a pivot-table DataFrame; code expecting that DataFrame must not run after this cell.
pop_per_district = adjusted_population.groupby(['DISTRICT'])['population'].sum().astype(int)
pop_per_district['All'] = pop_per_district.sum()
pop_per_district = pop_per_district.to_dict()

In [None]:
# Province -> number of distinct districts, plus an 'All' entry with their sum.
district_counts = adjusted_population.groupby('Province Name')['DISTRICT'].nunique()
district_counts['All'] = district_counts.sum()
district_counts = district_counts.to_dict()
district_counts

In [None]:
# Province -> set of its districts; 'All' -> every district.
districts_per_province = adjusted_population.groupby('Province Name')['DISTRICT'].unique().apply(set).to_dict()
districts_per_province['All'] = set(adjusted_population['DISTRICT'].unique())

In [None]:
def ForTheseColumns( columns ):
    """Summarise coverage per province for each reach column in `columns`.

    For every column of `lab_descriptions` named in `columns` the result gets:
      'pc_<col>'  - covered population as a percentage of the province head count;
      'nof_<col>' - district_counts[province] minus the number of that province's
                    districts with non-zero covered population;
      'min_<col>' / 'max_<col>' - smallest / largest covered share (in %) among
                    the row's non-zero entries.
    Rows are the provinces plus the 'All' margin.

    Returns
    -------
    (DataFrame, dict) : the summary table and, per column, a mapping from row
    name to the list of that row's districts with zero covered population.

    Side effect: the LaTeX rendering of the table is copied to the clipboard.
    Relies on the notebook-level names adjusted_population, lab_descriptions,
    head_count_nepal_and_per_province, pop_per_district (the dict version),
    districts_per_province and district_counts.
    """
    cov_per_province = pd.DataFrame()
    isolated = dict()
    for c in columns:
        # Zero the population of every point that is not reached, then aggregate.
        aux = adjusted_population.copy()
        aux.loc[ ~aux.ID.isin( mg.UnionListOfLists(lab_descriptions[c].values) ), 'population' ] = 0
        aux = aux.pivot_table( columns='DISTRICT', index='Province Name', values='population', aggfunc=sum, fill_value=0, margins=True ).astype(int)
        # cov_per_province['nof_'+c] = aux.All
        # cov_per_province['nof_districts_'+c] = aux.replace(0,np.nan).count(axis=1)-1
        # cov_per_province['min_access_'+c] = 
        cov_per_province['pc_'+c] = (aux.div(head_count_nepal_and_per_province,axis=0)*100).round(2).All
        # Per row: {district (and 'All'): covered share} for the non-zero entries only.
        access = aux.apply(lambda row: {col: val/pop_per_district[col] for col, val in row.items() if val != 0}, axis=1).to_dict()
        # Per row: the row's own districts that have no covered population at all.
        isolated[c] = aux.apply(lambda row: [col for col, val in row.items() if val == 0 and col in districts_per_province[row.name]], axis=1).to_dict()
        # len(v)-1 discounts the 'All' entry.
        # NOTE(review): min()/max() raise ValueError for a row without any non-zero entry - confirm this cannot occur.
        cov_per_province = pd.concat( [cov_per_province, ( pd.DataFrame.from_dict( { k : (district_counts[k]-(len(v)-1),round(min(v.values())*100,2), round(max(v.values())*100,1)) for k,v in access.items() }, orient='index', columns=['nof_'+c,'min_'+c,'max_'+c] ) )], axis=1 )
        # display(aux.replace(0,np.nan).count(axis=1)-1)
        # display((aux.div(head_count_nepal_and_per_province,axis=0)*100).round(1))
    pyperclip.copy( cov_per_province.style.format(decimal='.', thousands=',', precision=2).to_latex() )
    return cov_per_province, isolated

In [None]:
# Unrestricted reach columns; the table is also copied to the clipboard.
cov_per_province, isolated_u = ForTheseColumns( chrone_columns )

In [None]:
# Province-restricted reach columns; this overwrites `cov_per_province` from the
# previous cell (only `isolated_u` is kept from the unrestricted run).
cov_per_province, isolated_r = ForTheseColumns( [c for c in lab_descriptions.columns if c.startswith('RID_')] )

In [None]:
# Display the table from the most recent ForTheseColumns call.
cov_per_province

In [None]:
# Number of districts without any 60-minute driving coverage (unrestricted,
# restricted), and the districts that are isolated only under the restriction.
len(isolated_u['ID_60min_driving']['All']),len(isolated_r['RID_60min_driving']['All']),[d.title() for d in sorted(list(set(isolated_r['RID_60min_driving']['All'])-set(isolated_u['ID_60min_driving']['All'])))]

In [None]:
# Per row: number of non-zero entries minus one (the 'All' margin column),
# i.e. the number of districts with some 60-minute driving coverage.
GetCoveragePerProvince( lab_descriptions, 'ID_60min_driving' ).div(head_count_nepal_and_per_province,axis=0).replace(0.0,np.nan).count(axis=1)-1

In [None]:
# Accessibility table produced by the project helper for the 60-minute driving
# reach of all labs, indexed by province name.
mg.GetAccessibilityFromData( mg.UnionListOfLists(lab_descriptions['ID_60min_driving'].values), adjusted_population, rwi_district ).set_index( 'Province Name', drop=True )

In [None]:
# Covered population per province (row margin 'All' included), one column per reach definition.
pd.DataFrame({ c : GetCoveragePerProvince( lab_descriptions, c, adjusted_population ).All.astype(int) for c in chrone_columns })

In [None]:
# Two snapshots of the laboratory network: labs opened on or before 2021-05-01
# ('may') and all labs in the data ('november').
labs = dict()
labs['may'] = lab_descriptions[ lab_descriptions['Date Open'] <= '2021-05-01' ]
labs['november'] = lab_descriptions

In [None]:
# For each snapshot and for both the unrestricted and the province-restricted
# 60-minute driving reach, plot accessibility against RWI: once for all
# districts and once without Kathmandu and Bhaktapur.
for period in labs.keys():
    for c in ['ID_60min_driving','RID_60min_driving']:
        accessibility_frame = mg.GetAccessibilityFromData( mg.UnionListOfLists(labs[period][c]), adjusted_population, rwi_district )
        # LaTeX-friendly copy; only consumed by the (disabled) clipboard export below.
        for_table = accessibility_frame.copy()
        for_table['District'] = accessibility_frame.District.str.replace('_',' ').str.title()
        # Raw string: '\%' is an invalid escape sequence (a warning today, an
        # error in future Python versions); r'\%' is the same two characters.
        for_table.columns = for_table.columns.str.replace('_',' ').str.replace('%',r'\%')
        # The unrestricted plot carries the legend and is therefore wider.
        width = 600 if c.startswith('RID') else 750
        height = 250
        showlegend = c.startswith('ID')
        # pyperclip.copy(for_table.sort_values( by=['Province Name','District'] ).style.hide(axis='index').format(decimal='.', thousands=',', precision=2).to_latex())
        display(mg.ShowRWIxAccess( accessibility_frame, province_colors, width=width, height=height, file_name=overleaf_figures+f'{period}_RWI_Nepal_{c}.pdf', showlegend = showlegend, title=None, trendline='ols', trendline_scope='overall', trendline_color_override='red' ))
        display(mg.ShowRWIxAccess( accessibility_frame[ ~accessibility_frame.District.isin( ['KATHMANDU','BHAKTAPUR' ] ) ], province_colors, width=600, height=height, showlegend = False, xrange = [-.55, .35], file_name=overleaf_figures+f'{period}_RWI_without_Kathmandu_{c}.pdf', title=None, trendline='ols' ))


# Optimization

In [None]:
# Facility index values are used as identifiers below, so they must be unique.
assert len( set(all_facs.index) ) == len( all_facs.index ), 'indices not unique'

In [None]:
# Every household ID listed in any reach column of any facility set must be a
# valid position in `household` (0 .. len(household)-1). Empty / falsy reach
# entries are skipped.
for df in [all_facs, existing_labs, potential_locs_10kmgrid, potential_locs_healthsites, potential_locs_hospitals ]:
    for c in all_facs.columns:
        assert( set( mc.all_in( [ l for l in df[c].values if l ] ) ).issubset( set( range(len(household))) ) ), f'unknown households in {c} reach of facilities'

In [None]:
def ShowServeNobody( locations, metric='ID_60min_driving' ):
    """Map the candidate locations whose reach list for `metric` is empty.

    `locations` needs an 'ID' column, point geometries and the `metric` column
    holding sized containers. All selected locations are passed to
    `mg.ShowPoints` under the 'red' key; the resulting map is returned.
    """
    serves_nobody = locations[metric].apply(len) == 0
    point_columns = [locations.ID, locations.geometry.y, locations.geometry.x, locations.ID]
    isolated = pd.concat(point_columns, axis=1)[serves_nobody]
    red_markers = { 'red' : isolated.index.values }
    return mg.ShowPoints( locations = isolated, choices = red_markers, icon_style ='font-size:7px', tiles='OpenStreetMap' )

In [None]:
# Grid candidates from which no population point is within a 60-minute walk.
ShowServeNobody( potential_locs_10kmgrid, metric='ID_60min_walking' )

In [None]:
# The optimization below is cached in 'opt_results.pkl' (relative to the working
# directory); delete that file to force a recomputation.
# NOTE(review): the cache is not invalidated when the input data changes.
if os.path.exists('opt_results.pkl'):
    results = pd.read_pickle('opt_results.pkl')
else:
    M = len(already_open)
    M_new = len(all_facs)-len(already_open)+1
    results = pd.DataFrame()
    for c in all_facs.columns:
        I, J, IJ, JI = mc.CreateIndexMapping( all_facs[c], household )
        # Budgets run from M+1 up to len(all_facs): existing labs stay open and
        # candidates are added one at a time.
        results_fixing = mc.OptimizeWithGurobipy( household,I,J,IJ, np.arange(M+1,M+M_new,1), parsimonious=True, already_open=already_open, trace=False, progress=tqdm )
        # Objective value expressed as a percentage of the total population.
        results[' '.join(c.split('_')[1:])] = ( results_fixing.value / sum(household) * 100 ).astype(float).round(1)
    results.to_pickle('opt_results.pkl')

In [None]:
# Quick look at the coverage curves, one line per reach definition.
ax = results.plot()
ax.set_title("potential_locs_healthsites",color='black')
ax.legend(bbox_to_anchor=(1.0, 1.0))
ax.plot()
plt.show()

In [None]:
# np.argmax gives, per column, the first row position holding the column maximum;
# end_of_growth is one past the largest of those positions.
end_of_growth = max( np.argmax( results, axis=0 ) ) + 1

In [None]:
# Display the value computed in the previous cell.
end_of_growth

In [None]:
# Sanity check: truncating the curves at end_of_growth loses no maximum.
aux = pd.concat( [results.max().to_frame(), results[:end_of_growth].max().to_frame()], axis=1 )
aux.columns=['complete','beginning']
assert all( aux['complete'] == aux['beginning'] )
aux

In [None]:
# Coverage curves up to the point where no column grows any further.
aux, colors = AdjustColumnsForLegendAndColorsForLinePlots(results[:end_of_growth], dict(zip(results.columns,sns.color_palette('muted').as_hex())), n=4, d=1 )
mg.draw_lines(aux, 
              y=aux.columns, 
              x_title=None, 
              y_title=None, 
              width=600,height=200,line_width=5,
              horizontal_legend=False,
              colors=colors,
              file_name=overleaf_figures+'max_attainable_coverage.pdf')

In [None]:
# end_of_growth expressed as a percentage of the number of healthsite candidates.
end_of_growth/len(potential_locs_healthsites)*100

In [None]:
# Display the (province-restricted) coverage history computed earlier.
coverage_obtained_in_nepal

In [None]:
# Start from the labs open at the earliest 'Date Open' and let the optimizer
# choose up to M_new additional facilities (budgets M .. M+M_new).
# NOTE(review): `beginning` holds Hosp_ID values but is passed as `already_open`
# next to `all_facs`, i.e. Hosp_ID is presumed to equal the row position of the
# lab in `all_facs` - confirm.
beginning = coverage_obtained_in_nepal.iloc[0]['sol']
M = len(beginning)
M_new = 20
results_20 = pd.DataFrame()
results_fixing = dict()
for c in all_facs.columns:
    I, J, IJ, JI = mc.CreateIndexMapping( all_facs[c], household )
    results_fixing[c] = mc.OptimizeWithGurobipy( household,I,J,IJ, np.arange(M,M+M_new+1,1), parsimonious=True, already_open=beginning, trace=False, progress=tqdm )
    #mc.PyomoOptimize( household,I,J,IJ, np.arange(M,M+M_new+1,1), parsimonious=True, already_open=beginning, trace=False, progress=tqdm, solver='gurobi_direct' )
    # Objective value expressed as a percentage of the total population.
    results_20[' '.join(c.split('_')[1:])] = ( results_fixing[c].value / sum(household) * 100 ).astype(float).round(1)


In [None]:
# Number of laboratories in the starting network.
len( beginning )

In [None]:
# Differences between the first recorded network and `beginning`; two empty sets
# are expected as long as `beginning` was taken from that same row.
set( coverage_obtained_in_nepal.iloc[0].sol ) - set(beginning), set(beginning) - set( coverage_obtained_in_nepal.iloc[0].sol ) 

In [None]:
# Per reach definition: (labs of `beginning` missing from the first solution,
# facilities in the first solution that are not in `beginning`).
{ c : (set(beginning) - set(results_fixing[c].iloc[0].solution), set(results_fixing[c].iloc[0].solution) - set(beginning)) for c in chrone_columns }

In [None]:
# Coverage attained by the optimized networks, one line per reach definition.
aux, colors = AdjustColumnsForLegendAndColorsForLinePlots(results_20.copy(), dict(zip(results_20.columns,sns.color_palette('muted').as_hex())), n=4, d=1 )
mg.draw_lines(aux, 
              y=aux.columns, 
              x_title=None, 
              y_title=None, 
              width=600,height=200,line_width=5,
              horizontal_legend=False,
              colors=colors,
              file_name=overleaf_figures+'optimal_20.pdf')

In [None]:
# Coordinates of all facilities in the same order as `all_facs` (existing labs
# first, then healthsite candidates); reset_index exposes the row number as an
# 'index' column.
aux = pd.concat([existing_labs['geometry'],potential_locs_healthsites['geometry']],ignore_index=True)
all_facs_coordinates = pd.DataFrame()
all_facs_coordinates['Longitude'] = aux.apply(lambda point: point.x)
all_facs_coordinates['Latitude'] = aux.apply(lambda point: point.y)
all_facs_coordinates = all_facs_coordinates.reset_index()

In [None]:
# One map per reach definition showing the final optimized network:
#   - labs of the starting network, in their province colour;
#   - existing labs selected by the optimizer that were not in the starting
#     network ('lightcyan');
#   - newly selected candidate locations ('lightpink').
color_new_choice, color_chosen_again = 'lightpink', 'lightcyan'

# Loop-invariant pieces, computed once.
existing_ids = set(existing_labs.Hosp_ID.values)
initial_ids = set(beginning)
# `beginning` holds Hosp_ID values, so select on that column (as every other
# cell does) rather than on the frame's index.
initial_labs = lab_descriptions[lab_descriptions['Hosp_ID'].isin(beginning)]
initial_choices = { province_colors[p] : idx for p,idx in initial_labs.groupby(['Province Name'])['Hosp_ID'].unique().reset_index().set_index('Province Name').to_dict()['Hosp_ID'].items() }

for c in chrone_columns:
    final_solution = set( results_fixing[c].solution.values[-1] )
    # Explicit parentheses: `-` binds tighter than `&` in Python.
    chosen_again = (final_solution & existing_ids) - initial_ids
    new_choices = final_solution - existing_ids

    choices = initial_choices | \
                { color_new_choice : list(new_choices) } | \
                { color_chosen_again : list(chosen_again) }

    folium_map = mg.ShowPoints( locations = all_facs_coordinates[['index','Latitude','Longitude','index']], 
                choices = choices, 
                icon_style ='font-size:12px', tiles='OpenStreetMap' )
    adm = folium.GeoJson(data=shapefile, style_function=style_provinces)
    adm.add_to(folium_map)

    # for name, data in current_hospitals[['L_NAME','60min_driving']].values:
    #     geo_j = folium.GeoJson(data=province_geometries[lab_provinces[name]].intersection(data),style_function=style_isochrones)
    #     geo_j.add_to(folium_map)

    mg.FoliumToPng( folium_map=folium_map, crop=(300, 113, 1068, 627), file_name=overleaf_figures+f'labs_{c}' )
    display(folium_map)

In [None]:
# Attach the district's 'Median RWI' to every population point (left merge, so
# the row order of `adjusted_population` is kept).
RWI_population = pd.merge(adjusted_population, rwi_district, left_on='DISTRICT', right_on='District', how='left')[['population','Median RWI']]
# Objective weight: population * 1000 * (2 - Median RWI); a lower RWI gives a larger weight.
# NOTE(review): districts without an RWI entry produce NaN weights - confirm none exist.
RWI_population['weight'] = RWI_population['population'].multiply(1000*(2-RWI_population['Median RWI']))
RWI_population

In [None]:
# Weights as a plain array, in the same row order as `household`.
weight = RWI_population['weight'].values

In [None]:
# Same set-up as the unweighted 20-facility run, but the optimizer receives the
# RWI-based `weight` instead of `household` as its first argument. The index
# mapping is still built from `household`. `results_fixing` is overwritten.
beginning = coverage_obtained_in_nepal.iloc[0]['sol']
M = len(beginning)
M_new = 20
results_fixing = dict()
for c in all_facs.columns:
    I, J, IJ, JI = mc.CreateIndexMapping( all_facs[c], household )
    results_fixing[c] = mc.OptimizeWithGurobipy( weight,I,J,IJ, np.arange(M,M+M_new+1,1), parsimonious=True, already_open=beginning, trace=False, progress=tqdm )

In [None]:
# The weighted objective value is not a population share, so the (unweighted)
# coverage of each weighted solution is recomputed from `household`.
results_20 = pd.DataFrame()
for c in all_facs.columns:
    results_20[' '.join(c.split('_')[1:])] = [ (household[ mc.all_in( all_facs[all_facs.index.isin(s)][c].values ).astype(int) ].sum()/household.sum())*100 for s in results_fixing[c].solution.values]

In [None]:
# Coverage attained by the RWI-weighted networks, one line per reach definition.
aux, colors = AdjustColumnsForLegendAndColorsForLinePlots(results_20.copy(), dict(zip(results_20.columns,sns.color_palette('muted').as_hex())), n=4, d=1 )
mg.draw_lines(aux, 
              y=aux.columns, 
              x_title=None, 
              y_title=None, 
              width=600,height=200,line_width=5,
              horizontal_legend=False,
              colors=colors,
              file_name=overleaf_figures+'optimal_20_RWI.pdf')

In [None]:
# Accessibility against RWI for the final RWI-weighted solution of every reach
# definition: once for all districts and once without Kathmandu and Bhaktapur.
for c in chrone_columns:
    sol = results_fixing[c].solution.values[-1]
    idx = all_facs.index.isin(sol)
    accessibility_frame = mg.GetAccessibilityFromData( mg.UnionListOfLists(purify(all_facs[idx][c])), adjusted_population, rwi_district )
    # LaTeX-friendly copy; only consumed by the (disabled) clipboard export below.
    for_table = accessibility_frame.copy()
    for_table['District'] = accessibility_frame.District.str.replace('_',' ').str.title()
    # Raw string: '\%' is an invalid escape sequence (a warning today, an error
    # in future Python versions); r'\%' is the same two characters.
    for_table.columns = for_table.columns.str.replace('_',' ').str.replace('%',r'\%')
    # Every name in chrone_columns starts with 'ID', so the 750 / legend branch
    # is the one taken here.
    width = 600 if c.startswith('RID') else 750
    height = 250
    showlegend = c.startswith('ID')
    # pyperclip.copy(for_table.sort_values( by=['Province Name','District'] ).style.hide(axis='index').format(decimal='.', thousands=',', precision=2).to_latex())
    display(mg.ShowRWIxAccess( accessibility_frame, province_colors, width=width, height=height, file_name=overleaf_figures+f'RWI_Nepal_{c}.pdf', showlegend = showlegend, title=None, trendline='ols', trendline_scope='overall', trendline_color_override='red' ))
    display(mg.ShowRWIxAccess( accessibility_frame[ ~accessibility_frame.District.isin( ['KATHMANDU','BHAKTAPUR' ] ) ], province_colors, width=600, height=height, showlegend = False, xrange = [-.55, .35], file_name=overleaf_figures+f'RWI_without_Kathmandu_{c}.pdf', title=None, trendline='ols' ))
