# Optimizing a Healthcare Network for Improved Service Delivery


## 1- Data Gathering: 
Read table from Washington Demographics and make its dataframe.

In [1]:
# read table from Washington-Demographic data
import pandas as pd

df = pd.read_html('https://www.washington-demographics.com/zip_codes_by_population',header=0)

population_df = df[0]
population_df.head()

Unnamed: 0,Washington Zip Codes by Population Rank,Zip Code,Population
0,1,99301,77226.0
1,2,98052,65251.0
2,3,98012,62039.0
3,4,98208,58030.0
4,5,98682,56411.0


## 2- Data Understanding: 
Understand the gathered data with different pandas method

In [2]:
# check the shape of dataframe
population_df.shape

(564, 3)

In [3]:
# check the dimension of dataframe
population_df.ndim

2

In [4]:
# check the datatypes of dataframe's columns
population_df.dtypes

Washington Zip Codes by Population Rank     object
Zip Code                                    object
Population                                 float64
dtype: object

In [5]:
# check dataframe columns
population_df.columns

Index(['Washington Zip Codes by Population Rank', 'Zip Code', 'Population'], dtype='object')

In [6]:
# check dataframes statistical summery
population_df.describe()

Unnamed: 0,Population
count,563.0
mean,12538.698046
std,14665.995183
min,1.0
25%,955.5
50%,5365.0
75%,22795.0
max,77226.0


In [7]:
# check 1st 5 rows of dataframe
population_df.head()

Unnamed: 0,Washington Zip Codes by Population Rank,Zip Code,Population
0,1,99301,77226.0
1,2,98052,65251.0
2,3,98012,62039.0
3,4,98208,58030.0
4,5,98682,56411.0


## 3- Data Cleaning:
Clean data for manipulation purpose.

In [8]:
population_df.tail()

Unnamed: 0,Washington Zip Codes by Population Rank,Zip Code,Population
559,560,98853.0,16.0
560,561,98238.0,11.0
561,562,98852.0,7.0
562,563,99034.0,1.0
563,United States Census Bureau. B01001 SEX BY AGE...,,


In [9]:
# Now we delete last row which is just the description of the table
population_df = population_df.drop([563], axis=0)

In [10]:
population_df.tail()

Unnamed: 0,Washington Zip Codes by Population Rank,Zip Code,Population
558,559,98641,20.0
559,560,98853,16.0
560,561,98238,11.0
561,562,98852,7.0
562,563,99034,1.0


In [11]:
# Convert Population column into type int
population_df = population_df.astype({"Population": int})

In [12]:
population_df['Population'].dtypes

dtype('int64')

In [13]:
# Now we delete the column "Washington Zip codes by Population rank', becuse it is unncessary here
population_df.drop(["Washington Zip Codes by Population Rank"], axis = 1, inplace = True)

In [14]:
population_df.head()

Unnamed: 0,Zip Code,Population
0,99301,77226
1,98052,65251
2,98012,62039
3,98208,58030
4,98682,56411


## 4- Data Manipulation:
Now manipulate the data for desired solution

In [15]:
# Its time to extract only the information of those area that is given in the document, so we make a new df
df1 = population_df[population_df['Zip Code'].isin(['98007','98290','98065','98801','98104']) ]
df1

Unnamed: 0,Zip Code,Population
23,98801,43242
60,98290,34043
96,98007,27954
189,98065,15081
192,98104,14143


In [16]:
# reset the index of new df
df1 = df1.reset_index(drop=True)
df1

Unnamed: 0,Zip Code,Population
0,98801,43242
1,98290,34043
2,98007,27954
3,98065,15081
4,98104,14143


In [17]:
# make dataframe that is given in the document
facility_id = ['A','B','C','D','E']
facility_area_zip_code = ['98007','98290','98065','98801','98104'] 
facility_staff_count = [21, 52, 43, 9, 64]
df2 = pd.DataFrame({'Facility ID': facility_id,
                         'Zip Code': facility_area_zip_code,
                          'Facility Staff Count': facility_staff_count
                        })
df2

Unnamed: 0,Facility ID,Facility Staff Count,Zip Code
0,A,21,98007
1,B,52,98290
2,C,43,98065
3,D,9,98801
4,E,64,98104


In [18]:
# merge both dataframes df1 & df2 in which same column is Zip code
facilities = pd.merge(df2, df1, on = 'Zip Code')
facilities

Unnamed: 0,Facility ID,Facility Staff Count,Zip Code,Population
0,A,21,98007,27954
1,B,52,98290,34043
2,C,43,98065,15081
3,D,9,98801,43242
4,E,64,98104,14143


In [19]:
# Set the order of above dataframe
facilities = facilities[['Facility ID', 'Zip Code', 'Population', 'Facility Staff Count']]
facilities

Unnamed: 0,Facility ID,Zip Code,Population,Facility Staff Count
0,A,98007,27954,21
1,B,98290,34043,52
2,C,98065,15081,43
3,D,98801,43242,9
4,E,98104,14143,64


### Now our data is completely ready for finding solution of the given problem
1st we find current healthcare worker to patient ratio

In [20]:
facilities['Healthcare worker to patient ratio'] = facilities['Population']/ facilities['Facility Staff Count']
facilities

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,Facility ID,Zip Code,Population,Facility Staff Count,Healthcare worker to patient ratio
0,A,98007,27954,21,1331.142857
1,B,98290,34043,52,654.673077
2,C,98065,15081,43,350.72093
3,D,98801,43242,9,4804.666667
4,E,98104,14143,64,220.984375


In [21]:
# change "Healthcare worker to patient ratio" col into integer data type
facilities = facilities.astype({"Healthcare worker to patient ratio": int})
facilities

Unnamed: 0,Facility ID,Zip Code,Population,Facility Staff Count,Healthcare worker to patient ratio
0,A,98007,27954,21,1331
1,B,98290,34043,52,654
2,C,98065,15081,43,350
3,D,98801,43242,9,4804
4,E,98104,14143,64,220


Here we can see that all facilities in the above table having good Healthcare worker to patient ratio except facility D, so now we write the code to shift enough number of healthcare worker into those places where they are less in quantity.

We shift number of staff into different facilities according to the percentage of population

In [22]:
# find percentage of the population of each area 
population_percentage_in_area = []
total_population = facilities['Population'].sum()
total_staff = facilities['Facility Staff Count'].sum()
for i in facilities['Population']:
    p = (i / total_population) * 100
    population_percentage_in_area.append(p)
    
# convert number of staff according to the population percentage in each area

staff1 = []
for i in facilities.index:
    s = (population_percentage_in_area[i] * total_staff) / 100
    staff1.append(round(s))


In [23]:
# Display Population Percantage in each area & converted staff according to it
print(population_percentage_in_area)
print(staff1)

[20.789362129359006, 25.31774540208087, 11.21572477187033, 32.15903259632761, 10.518135100362182]
[39.0, 48.0, 21.0, 61.0, 20.0]


Now make both of above entites as a part of dataframe

In [24]:
new_facility_with_updated_staff = facilities.drop(['Facility Staff Count', 'Healthcare worker to patient ratio'], axis = 1)
new_facility_with_updated_staff['New Staff Count'] = staff1
new_facility_with_updated_staff['New Healthcare worker to patient ratio'] = round(new_facility_with_updated_staff['Population'] / new_facility_with_updated_staff['New Staff Count'])
new_facility_with_updated_staff

Unnamed: 0,Facility ID,Zip Code,Population,New Staff Count,New Healthcare worker to patient ratio
0,A,98007,27954,39.0,717.0
1,B,98290,34043,48.0,709.0
2,C,98065,15081,21.0,718.0
3,D,98801,43242,61.0,709.0
4,E,98104,14143,20.0,707.0


Now we create dataframe that also have area column(having complete address of area)

In [25]:
from geopy.geocoders import Nominatim 
geolocator = Nominatim()
area = []
for i in new_facility_with_updated_staff['Zip Code']:
    location = geolocator.geocode(i) 
    area.append(location.address)
    
area

new_facility_with_updated_staff['Area'] = area
new_facility_with_updated_staff


  from ipykernel import kernelapp as app


Unnamed: 0,Facility ID,Zip Code,Population,New Staff Count,New Healthcare worker to patient ratio,Area
0,A,98007,27954,39.0,717.0,"Bellevue, Washington, 98007, USA"
1,B,98290,34043,48.0,709.0,"Snohomish, Washington, 98290, USA"
2,C,98065,15081,21.0,718.0,"Montalbano Elicona, SIC, 98065, Italia"
3,D,98801,43242,61.0,709.0,"Wenatchee, Washington, 98801, USA"
4,E,98104,14143,20.0,707.0,"ძველი თბილისის რაიონი, თბილისი, 98104, საქართველო"


In [26]:
# arrange the columns of df
new_facility_with_updated_staff = new_facility_with_updated_staff[['Facility ID', 'Zip Code', 'Area', 'Population', 'New Staff Count', 'New Healthcare worker to patient ratio']]
new_facility_with_updated_staff

Unnamed: 0,Facility ID,Zip Code,Area,Population,New Staff Count,New Healthcare worker to patient ratio
0,A,98007,"Bellevue, Washington, 98007, USA",27954,39.0,717.0
1,B,98290,"Snohomish, Washington, 98290, USA",34043,48.0,709.0
2,C,98065,"Montalbano Elicona, SIC, 98065, Italia",15081,21.0,718.0
3,D,98801,"Wenatchee, Washington, 98801, USA",43242,61.0,709.0
4,E,98104,"ძველი თბილისის რაიონი, თბილისი, 98104, საქართველო",14143,20.0,707.0


#### This is the approximate staff count that is distributed/ allocated according to population percentage in each area.

##### Now its time to find distance from each area to other area.
To perform this task, we use geopy library. It is a python library that makes it easy for Python developers to locate the coordinates of addresses, cities, countries, and landmarks across the globe using third-party geocoders and other data sources.

In [27]:
# install geopy
!pip install geopy

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [28]:
from geopy.geocoders import Nominatim 
geolocator = Nominatim()

  from ipykernel import kernelapp as app


In [29]:
# import geodesic module for finding distance between 2 locations
from geopy.distance import geodesic
iterator = len(new_facility_with_updated_staff.index)

distance_in_miles = []
distance_in_km = []
each_area = []
other_area = []

# find distance from each area to another area and store their result in the above lists
for i in range(iterator):
    location_a = geolocator.geocode(new_facility_with_updated_staff['Zip Code'][i]) 
    area1 = new_facility_with_updated_staff['Facility ID'][i]
    a_lat = location_a.latitude
    a_long = location_a.longitude
    point_a = (a_lat, a_long)
    
    for j in range(i+1, iterator):
        location_b = geolocator.geocode(new_facility_with_updated_staff['Zip Code'][j]) 
        b_lat = location_b.latitude
        b_long = location_b.longitude
        point_b = (b_lat, b_long)
        distance_miles = geodesic(point_a, point_b).miles 
        distance_in_miles.append(round(distance_miles,2))
        distance_km = geodesic(point_a, point_b).km 
        distance_in_km.append(round(distance_km,2))
        area2 = new_facility_with_updated_staff['Facility ID'][j]
        each_area.append(area1)
        other_area.append(area2)
        
        


In [30]:
# print all lists
print(each_area)
print(other_area)
print(distance_in_miles)
print(distance_in_km)

['A', 'A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'D']
['B', 'C', 'D', 'E', 'C', 'D', 'E', 'D', 'E', 'E']
[22.02, 5973.73, 85.35, 6230.16, 5953.63, 87.83, 6208.14, 5937.81, 1595.52, 6226.89]
[35.44, 9613.79, 137.36, 10026.46, 9581.43, 141.35, 9991.03, 9555.98, 2567.74, 10021.21]


 Now we make a dataframe that shows distance between each area to every other area

In [31]:
distance_df = pd.DataFrame({"Each Area": each_area,
                           "Other Area": other_area,
                           "Distance in Miles": distance_in_miles,
                           "Distance in Km": distance_in_km})

distance_df

Unnamed: 0,Distance in Km,Distance in Miles,Each Area,Other Area
0,35.44,22.02,A,B
1,9613.79,5973.73,A,C
2,137.36,85.35,A,D
3,10026.46,6230.16,A,E
4,9581.43,5953.63,B,C
5,141.35,87.83,B,D
6,9991.03,6208.14,B,E
7,9555.98,5937.81,C,D
8,2567.74,1595.52,C,E
9,10021.21,6226.89,D,E


In [32]:
# arrange all the columns
distance_df = distance_df[["Each Area", "Other Area", "Distance in Miles", "Distance in Km"]]
distance_df

Unnamed: 0,Each Area,Other Area,Distance in Miles,Distance in Km
0,A,B,22.02,35.44
1,A,C,5973.73,9613.79
2,A,D,85.35,137.36
3,A,E,6230.16,10026.46
4,B,C,5953.63,9581.43
5,B,D,87.83,141.35
6,B,E,6208.14,9991.03
7,C,D,5937.81,9555.98
8,C,E,1595.52,2567.74
9,D,E,6226.89,10021.21


Make pivot table so data become easily readable. It takes simple column-wise data as input, and groups the entries into a two-dimensional table that provides a multidimensional summarization of the data.

In [33]:
# Make pivot table for making above df easy to read

distance_df_pivot = distance_df.pivot(index = "Each Area", columns = "Other Area")
distance_df_pivot

Unnamed: 0_level_0,Distance in Miles,Distance in Miles,Distance in Miles,Distance in Miles,Distance in Km,Distance in Km,Distance in Km,Distance in Km
Other Area,B,C,D,E,B,C,D,E
Each Area,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
A,22.02,5973.73,85.35,6230.16,35.44,9613.79,137.36,10026.46
B,,5953.63,87.83,6208.14,,9581.43,141.35,9991.03
C,,,5937.81,1595.52,,,9555.98,2567.74
D,,,,6226.89,,,,10021.21


Now we can easily read and visualize data

##### Its time to install folium that is a powerful Python library that helps in creating several types of Leaflet maps.

In [34]:
!pip install folium==0.5.0
import folium

print('Folium installed and imported!')

You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Folium installed and imported!


In [35]:
# Create map of zip code 98007
import folium
loc = geolocator.geocode("98007") 
#print((location.latitude, location.longitude)) 
world_map = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=13)

# display world map
world_map

In [44]:
# instantiate a feature group for the incidents in the dataframe
incidents = folium.map.FeatureGroup()
lat = loc.latitude
lng = loc.longitude
# loop through the 100 crimes and add each to the incidents feature group
incidents.add_child(
        folium.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
            color='black',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        )
    )

# add incidents to map
label = geolocator.geocode("98007").address
folium.Marker([lat, lng], popup=label).add_to(world_map)   
world_map.add_child(incidents)


To show disatances from one area to another area , we make df that contain facility id with its area coordinates

In [37]:


latitudes = []
longitudes = []

for i in facility_area_zip_code:
    location = geolocator.geocode(i)
    lat = location.latitude
    latitudes.append(lat)
    lng = location.longitude
    longitudes.append(lng)


In [38]:
print(facility_id)
print(facility_area_zip_code)
print(latitudes)
print(longitudes)

['A', 'B', 'C', 'D', 'E']
['98007', '98290', '98065', '98801', '98104']
[47.6096892488886, 47.92502668173, 38.0242788631552, 47.4340912339767, 41.7096925]
[-122.146669547469, -122.077736219041, 15.0135874203079, -120.341206400495, 44.7970253]


In [39]:
area_with_coordinates = pd.DataFrame({"Facility Id":facility_id ,
                                     "Facility Area Zip Code": facility_area_zip_code,
                                     "Latitudes": latitudes,
                                     "Longitudes": longitudes})

area_with_coordinates

Unnamed: 0,Facility Area Zip Code,Facility Id,Latitudes,Longitudes
0,98007,A,47.609689,-122.14667
1,98290,B,47.925027,-122.077736
2,98065,C,38.024279,15.013587
3,98801,D,47.434091,-120.341206
4,98104,E,41.709693,44.797025


##### Start creating maps from one place to another

In [40]:
# map from facility id 'A' to 'B'
import folium 

my_map1 = folium.Map(location = [area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
zoom_start = 9) 

folium.Marker([area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][0]).add_to(my_map1) 

folium.Marker([area_with_coordinates['Latitudes'][1], area_with_coordinates['Longitudes'][1]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][1]).add_to(my_map1) 

# Add a line to the map by using line method . 
# it connect both coordiates by the line 
# line_opacity implies intensity of the line 

folium.PolyLine(locations = [(area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]), (area_with_coordinates['Latitudes'][1], area_with_coordinates['Longitudes'][1])], 
line_opacity = 0.5).add_to(my_map1) 

my_map1


In [41]:
# map from facility id 'A' to 'C'
my_map2 = folium.Map(location = [area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
zoom_start = 10) 

folium.Marker([area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][0]).add_to(my_map2) 

folium.Marker([area_with_coordinates['Latitudes'][2], area_with_coordinates['Longitudes'][2]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][2]).add_to(my_map2) 
folium.PolyLine(locations = [(area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]), (area_with_coordinates['Latitudes'][2], area_with_coordinates['Longitudes'][2])], 
line_opacity = 0.5).add_to(my_map2) 

my_map2


In [42]:
# map from facility id 'A' to 'D'
my_map3 = folium.Map(location = [area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
zoom_start = 8.3) 

folium.Marker([area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][0]).add_to(my_map3) 

folium.Marker([area_with_coordinates['Latitudes'][3], area_with_coordinates['Longitudes'][3]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][3]).add_to(my_map3) 
folium.PolyLine(locations = [(area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]), (area_with_coordinates['Latitudes'][3], area_with_coordinates['Longitudes'][3])], 
line_opacity = 0.5).add_to(my_map3) 

my_map3


In [43]:
# map from facility id 'A' to 'E'
my_map4 = folium.Map(location = [area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
zoom_start = 9) 

folium.Marker([area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][0]).add_to(my_map4) 

folium.Marker([area_with_coordinates['Latitudes'][4], area_with_coordinates['Longitudes'][4]], 
popup = "Facility Id: "+area_with_coordinates['Facility Id'][4]).add_to(my_map4) 

# Add a line to the map by using line method . 
# it connect both coordiates by the line 
# line_opacity implies intensity of the line 

folium.PolyLine(locations = [(area_with_coordinates['Latitudes'][0], area_with_coordinates['Longitudes'][0]), (area_with_coordinates['Latitudes'][4], area_with_coordinates['Longitudes'][4])], 
line_opacity = 0.5).add_to(my_map4) 

my_map4


We can create more distance maps same as above

# Done