## Importing the necessary libraries

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from geopy.geocoders import Nominatim
import folium
from folium import plugins
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests

In [2]:
df = pd.read_csv("london-borough-profile.csv")
df.head()

Unnamed: 0,Code,borough,Inner/_Outer_London,GLA_Population_Estimate_2017,GLA_Household_Estimate_2017,Inland_Area_(Hectares),Population_density_(per_hectare)_2017,"Average_Age,_2017","Proportion_of_population_aged_0-15,_2015","Proportion_of_population_of_working-age,_2015",...,Happiness_score_2011-14_(out_of_10),Anxiety_score_2011-14_(out_of_10),Childhood_Obesity_Prevalance_(%)_2015/16,People_aged_17+_with_diabetes_(%),Mortality_rate_from_causes_considered_preventable_2012/14,Political_control_in_council,Proportion_of_seats_won_by_Conservatives_in_2014_election,Proportion_of_seats_won_by_Labour_in_2014_election,Proportion_of_seats_won_by_Lib_Dems_in_2014_election,Turnout_at_2014_local_elections
0,E09000001,City of London,Inner London,8800.0,5326.0,290.0,30.3,43.2,11.4,73.1,...,6.0,5.6,,2.6,129,,,,,
1,E09000002,Barking and Dagenham,Outer London,209000.0,78188.0,3611.0,57.9,32.9,27.2,63.1,...,7.1,3.1,28.5,7.3,228,Lab,0.0,100,0.0,36.5
2,E09000003,Barnet,Outer London,389600.0,151423.0,8675.0,44.9,37.3,21.1,64.9,...,7.4,2.8,20.7,6.0,134,Cons,50.8,.,1.6,40.5
3,E09000004,Bexley,Outer London,244300.0,97736.0,6058.0,40.3,39.0,20.6,62.9,...,7.2,3.3,22.7,6.9,164,Cons,71.4,23.8,0.0,39.6
4,E09000005,Brent,Outer London,332100.0,121048.0,4323.0,76.8,35.6,20.9,67.8,...,7.2,2.9,24.3,7.9,169,Lab,9.5,88.9,1.6,36.3


# Visualising London

In [4]:
address = 'London'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

# create map of London using latitude and longitude values
map_London = folium.Map(location=[latitude, longitude], zoom_start=10)


map_London

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of London are 51.5073219, -0.1276474.


# We can't really see the limits of London on the above map, so I've used a geojson file with the outline to demarcate the limits of London

In [5]:
london_outline = 'greaterlondon.json'

In [6]:
address = 'London'

print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

# create map of London using latitude and longitude values
map_London = folium.Map(location=[latitude, longitude], zoom_start=10)
folium.GeoJson(london_outline,
              ).add_to(map_London)

map_London

The geograpical coordinate of London are 51.5073219, -0.1276474.


# In order to be able to tell the difference between boroughs in London, we plot the outline of each borough in London

In [7]:
borough_geo = 'london_boroughs_proper.geojson'

In [8]:
borough_geo = 'london_boroughs_proper.geojson'
address = 'London'

print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

# create map of London using latitude and longitude values
map_London = folium.Map(location=[latitude, longitude], zoom_start=10)
folium.GeoJson(borough_geo,
              ).add_to(map_London)

map_London

The geograpical coordinate of London are 51.5073219, -0.1276474.


In [9]:
borough_df = pd.DataFrame(df.borough)

## Finding the latitiude and longitude values for the head of each borough (i.e.) the location listed with the same name as the borough

In [10]:
borough_df['Latitude'] = 0
borough_df['Longitude'] = 0
borough_df.head()

Unnamed: 0,borough,Latitude,Longitude
0,City of London,0,0
1,Barking and Dagenham,0,0
2,Barnet,0,0
3,Bexley,0,0
4,Brent,0,0


In [18]:
geolocator = Nominatim()
for i in (range(0,borough_df['borough'].shape[0])):
    address = borough_df.loc[i,'borough'] + ", England"
    location = geolocator.geocode(address, timeout = None)
    if (location != None):
        borough_df.loc[i,'Latitude'] = location.latitude
        borough_df.loc[i,'Longitude'] = location.longitude

print(borough_df.shape)
borough_df.head()

  """Entry point for launching an IPython kernel.
  after removing the cwd from sys.path.


(33, 3)


Unnamed: 0,borough,Latitude,Longitude
0,City of London,51.515618,-0.091998
1,Barking and Dagenham,51.554117,0.150504
2,Barnet,51.648784,-0.172913
3,Bexley,51.441679,0.150488
4,Brent,51.441837,0.234833


In [19]:
address = 'London'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

map_London = folium.Map(location=[latitude, longitude], zoom_start=10)

for borough, lat, lng in zip( borough_df['borough'], borough_df['Latitude'], borough_df['Longitude']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=label,
        color='red',
        fill=True,
        fill_color='#0000ff',
        fill_opacity=0.7,
        parse_html=False).add_to(map_London)


folium.GeoJson(borough_geo,
              ).add_to(map_London)

map_London

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of London are 51.5073219, -0.1276474.


# Considering the elderly population of London

In [20]:
age_df = pd.read_csv("age_data.csv")
age_df.columns = ['borough','Population 50+','50+','Female50+','Male 50+','Population 65+']
age_df.head()

Unnamed: 0,borough,Population 50+,50+,Female50+,Male 50+,Population 65+
0,Barking and Dagenham,43700,24,54,46,19200
1,Barnet,102700,29,54,46,47400
2,Bexley,77800,34,54,46,47900
3,Brent,77800,25,52,48,32600
4,Bromley,108200,35,54,46,51900


In [21]:
age_loc_df = borough_df.merge(age_df,on='borough')


In [22]:
age_loc_df.head()


Unnamed: 0,borough,Latitude,Longitude,Population 50+,50+,Female50+,Male 50+,Population 65+
0,City of London,51.515618,-0.091998,2400,32,50,50,1000
1,Barking and Dagenham,51.554117,0.150504,43700,24,54,46,19200
2,Barnet,51.648784,-0.172913,102700,29,54,46,47400
3,Bexley,51.441679,0.150488,77800,34,54,46,47900
4,Brent,51.441837,0.234833,77800,25,52,48,32600


In [23]:
# set number of clusters
kclusters = 4

age_grouped_clustering = age_loc_df[['Population 65+',
                                  'Latitude',
                                  'Longitude']]

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(age_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 1, 2, 2, 0, 2, 1, 2, 0, 0], dtype=int32)

In [24]:
age_loc_df['Cluster Labels'] = kmeans.labels_
age_loc_df.head()


Unnamed: 0,borough,Latitude,Longitude,Population 50+,50+,Female50+,Male 50+,Population 65+,Cluster Labels
0,City of London,51.515618,-0.091998,2400,32,50,50,1000,3
1,Barking and Dagenham,51.554117,0.150504,43700,24,54,46,19200,1
2,Barnet,51.648784,-0.172913,102700,29,54,46,47400,2
3,Bexley,51.441679,0.150488,77800,34,54,46,47900,2
4,Brent,51.441837,0.234833,77800,25,52,48,32600,0


In [25]:
address = 'London, England'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for bor, lat, lon, cluster in zip(age_loc_df['borough'],age_loc_df['Latitude'], age_loc_df['Longitude'], age_loc_df['Cluster Labels']):
    label = folium.Popup(bor + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
folium.GeoJson(borough_geo,
              ).add_to(map_clusters)


map_clusters

  This is separate from the ipykernel package so we can avoid doing imports until


In [26]:
age_loc_df_clustered = age_loc_df.groupby('Cluster Labels').mean().reset_index()
age_loc_df_clustered

Unnamed: 0,Cluster Labels,Latitude,Longitude,Population 50+,50+,Female50+,Male 50+,Population 65+
0,0,51.553698,-0.148597,79800.0,27.5,53.333333,46.666667,35016.666667
1,1,51.500836,-0.003533,53333.333333,23.380952,53.52381,46.952381,22419.047619
2,2,51.466143,-0.027392,98050.0,31.75,54.0,46.0,47925.0
3,3,51.515618,-0.091998,2400.0,32.0,50.0,50.0,1000.0


## Observation

#### From the grouped dataframe, we can see that certain clusters have a lower proportion of elderly populatoin, compared to other clusters.
#### In general, as the cluster label increases, the proportion of elderly people increases.
#### Establishing a hospital in a borough with more elderly people is advantageous

In [27]:
# Obtaining the cluster with the highest proportion of elderly patients

opt_k_eld = age_loc_df_clustered.sort_values('Population 65+', ascending=False).iloc[0]['Cluster Labels']
opt_k_eld

2.0

In [28]:
opt_loc_eld = age_loc_df[age_loc_df['Cluster Labels'] == opt_k_eld]
opt_loc_eld

Unnamed: 0,borough,Latitude,Longitude,Population 50+,50+,Female50+,Male 50+,Population 65+,Cluster Labels
2,Barnet,51.648784,-0.172913,102700,29,54,46,47400,2
3,Bexley,51.441679,0.150488,77800,34,54,46,47900,2
5,Bromley,51.402805,0.014814,108200,35,54,46,51900,2
7,Croydon,51.371305,-0.101957,103500,29,54,46,44500,2


# Considering the crime rates in London

### These boroughs are potential locations for our new hospital. Now, we consider our second criteria - crime.
### We want to build a hospital in as safe a place as possible.

In [29]:
crime_df = pd.read_csv("crime rates.csv")
crime_df.columns = ['Code','borough','Year','Offences','Rate','Number_of_offences']
crime_df.head()

Unnamed: 0,Code,borough,Year,Offences,Rate,Number_of_offences
0,E09000002,Barking and Dagenham,1999-00,All recorded offences,120.5,19567
1,E09000003,Barnet,1999-00,All recorded offences,98.0,30708
2,E09000004,Bexley,1999-00,All recorded offences,95.1,20680
3,E09000005,Brent,1999-00,All recorded offences,127.7,33253
4,E09000006,Bromley,1999-00,All recorded offences,89.8,26474


In [30]:
crime_loc_df = borough_df.merge(crime_df,on='borough')
crime_loc_df.Number_of_offences = [(int)(a.replace(',', '')) for a in crime_loc_df.Number_of_offences.values]


In [31]:
crime_loc_df.head()


Unnamed: 0,borough,Latitude,Longitude,Code,Year,Offences,Rate,Number_of_offences
0,Barking and Dagenham,51.554117,0.150504,E09000002,1999-00,All recorded offences,120.5,19567
1,Barking and Dagenham,51.554117,0.150504,E09000002,2000-01,All recorded offences,123.6,20253
2,Barking and Dagenham,51.554117,0.150504,E09000002,2001-02,All recorded offences,124.0,20538
3,Barking and Dagenham,51.554117,0.150504,E09000002,2002-03,All recorded offences,122.6,20394
4,Barking and Dagenham,51.554117,0.150504,E09000002,2003-04,All recorded offences,133.2,22144


In [32]:
crime_loc_df = crime_loc_df.groupby('borough').mean().reset_index()


In [33]:
crime_loc_df


Unnamed: 0,borough,Latitude,Longitude,Rate,Number_of_offences
0,Barking and Dagenham,51.554117,0.150504,21.952778,3853.844444
1,Barnet,51.648784,-0.172913,16.923889,5728.666667
2,Bexley,51.441679,0.150488,14.442778,3261.266667
3,Brent,51.441837,0.234833,20.954444,6014.255556
4,Bromley,51.402805,0.014814,16.421667,5002.144444
5,Camden,51.542304,-0.13956,36.738889,7772.544444
6,Croydon,51.371305,-0.101957,18.808889,6577.155556
7,Ealing,51.512655,-0.305195,20.957778,6727.477778
8,Enfield,51.652085,-0.081017,17.282778,5104.055556
9,Greenwich,52.036732,1.168934,22.347222,5302.955556


In [34]:
kclusters = 4

crime_grouped_clustering = crime_loc_df[['Rate',
                                  'Latitude',
                                  'Longitude']]

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(crime_grouped_clustering)

kmeans.labels_[0:10]

array([3, 0, 0, 3, 0, 2, 3, 3, 0, 3], dtype=int32)

In [35]:
crime_loc_df['Cluster Labels'] = kmeans.labels_
crime_loc_df.head()

Unnamed: 0,borough,Latitude,Longitude,Rate,Number_of_offences,Cluster Labels
0,Barking and Dagenham,51.554117,0.150504,21.952778,3853.844444,3
1,Barnet,51.648784,-0.172913,16.923889,5728.666667,0
2,Bexley,51.441679,0.150488,14.442778,3261.266667,0
3,Brent,51.441837,0.234833,20.954444,6014.255556,3
4,Bromley,51.402805,0.014814,16.421667,5002.144444,0


In [36]:
crime_clustered = crime_loc_df.groupby('Cluster Labels').mean().reset_index()
crime_clustered


Unnamed: 0,Cluster Labels,Latitude,Longitude,Rate,Number_of_offences
0,0,51.452055,-0.381673,15.738586,3769.631818
1,1,51.497321,-0.137149,62.828889,13520.216667
2,2,51.468512,0.045792,30.04625,6547.340278
3,3,51.543514,0.004343,21.571944,5669.257407


### From the grouped dataframe, we see that the number of offences taking place is least in cluster 0

### This shows that a borough in certain clusters are the safer than others, to establish our new hospital.

In [37]:
# Obtaining the cluster with the highest proportion of elderly patients

opt_k_safe = crime_clustered.sort_values('Rate').iloc[0]['Cluster Labels']
opt_k_safe

0.0

In [38]:
safe_loc = crime_loc_df[crime_loc_df['Cluster Labels']==opt_k_safe]
safe_loc

Unnamed: 0,borough,Latitude,Longitude,Rate,Number_of_offences,Cluster Labels
1,Barnet,51.648784,-0.172913,16.923889,5728.666667,0
2,Bexley,51.441679,0.150488,14.442778,3261.266667,0
4,Bromley,51.402805,0.014814,16.421667,5002.144444,0
8,Enfield,51.652085,-0.081017,17.282778,5104.055556,0
13,Harrow,51.596769,-0.337275,13.68,3084.511111,0
14,Havering,51.035863,-2.841754,15.923333,3700.533333,0
19,Kingston upon Thames,51.409628,-0.306262,15.887778,2473.683333,0
22,Merton,51.410803,-0.188098,15.618333,3041.705556,0
24,Redbridge,51.57632,0.04541,18.231667,4776.827778,0
25,Richmond upon Thames,51.440354,-0.308151,14.341667,2614.144444,0


In [39]:
address = 'London, England'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for bor, lat, lon, cluster in zip(crime_loc_df['borough'],crime_loc_df['Latitude'], crime_loc_df['Longitude'], crime_loc_df['Cluster Labels']):
    label = folium.Popup(bor + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

folium.GeoJson(borough_geo,
              ).add_to(map_clusters)
    
map_clusters

  This is separate from the ipykernel package so we can avoid doing imports until


## Finally, we consider an important factor - whether or not a hospital already exists in the location we are planning to build a new hospital.

### For this, we require the FourSquar API.

In [40]:
CLIENT_ID = 'KIDZBR1G0WK3DDCJF1HF1VRL2B0UIOXOFVUXRBHTPA023QXR' # your Foursquare ID
CLIENT_SECRET = 'EM5TMCK3MPTS4EHQHKJONMXH0NMEH5ABUHOWZOQIWH5OJPPN' # your Foursquare Secret
VERSION = '20190826' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KIDZBR1G0WK3DDCJF1HF1VRL2B0UIOXOFVUXRBHTPA023QXR
CLIENT_SECRET:EM5TMCK3MPTS4EHQHKJONMXH0NMEH5ABUHOWZOQIWH5OJPPN


### We define a function to find all the hospitals present within a 1000km radius of each borough's latitude and longitude.

(This is upper-limited at 100)

In [41]:
category_id = "4bf58dd8d48988d196941735"
LIMIT = 100

def getNearbyHospitals(names, latitudes, longitudes, radius=1000):

    hospitals_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url= "https://api.foursquare.com/v2/venues/search?ll={},{}&categoryId={}&client_id={}&client_secret={}&radius={}&limit={}&v={}".format(
        lat,
        lng,
        category_id,
        CLIENT_ID,
        CLIENT_SECRET,
        radius,
        LIMIT,
        VERSION
        )

        # make the GET request
        results = requests.get(url).json()['response']['venues']

        # return only relevant information for each nearby hospital
        hospitals_list.append([(
            name,
            lat,
            lng,
            v['name'],
            v['location']['lat'],
            v['location']['lng']) for v in results])

    nearby_hospitals = pd.DataFrame([item for hospital_list in hospitals_list for item in hospital_list])
    nearby_hospitals.columns = ['Borough',
                  'Borough Latitude',
                  'Borough Longitude',
                  'Hospital',
                  'Hospital Latitude',
                  'Hospital Longitude']

    return(nearby_hospitals)

In [None]:
London_hospitals = getNearbyHospitals(names=borough_df['borough'],
                                   latitudes=borough_df['Latitude'],
                                   longitudes=borough_df['Longitude']
                                )


In [None]:
London_hospitals.head()


In [None]:
London_hospitals.groupby('Borough').count().max()['Hospital']


### We see that the max number of nearby hospitals is 33.

### Grouping the dataframe, based on unique hospital names, and taking the centroid of the co-ordinates:

In [None]:
London_hosp_cluster = pd.DataFrame(London_hospitals.groupby('Hospital').mean().reset_index())


In [None]:
London_hosp_cluster.head()


### By observation, we see that the closest, well-known hospitals in London are all centered within the limits of the City of London borough.

## Visualisation of the 19 hospitals already present (bright red spots):

In [None]:
address = 'City of London, England'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

borough_geo = 'london_boroughs_proper.geojson'
address = 'London'

print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

for ven, lat, lon in zip(London_hosp_cluster['Hospital'],London_hosp_cluster['Hospital Latitude'], London_hosp_cluster['Hospital Longitude']):
    label = folium.Popup(str(ven), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7).add_to(map_clusters)

folium.GeoJson(borough_geo).add_to(map_clusters)

In [None]:
map_clusters

From the visualisation of the map of London, we see that the existing hospitals are clustered toward the center, concentrated around the borough of the City of London. 

So, that eliminates the City of London and neighboring boroughs as potential locations.

Instead, we consider the initial two criteria : Proportion of elderly patients and crime rate

In [42]:
opt_loc_eld

Unnamed: 0,borough,Latitude,Longitude,Population 50+,50+,Female50+,Male 50+,Population 65+,Cluster Labels
2,Barnet,51.648784,-0.172913,102700,29,54,46,47400,2
3,Bexley,51.441679,0.150488,77800,34,54,46,47900,2
5,Bromley,51.402805,0.014814,108200,35,54,46,51900,2
7,Croydon,51.371305,-0.101957,103500,29,54,46,44500,2


In [43]:
safe_loc

Unnamed: 0,borough,Latitude,Longitude,Rate,Number_of_offences,Cluster Labels
1,Barnet,51.648784,-0.172913,16.923889,5728.666667,0
2,Bexley,51.441679,0.150488,14.442778,3261.266667,0
4,Bromley,51.402805,0.014814,16.421667,5002.144444,0
8,Enfield,51.652085,-0.081017,17.282778,5104.055556,0
13,Harrow,51.596769,-0.337275,13.68,3084.511111,0
14,Havering,51.035863,-2.841754,15.923333,3700.533333,0
19,Kingston upon Thames,51.409628,-0.306262,15.887778,2473.683333,0
22,Merton,51.410803,-0.188098,15.618333,3041.705556,0
24,Redbridge,51.57632,0.04541,18.231667,4776.827778,0
25,Richmond upon Thames,51.440354,-0.308151,14.341667,2614.144444,0


In [45]:

ideal_locations = opt_loc_eld.merge(safe_loc,on='borough')
ideal_locations = ideal_locations[['borough','Latitude_x','Longitude_x','Rate','Population 65+']]

ideal_locations

Unnamed: 0,borough,Latitude_x,Longitude_x,Rate,Population 65+
0,Barnet,51.648784,-0.172913,16.923889,47400
1,Bexley,51.441679,0.150488,14.442778,47900
2,Bromley,51.402805,0.014814,16.421667,51900


In [None]:
address = 'London, England'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

borough_geo = 'london_boroughs_proper.geojson'
address = 'London'

print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

for ven, lat, lon in zip(ideal_locations['borough'],ideal_locations['Latitude_x'], ideal_locations['Longitude_x']):
    label = folium.Popup(str(ven), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7).add_to(map_clusters)
    print(ven)

folium.GeoJson(borough_geo).add_to(map_clusters)

map_clusters

## Thus using techniques of data analysis, visualisation tools like folium and APIs like FourSquare, I have identified 3 potential locations where it will be best suited to build a hospital for the elderly.