In [3]:
import pandas as pd
import numpy as np
import matplotlib as plt
import json # library to handle JSON files
import requests # a package to send http request
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
from sklearn.cluster import DBSCAN

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [4]:
# download the nyc data json file and calling it 'newyork_data.json' 
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
# reading the 'newyork_data.json' file and assign it to a new variable for future use
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
# our relevant data is in the 'features' key
neighborhoods_data = newyork_data['features']

In [5]:
# in this part we will want to convert neighborhoods_data to a pandas data frame
# so first we define an empty data frame with the following columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
neighborhoods = pd.DataFrame(columns=column_names)
# fill in the data frame
for data in neighborhoods_data:
    borough = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
print(neighborhoods.shape)
neighborhoods.head()

(306, 4)


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [64]:
#nyc hospital lists per area according to url = 'https://en.wikipedia.org/wiki/List_of_hospitals_in_New_York_City'

Manhattan_hospitals_list = ['Bellevue Hospital Center', 'Coler-Goldwater Specialty Hospital', 'Gracie Square Hospital', 'Harlem Hospital Center', 'Hospital for Special Surgery', 
                               'Lenox Hill Hospital', 'Manhattan Eye, Ear and Throat Hospital', 'Manhattan Psychiatric Center','Manhattan Veterans Administration Hospital', 
                               'Memorial Sloan Kettering Cancer Center', 'Metropolitan Hospital Center', 'Mount Sinai Beth Israel', 'Mount Sinai Hospital', 'Mount Sinai West', 
                               'NewYork–Presbyterian - Allen Hospital', 'NewYork–Presbyterian/Columbia University Medical Center','NewYork–Presbyterian/Lower Manhattan Hospital', 
                               'NewYork-Presbyterian/Weill Cornell Medical Center','New York Eye and Ear Infirmary', 'NYU Hospital for Joint Diseases',
                               'NYU Langone Medical Center', 'Rockefeller Institute Hospital']

broonks_hospitals_list = ['Bronx Psychiatric Center', 'Bronx-Lebanon Hospital Center', 'Calvary Hospital', 'Jacobi Medical Center', 'James J. Peters VA Medical Center', 'Lincoln Medical Center',
                          'Montefiore Medical Center', 'Moses Division', 'Weiler Division', 'Wakefield Division', 'North Central Bronx Hospital', 'St. Barnabas Hospital']

brooklyn_hospitals_list = ['Brookdale University Hospital and Medical Center', 'Brooklyn Hospital Center', 'Brooklyn V.A. Medical Center', 'Coney Island Hospital', 'Interfaith Medical Center',
                           'Kingsbrook Jewish Medical Center', 'Kings County Hospital Center', 'Maimonides Medical Center', 'Mount Sinai Brooklyn', 'New York Community Hospital',
                           'NewYork-Presbyterian Brooklyn Methodist Hospital','NYU Lutheran Medical Center', 'University Hospital of Brooklyn', 'Woodhull Medical and Mental Health Center',
                           'Wyckoff Heights Medical Center']

queens_hospitals_list = ['Creedmoor Psychiatric Center', 'Elmhurst Hospital Center', 'The Floating Hospital', 'Flushing Hospital Medical Center', 'Long Island Jewish Forest Hills',
                         'Jamaica Hospital','Long Island Jewish Medical Center', 'Mount Sinai Queens', 'NewYork–Presbyterian/Queens', 'Queens Hospital Center', 'Zucker Hillside Hospital']

Staten_hospitals_Island_list = ['Richmond University Medical Center', 'Staten Island University Hospital']

nyc_hospitals_names = Manhattan_hospitals_list + broonks_hospitals_list + brooklyn_hospitals_list + queens_hospitals_list + Staten_hospitals_Island_list

#appending the above hospital list to a new data frame with the following columns
df_nyc_hospitals = pd.DataFrame(columns = ['Hospital Name', 'Latitude', 'Longitude'])
df_nyc_hospitals['Hospital Name'] = nyc_hospitals_names
print(df_nyc_hospitals.shape)
df_nyc_hospitals.head()

(62, 3)


Unnamed: 0,Hospital Name,Latitude,Longitude
0,Bellevue Hospital Center,,
1,Coler-Goldwater Specialty Hospital,,
2,Gracie Square Hospital,,
3,Harlem Hospital Center,,
4,Hospital for Special Surgery,,


In [45]:
# using geopy to find hospitals coordinates
df_nyc_hospitals_2 = df_nyc_hospitals['Hospital Name']
hospital_lat = []
hospital_long = []
hospitals_that_their_coordinates_need_to_be_find_manualy_1 = []
for i in df_nyc_hospitals_2[:17]:
    geolocator_1 = Nominatim(user_agent="agent_1") 
    location_1 = geolocator_1.geocode(i)
    if location_1 != None:
        print(location_1.address) 
        print((location_1.latitude, location_1.longitude))
        hospital_lat.append(location_1.latitude)
        hospital_long.append(location_1.longitude)
    else:
        print('Can not find the lat and long of hospital:', i)
        hospitals_that_their_coordinates_need_to_be_find_manualy_1.append(i)

NYC Health + Hospitals/Bellevue, 462, 1st Avenue, Kips Bay, Manhattan Community Board 6, Manhattan, New York County, New York, 10010, United States of America
(40.73926685, -73.9753598184537)
Can not find the lat and long of hospital: Coler-Goldwater Specialty Hospital
Gracie Square Hospital, 420, East 76th Street, Upper East Side, Manhattan Community Board 8, Manhattan, New York County, New York, 10021, United States of America
(40.7697091, -73.9529748)
NYC Health + Hospitals/Harlem, 506, Malcolm X Boulevard, Harlem, Manhattan Community Board 10, Manhattan, New York County, New York, 10037, United States of America
(40.81470595, -73.939280346021)
Hospital for Special Surgery, 535, East 70th Street, Lenox Hill, Manhattan Community Board 8, Manhattan, New York County, New York, 10021, United States of America
(40.765164, -73.9526899)
Lenox Hill Hospital, 100, East 77th Street, Upper East Side, Manhattan Community Board 8, Manhattan, New York County, New York, 10075, United States of Ame

In [46]:
# adding latitude and longitude to nyc hospitals data frame
hospitals_that_their_coordinates_need_to_be_find_manualy_1

['Coler-Goldwater Specialty Hospital',
 'Manhattan Eye, Ear and Throat Hospital',
 'Manhattan Veterans Administration Hospital',
 'NewYork–Presbyterian/Columbia University Medical Center']

In [47]:
# continue searching for nyc hospitals latitude and longitude
hospital_lat_2 = []
hospital_long_2 = []
hospitals_that_their_coordinates_need_to_be_find_manualy_2 = []
for j in df_nyc_hospitals_2[17:35]:
    geolocator_2 = Nominatim(user_agent="agent_2") 
    location_2 = geolocator_2.geocode(j)
    if location_2 != None:
        print(location_2.address) 
        print((location_2.latitude, location_2.longitude))
        hospital_lat_2.append(location_2.latitude)
        hospital_long_2.append(location_2.longitude)
    else:
        print('Can not find the lat and long of hospital:', j)
        hospitals_that_their_coordinates_need_to_be_find_manualy_2.append(j)

NewYork-Presbyterian / Weill Cornell Medical Center, 525, East 68th Street, Lenox Hill, Manhattan Community Board 8, Manhattan, New York County, New York, 10065, United States of America
(40.7647021, -73.9540033012178)
New York Eye and Ear Infirmary of Mount Sinai, 310, East 14th Street, East Village, Manhattan Community Board 3, Manhattan, New York County, New York, 10003, United States of America
(40.7318799, -73.9845807)
NYU Langone Orthopedic Hospital, 301, East 17th Street, Gramercy, Manhattan Community Board 6, Manhattan, New York County, New York, 10003, United States of America
(40.7343943, -73.9829913720073)
NYU Langone Medical Center, East 33rd Street, Kips Bay, Manhattan Community Board 6, Manhattan, New York County, New York, 10016, United States of America
(40.74230265, -73.9735653999092)
Rockefeller Institute of Government, State Street, Center Square, Albany, Albany County, New York, 12210, United States
(42.6575183, -73.7665628211092)
Bronx Psychiatric Center, 1500, Wat

In [48]:
hospitals_that_their_coordinates_need_to_be_find_manualy_2

['Bronx-Lebanon Hospital Center',
 'James J. Peters VA Medical Center',
 'Brookdale University Hospital and Medical Center']

In [49]:
hospital_lat_3 = []
hospital_long_3 = []
hospitals_that_their_coordinates_need_to_be_find_manualy_3 = []
for k in df_nyc_hospitals_2[35:52]:
    geolocator_3 = Nominatim(user_agent="agent_3") 
    location_3 = geolocator_2.geocode(k)
    if location_3 != None:
        print(location_3.address) 
        print((location_3.latitude, location_3.longitude))
        hospital_lat_3.append(location_3.latitude)
        hospital_long_3.append(location_3.longitude)
    else:
        print('Can not find the lat and long of hospital:', k)
        hospitals_that_their_coordinates_need_to_be_find_manualy_3.append(k)

Brooklyn Hospital Center, 121, DeKalb Avenue, Fort Greene, Brooklyn, Kings County, New York, 11201, United States of America
(40.6905444, -73.9779303800582)
Medical Center, 2146, Beverley Road, Flatbush, Brooklyn, Kings County, New York, 11203, United States of America
(40.6444491, -73.957178)
Coney Island Hospital, 2601, Ocean Parkway, Gravesend, Brooklyn, Kings County, New York, 11235, United States of America
(40.58614055, -73.9648475893234)
Interfaith Medical Center, 1545, Atlantic Avenue, Bedford-Stuyvesant, Brooklyn, Kings County, New York, 11213, United States of America
(40.6785782, -73.9375058117873)
Kingsbrook Jewish Medical Center, 585, Schenectady Avenue, East Flatbush, Brooklyn, Kings County, New York, 11203, United States of America
(40.6598206, -73.9332212733163)
Kings County Hospital Center, 451, Clarkson Avenue, East Flatbush, Brooklyn, Kings County, New York, 11203, United States of America
(40.6566962, -73.943465837867)
Maimonides Medical Center, 4802, 10th Avenue, B

In [50]:
hospitals_that_their_coordinates_need_to_be_find_manualy_3

['Woodhull Medical and Mental Health Center']

In [51]:
hospital_lat_4 = []
hospital_long_4 = []
hospitals_that_their_coordinates_need_to_be_find_manualy_4 = []
for q in df_nyc_hospitals_2[52:62]:
    geolocator_4 = Nominatim(user_agent="agent_4") 
    location_4 = geolocator_2.geocode(q)
    if location_4 != None:
        print(location_4.address) 
        print((location_4.latitude, location_4.longitude))
        hospital_lat_4.append(location_4.latitude)
        hospital_long_4.append(location_4.longitude)
    else:
        print('Can not find the lat and long of hospital:', q)
        hospitals_that_their_coordinates_need_to_be_find_manualy_4.append(q)

Flushing Hospital Medical Center, 45-00, Parsons Boulevard, Murray Hill, Queens, Queens County, New York, 11354, United States of America
(40.75544455, -73.8167586963968)
Long Island Jewish Forest Hills, 102-01, 66th Avenue, Rego Park, Queens, Queens County, New York, 11375, United States of America
(40.72903425, -73.8515520353821)
Jamaica, 173 Viviendas, Hospital, Centenario, Departamento Confluencia, Neuquén, 8309, Argentina
(-38.8233903, -68.1331576)
Long Island Jewish Medical Center, 270-05, 76th Avenue, Glen Oaks, Queens, Queens County, New York, 11040, United States of America
(40.7539227, -73.708546368738)
Mount Sinai Queens, 25-10, 30th Avenue, Astoria, Queens, Queens County, New York, 11102, United States of America
(40.76810675, -73.9249387937634)
NewYork-Presbyterian/Queens, 56-45, Main Street, Murray Hill, Queens, Queens County, New York, 11355, United States of America
(40.74725645, -73.8251968475337)
NYC Health + Hospitals/Queens, 82-68, 164th Street, Fresh Meadows, Queen

In [52]:
hospitals_that_their_coordinates_need_to_be_find_manualy_4

[]

In [65]:
hospitals_that_their_coordinates_need_to_be_find_manualy = hospitals_that_their_coordinates_need_to_be_find_manualy_1 + hospitals_that_their_coordinates_need_to_be_find_manualy_2 + hospitals_that_their_coordinates_need_to_be_find_manualy_3 + hospitals_that_their_coordinates_need_to_be_find_manualy_4
hospitals_nyc_lat = hospital_lat + hospital_lat_2 + hospital_lat_3 + hospital_lat_4
hospitals_nyc_long = hospital_long + hospital_long_2 + hospital_long_3 + hospital_long_4

print(hospitals_that_their_coordinates_need_to_be_find_manualy, '\n')
print(hospitals_nyc_lat, '\n')
print(hospitals_nyc_long, '\n')

['Coler-Goldwater Specialty Hospital', 'Manhattan Eye, Ear and Throat Hospital', 'Manhattan Veterans Administration Hospital', 'NewYork–Presbyterian/Columbia University Medical Center', 'Bronx-Lebanon Hospital Center', 'James J. Peters VA Medical Center', 'Brookdale University Hospital and Medical Center', 'Woodhull Medical and Mental Health Center'] 

[40.73926685, 40.7697091, 40.81470595, 40.765164, 40.77364275, 40.78864315, 40.76445335, 40.785027, 40.7331696, 41.86071605, 40.76966015, 40.8732784, 40.7103146, 40.7647021, 40.7318799, 40.7343943, 40.74230265, 42.6575183, 40.84753875, -42.86605555, 40.85603475, 40.58478275, 51.3396715, 40.8800853, 21.9396049, 53.5087261, 40.8803581, 50.40744295, 40.6905444, 40.6444491, 40.58614055, 40.6785782, 40.6598206, 40.6566962, 40.6395376, 40.6186724, 40.61388355, 40.66782545, 40.64676075, 40.6549107, 40.70414415, 40.74115045, 40.74476995, 47.22582805, 40.75544455, 40.72903425, -38.8233903, 40.7539227, 40.76810675, 40.74725645, 40.71787545, 40.751

In [54]:
# finding the lat, long of the hospitals that geopy didn't find
hospitals_with_no_coordinates_addreses = ['900 Main St', '210 E 64th St', '423 E 23rd St', '622 W 168th St', '2432 Grand Concourse', '130 W Kingsbridge Rd', '1 Brookdale Plaza', '760 Broadway']
hospital_lat_5 = []
hospital_long_5 = []
hospitals_that_their_coordinates_need_to_be_find_manualy_5 = []

for hospital_address in hospitals_with_no_coordinates_addreses:
    geolocator_5 = Nominatim(user_agent="agent_5") 
    location_5 = geolocator_5.geocode(hospital_address +',NY')
    if location_5 != None:
        hospital_lat_5.append(location_5.latitude)
        hospital_long_5.append(location_5.longitude)
    else:
        print('Can not find the lat and long of hospital:', hospital_address)
        hospitals_that_their_coordinates_need_to_be_find_manualy_5.append(hospital_address)

In [66]:
# adding lat, long to the hospitals that geopy didn't find
addition_information_nyc_hospitals = pd.DataFrame(columns = ['Hospital Name','Latitude','Longitude'])
addition_information_nyc_hospitals['Hospital Name'] = hospitals_that_their_coordinates_need_to_be_find_manualy
addition_information_nyc_hospitals['Latitude'] = hospital_lat_5
addition_information_nyc_hospitals['Longitude'] = hospital_long_5
addition_information_nyc_hospitals



Unnamed: 0,Hospital Name,Latitude,Longitude
0,Coler-Goldwater Specialty Hospital,40.770414,-73.942312
1,"Manhattan Eye, Ear and Throat Hospital",40.764233,-73.963647
2,Manhattan Veterans Administration Hospital,42.73301,-73.703754
3,NewYork–Presbyterian/Columbia University Medic...,40.841173,-73.940209
4,Bronx-Lebanon Hospital Center,40.860632,-73.897719
5,James J. Peters VA Medical Center,40.8672,-73.905511
6,Brookdale University Hospital and Medical Center,40.911215,-73.831726
7,Woodhull Medical and Mental Health Center,40.699336,-73.942747


In [67]:
index_list = []
index = 0
while index in range(len(df_nyc_hospitals['Hospital Name'])):
    for no_coordinates_hospital in hospitals_that_their_coordinates_need_to_be_find_manualy:
        if df_nyc_hospitals['Hospital Name'][index] == no_coordinates_hospital:
            index_list.append(index)
    index = index + 1
print(index_list)

df_nyc_hospitals.drop(index_list, axis = 0, inplace = True)
df_nyc_hospitals.head()
df_nyc_hospitals['Latitude'] = hospitals_nyc_lat
df_nyc_hospitals['Longitude'] = hospitals_nyc_long
df_nyc_hospitals.head()

[1, 6, 8, 15, 23, 26, 34, 47]


Unnamed: 0,Hospital Name,Latitude,Longitude
0,Bellevue Hospital Center,40.739267,-73.97536
2,Gracie Square Hospital,40.769709,-73.952975
3,Harlem Hospital Center,40.814706,-73.93928
4,Hospital for Special Surgery,40.765164,-73.95269
5,Lenox Hill Hospital,40.773643,-73.960862


In [68]:
# concat addition_information_nyc_hospitals and df_nyc_hospitals
nyc_hospitals = pd.concat([df_nyc_hospitals, addition_information_nyc_hospitals], axis = 0)
nyc_hospitals = nyc_hospitals.reset_index(drop = True)
nyc_hospitals

Unnamed: 0,Hospital Name,Latitude,Longitude
0,Bellevue Hospital Center,40.739267,-73.975360
1,Gracie Square Hospital,40.769709,-73.952975
2,Harlem Hospital Center,40.814706,-73.939280
3,Hospital for Special Surgery,40.765164,-73.952690
4,Lenox Hill Hospital,40.773643,-73.960862
5,Manhattan Psychiatric Center,40.788643,-73.930388
6,Memorial Sloan Kettering Cancer Center,40.764453,-73.956944
7,Metropolitan Hospital Center,40.785027,-73.944979
8,Mount Sinai Beth Israel,40.733170,-73.982046
9,Mount Sinai Hospital,41.860716,-87.694496


In [69]:
# create map of New York city consists of hospitals and neighborhoods

#import folium package
!conda install -c conda-forge folium=0.5.0 --yes
import folium

map_newyork_city = folium.Map(location = [40.730610, -73.935242], zoom_start=20)

# add markers to map
for lat, lng, neighborhood_label in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=neighborhood_label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork_city)  

for lat, lng, hospital_label in zip(nyc_hospitals['Latitude'], nyc_hospitals['Longitude'], nyc_hospitals['Hospital Name']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=hospital_label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork_city) 
    

map_newyork_city

Solving environment: done

# All requested packages already installed.



In [127]:
nyc_hospitals.head()

Unnamed: 0,Hospital Name,Latitude,Longitude
0,Bellevue Hospital Center,40.739267,-73.97536
1,Gracie Square Hospital,40.769709,-73.952975
2,Harlem Hospital Center,40.814706,-73.93928
3,Hospital for Special Surgery,40.765164,-73.95269
4,Lenox Hill Hospital,40.773643,-73.960862


In [253]:
# preparing data for clustering
cluster__hospital_dataframe = nyc_hospitals.iloc[:,1:3]
#cluster__hospital_dataframe

cluster_neighborhoods = neighborhoods.iloc[:, 2:4]
#cluster_neighborhoods

cluster_dataframe = pd.concat([cluster_neighborhoods, cluster__hospital_dataframe], axis = 0)
cluster_dataframe.reset_index(drop = True)

cluster_dataframe.head()
cluster_dataframe.shape

(368, 2)

In [255]:
import matplotlib as plt
# dbscan clustering
nyc_hospitals_cluster = DBSCAN(eps = 0.025, min_samples = 5).fit(cluster_dataframe)
# clusters plot using the function dbscan_clusters that was define at the beggining of the notebook
print(nyc_hospitals_cluster.labels_)
len(nyc_hospitals_cluster.labels_)
#plt.plot(kind = 'scatter', x = 'Latitude', y = 'Longitude', c=nyc_hospitals_cluster.labels_, cmap='Paired')
#plt.title("DBSCAN")

[ 0  0  0  0  0  0  0  0  0  0  0  0 -1  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0 -1  0 -1  0  0  0 -1  0  0  0  0  0  0  0  0  0  0
  0  0  0 -1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0 -1  0  0  0  0  0  0  0  0  0  0
  0  0  0  0 -1  0  0 -1  0 -1 -1  1  1  0  0  0  0  1  1  1  0 -1 -1 -1
 -1 -1  2  2 -1  0  0 -1  2  2  2 -1  0  1  0  0  0  0  0  0 -1  0 -1 -1
  2 -1  1  0  0  3  3  3  3  3  3  3  3  3  3  3  3  3 -1  3  3  3  4  4
  4  4  3  3  3  0  0  0  3  3  3  3  3  3  3  3  3  3  3  3  4  4  4  4
  4  4  4  3 -1 -1  3  0  0  0  0  3  3  3  3  3  3  3  3  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  3  3  4  3
 -1  0  0  4  3  4  0  0  0  0  0  0  0  0  2  2  0  3  0  0  0  0  0  0
  0  0  0 -1  0  0  0  0  0  0  0 -1  0 -1  0 -1 -1

368

In [256]:
list1 = neighborhoods['Neighborhood'].to_list()
list2 = nyc_hospitals['Hospital Name'].to_list()
cluster_dataframe['Neighborhood\hospital name'] = list1 + list2
cluster_dataframe['Label'] = nyc_hospitals_cluster.labels_
cluster_dataframe.head()

Unnamed: 0,Latitude,Longitude,Neighborhood\hospital name,Label
0,40.894705,-73.847201,Wakefield,0
1,40.874294,-73.829939,Co-op City,0
2,40.887556,-73.827806,Eastchester,0
3,40.895437,-73.905643,Fieldston,0
4,40.890834,-73.912585,Riverdale,0


In [305]:
# outliers neighborhoods   
outliers = cluster_dataframe[cluster_dataframe['Label'] == -1]
outliers
outliers_hospitals = []
for h_1 in outliers['Neighborhood\hospital name']:
    for h_2 in list2:
        if h_1 == h_2:
            outliers_hospitals.append(h_1)
set_outliers = set(outliers['Neighborhood\hospital name'])
set_outliers_hospitals = set(outliers_hospitals)
neighborhoods_needs_nearby_hospital = set_outliers.difference(set_outliers_hospitals)
neighborhoods_needs_nearby_hospital = list(neighborhoods_needs_nearby_hospital)
print('The neighborhoods that need a nearby hospital are:', neighborhoods_needs_nearby_hospital)

The neighborhoods that need a nearby hospital are: ['Douglaston', 'Rosedale', 'South Ozone Park', 'Travis', 'Rochdale', 'Sea Gate', 'Bath Beach', 'Rockaway Park', 'Breezy Point', 'Bay Ridge', 'Dyker Heights', 'Chelsea', 'Belle Harbor', 'Laurelton', 'St. Albans', 'Neponsit', 'Bayside', 'City Island', 'Springfield Gardens', 'Bloomfield', 'Fort Hamilton', 'Roxbury', 'Cambria Heights', 'Bay Terrace', 'Howard Beach', 'Little Neck', 'Brookville']


In [261]:
groupbyData = cluster_dataframe.groupby('Label')
groupbyData.get_group(2)['Neighborhood\hospital name']

170      Far Rockaway
171     Broad Channel
176          Edgemere
177           Arverne
178    Rockaway Beach
192        Somerville
302           Hammels
303         Bayswater
Name: Neighborhood\hospital name, dtype: object

In [316]:
# we need to find if there is a hospital in each cluster in order to determine whether a neighborhood needs a hospital near by
groupbyData = cluster_dataframe.groupby('Label')
count = 0
for number in range(0,max(nyc_hospitals_cluster.labels_) + 1):
    get_group_frame = groupbyData.get_group(number)['Neighborhood\hospital name']
    for hood_or_hospital in get_group_frame:
        for hospital in df_nyc_hospitals['Hospital Name']:
            if hood_or_hospital == hospital:
                count = count + 1
    if count == 0:
        neighborhoods_needs_nearby_hospital.extend(get_group_frame)
        #print('The following neighborhoods of cluster', number ,'need a hospital near by:\n', get_group_frame)
    else:
        count = 0
print('Project Results\n The following NYC neighborhoods need a nearby hospital according to the DBSCAN model\n\n', neighborhoods_needs_nearby_hospital)

Project Results
 The following NYC neighborhoods need a nearby hospital according to the DBSCAN model

 ['Douglaston', 'Rosedale', 'South Ozone Park', 'Travis', 'Rochdale', 'Sea Gate', 'Bath Beach', 'Rockaway Park', 'Breezy Point', 'Bay Ridge', 'Dyker Heights', 'Chelsea', 'Belle Harbor', 'Laurelton', 'St. Albans', 'Neponsit', 'Bayside', 'City Island', 'Springfield Gardens', 'Bloomfield', 'Fort Hamilton', 'Roxbury', 'Cambria Heights', 'Bay Terrace', 'Howard Beach', 'Little Neck', 'Brookville', 'Far Rockaway', 'Broad Channel', 'Edgemere', 'Arverne', 'Rockaway Beach', 'Somerville', 'Hammels', 'Bayswater', 'Eltingville', 'Annadale', 'Woodrow', 'Tottenville', 'Huguenot', 'Pleasant Plains', 'Butler Manor', 'Charleston', 'Rossville', 'Arden Heights', 'Greenridge', 'Sandy Ground', "Prince's Bay", 'Richmond Valley', 'Far Rockaway', 'Broad Channel', 'Edgemere', 'Arverne', 'Rockaway Beach', 'Somerville', 'Hammels', 'Bayswater', 'Eltingville', 'Annadale', 'Woodrow', 'Tottenville', 'Huguenot', 'Ple

In [30]:
# Define Foursquare Credentials and Version
CLIENT_ID = 'T0GMVPN0M5BBZJJX5EFNCWNY53A3ASN4GLHPL5NHAPYHXBEG' # your Foursquare ID
CLIENT_SECRET = '1ZNOAZE0ZWE3A1NWOBWUUZBKVTOBUEGVMC1VGMQ5XZNWYYZB' # your Foursquare Secret
VERSION = '20191101' # Foursquare API version
CATEGORY_ID_HOSPITALS = '4bf58dd8d48988d196941735'

In [None]:
# checking epsilon proportion to the real world. ie. - if the epsilon chosen above is less than 5000 meters or higher.
# for instance lets take Douglaston as the neighborhood
neighborhoods[neighborhoods['Neighborhood'] == 'Douglaston']
Douglaston_lat = neighborhoods.iloc[154,2]
Douglaston_long = neighborhoods.iloc[154,3]
#Using Foursquare 
radius = 5000
url = 'https://api.foursquare.com/v2/venues/search?ll=Douglaston_long,Douglaston_lat&categoryId=CATEGORY_ID_HOSPITALS&client_id=CLIENT_ID&client_secret=CLIENT_SECRET&limit=1&v=VERSION&radius=radius'
results = requests.get(url).json()
results
