In [62]:
from math import sqrt
import pandas as pd
import json
import requests
import random
import folium
from shapely.geometry import Point
from shapely.geometry import shape
import numpy as np

In [2]:
# Foursquare credentials
with open('credentials.json') as f:
    creds = json.load(f)

    CLIENT_ID = creds[0]['CLIENT_ID']
    CLIENT_SECRET = creds[0]['CLIENT_SECRET']

VERSION =  "20200101"
LIMIT = 50

In [3]:
geo_df = pd.read_csv('us-county-boundaries.csv', sep=';')
geo_df.head()

Unnamed: 0,Geo Point,Geo Shape,STATEFP,COUNTYFP,COUNTYNS,GEOID,NAME,NAMELSAD,STUSAB,LSAD,...,CSAFP,CBSAFP,METDIVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,STATE_NAME,COUNTYFP NOZERO
0,"39.7847564917,-100.459899544","{""type"": ""Polygon"", ""coordinates"": [[[-100.740...",20,39,484989,20039,Decatur,Decatur County,KS,6,...,,,,A,2314199329,1631670,39.783542,-100.459708,Kansas,39
1,"38.4561871377,-96.1526397502","{""type"": ""Polygon"", ""coordinates"": [[[-96.3543...",20,111,485020,20111,Lyon,Lyon County,KS,6,...,,21380.0,,A,2194978406,20541643,38.455403,-96.161641,Kansas,111
2,"43.560975208,-73.8460059552","{""type"": ""Polygon"", ""coordinates"": [[[-74.1825...",36,113,974154,36113,Warren,Warren County,NY,6,...,104.0,24020.0,,A,2246082355,166754989,43.555105,-73.838139,New York,113
3,"37.2372753377,-97.4765502094","{""type"": ""Polygon"", ""coordinates"": [[[-97.8045...",20,191,481812,20191,Sumner,Sumner County,KS,6,...,556.0,48620.0,,A,3060472194,8546399,37.236662,-97.493352,Kansas,191
4,"42.781081385,-76.8237785954","{""type"": ""Polygon"", ""coordinates"": [[[-76.9635...",36,99,974147,36099,Seneca,Seneca County,NY,6,...,464.0,42900.0,,A,838403628,172828921,42.782294,-76.827088,New York,99


In [4]:
geo_df = geo_df[['STATE_NAME', 'STUSAB', 'NAMELSAD', 'ALAND', 'AWATER', 'Geo Point', 'Geo Shape']]
geo_df['area'] = geo_df['ALAND'] + geo_df['AWATER']
geo_df.drop(['ALAND', 'AWATER'], axis=1, inplace=True)
geo_df['county_state'] = geo_df['NAMELSAD'] + ', ' + geo_df['STUSAB']
geo_df.columns = ['state_name', 'state', 'county', 'Geo Point', 'Geo Shape', 'area', 'county_state']
geo_df.head()

Unnamed: 0,state_name,state,county,Geo Point,Geo Shape,area,county_state
0,Kansas,KS,Decatur County,"39.7847564917,-100.459899544","{""type"": ""Polygon"", ""coordinates"": [[[-100.740...",2315830999,"Decatur County, KS"
1,Kansas,KS,Lyon County,"38.4561871377,-96.1526397502","{""type"": ""Polygon"", ""coordinates"": [[[-96.3543...",2215520049,"Lyon County, KS"
2,New York,NY,Warren County,"43.560975208,-73.8460059552","{""type"": ""Polygon"", ""coordinates"": [[[-74.1825...",2412837344,"Warren County, NY"
3,Kansas,KS,Sumner County,"37.2372753377,-97.4765502094","{""type"": ""Polygon"", ""coordinates"": [[[-97.8045...",3069018593,"Sumner County, KS"
4,New York,NY,Seneca County,"42.781081385,-76.8237785954","{""type"": ""Polygon"", ""coordinates"": [[[-76.9635...",1011232549,"Seneca County, NY"


In [40]:
# functions

def generate_random(number, polygon):
    list_of_points = []
    minx, miny, maxx, maxy = polygon.bounds
    counter = 0
    while counter < number:
        pnt = Point(random.uniform(minx, maxx), random.uniform(miny, maxy))
        if polygon.contains(pnt):
            coords = pnt.coords.xy
            lat = coords[1][0]
            lng = coords[0][0]
            list_of_points.append([lat, lng])
            counter += 1
    return list_of_points

def get_points(df):
    points_list = []
    for index, row in df.iterrows():
        county = row['county_state']
        poly = shape(json.loads(row['Geo Shape']))

        number = int(sqrt(row['area'])/50000)
        if number < 2:
            number = 2

        list_of_points = generate_random(number, poly)
        points_list.append([county, list_of_points])

    return points_list

def points_to_df(points_list):
    df = pd.DataFrame(columns = ['county', 'latitude', 'longitude'])
    for x in range(len(points_list)):
        coords =  points_list[x][1]
        for i in range(len(coords)):
            county = points_list[x][0]
            lat = (coords[i][0])
            lng = (coords[i][1])

            df = df.append({'county': county, 'latitude': lat, 'longitude': lng}, ignore_index=True)
    return df

def get_hospitals(counties, lats, lngs):
    categoryID1 = '4bf58dd8d48988d196941735'  # hospitals
    radius = 100000
    hospital_list = []
    id_list = []
    for i in range(len(lats)):
        county = counties[i]
        lat = lats[i]
        lng = lngs[i]

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT,
            categoryID1
            )
        try:
            results = requests.get(url).json()["response"]['venues']
            # print(results)

            # return only relevant information for each nearby venue
            for x in range(len(results)):
                if check_hospitals(county, results[x]['location']['lat'],
                    results[x]['location']['lng']):
                    if results[x]['id'] not in id_list:
                        id_list.append(results[x]['id'])
                        print(results[x]['name'])  # so I don't panic!
                        hospital_list.append([(
                        county,
                        results[x]['name'],
                        results[x]['location']['lat'],
                        results[x]['location']['lng'],
                        )])
        except KeyError:
            pass
    return hospital_list

def check_hospitals(county, lat, lon):
        point = Point(lon, lat)
        poly = geo_df[geo_df['county_state'] == county]['Geo Shape'].item()
        poly = shape(json.loads(poly))

        if poly.contains(point):
            return True
        else:
            return False


def hospital_list_to_df(hospital_list):
    df2 = pd.DataFrame(columns=['county', 'hospital', 'latitude', 'longitude'])

    for x in range(len(hospital_list)):
        county = hospital_list[x][0][0]
        hospital = hospital_list[x][0][1]
        latitude = hospital_list[x][0][2]
        longitude = hospital_list[x][0][3]

        # if check_hospitals(county, latitude, longitude):
        df2 = df2.append({'county': county, 'hospital': hospital, 'latitude': latitude, 'longitude': longitude}, ignore_index=True)


    df2.drop_duplicates(subset=['latitude', 'longitude'], keep='last', inplace=True)

    return df2


In [24]:
ny_df = geo_df[geo_df.state == 'NY']
ny_df.head()

Unnamed: 0,state_name,state,county,Geo Point,Geo Shape,area,county_state
2,New York,NY,Warren County,"43.560975208,-73.8460059552","{""type"": ""Polygon"", ""coordinates"": [[[-74.1825...",2412837344,"Warren County, NY"
4,New York,NY,Seneca County,"42.781081385,-76.8237785954","{""type"": ""Polygon"", ""coordinates"": [[[-76.9635...",1011232549,"Seneca County, NY"
5,New York,NY,Greene County,"42.2765197661,-74.1227065013","{""type"": ""Polygon"", ""coordinates"": [[[-74.4796...",1704341556,"Greene County, NY"
8,New York,NY,Wyoming County,"42.7023842075,-78.2244395122","{""type"": ""Polygon"", ""coordinates"": [[[-78.4638...",1544377612,"Wyoming County, NY"
10,New York,NY,Orange County,"41.402131202,-74.3055376374","{""type"": ""Polygon"", ""coordinates"": [[[-74.5297...",2171992499,"Orange County, NY"


In [25]:
ny_df.shape

(62, 7)

In [26]:
ny_points = get_points(ny_df)
ny_points[0]

['Warren County, NY',
 [[43.72349663740705, -73.5742626674317],
  [43.64048285283122, -73.8204972347205]]]

In [27]:
ny_points_df = points_to_df(ny_points)
ny_points_df.head()

Unnamed: 0,county,latitude,longitude
0,"Warren County, NY",43.723497,-73.574263
1,"Warren County, NY",43.640483,-73.820497
2,"Seneca County, NY",42.757725,-76.818349
3,"Seneca County, NY",43.017857,-76.808031
4,"Greene County, NY",42.302238,-73.819888


In [28]:
ny_points_df.shape

(124, 3)

In [29]:
ny_points_df.groupby('county').count()

Unnamed: 0_level_0,latitude,longitude
county,Unnamed: 1_level_1,Unnamed: 2_level_1
"Albany County, NY",2,2
"Allegany County, NY",2,2
"Bronx County, NY",2,2
"Broome County, NY",2,2
"Cattaraugus County, NY",2,2
...,...,...
"Washington County, NY",2,2
"Wayne County, NY",2,2
"Westchester County, NY",2,2
"Wyoming County, NY",2,2


In [41]:
ny_hosp_list = get_hospitals(ny_points_df.county, ny_points_df.latitude, ny_points_df.longitude)
ny_hosp_list[0][0]

Glens Falls Hospital
Glens Falls Hospital Snuggery
Wyoming County Community Hospital
Elmhurst Hospital Center
NewYork-Presbyterian Queens
St. Mary Hospital for Children
Northwell - Long Island Jewish Medical Center
Niagara Falls Memorial Medical Center
NFMMC S3 Cardiac/Stroke Unit
O'Connor Hospital
Moses Ludington Hospital
Elizabethtown Community Hospital
Inter-Lakes Health
Guthrie Clinic
United Memorial Medical Center
buffalo general
Jones Memorial Medical Imaging
Jones Memorial Hospital Maternity
Cuba Memorial Hospital
Little Falls Hospital
Catskill Regional Medical Center
Lewis County General Hospital
UHS Wilson Medical Center
Our Lady of Lourdes Memorial Hospital
Lourdes Breast Care Center
UHS Binghamton General Hospital
Lenga Center For The Mentally Handicapped And Overweight
South Tower 5
Lourdes Ambulatory Surgery
Strong Memorial Hospital | URMC
Rochester General Hospital
James P. Wilmot Cancer Center
St. Mary's Unity Hospital
Golisano Childrens Hospital At Strong
Highland Hospi

('Warren County, NY',
 'Glens Falls Hospital',
 43.30654759929057,
 -73.64700120738856)

In [47]:
ny_hosp_df = hospital_list_to_df(ny_hosp_list)
ny_hosp_df.head(), ny_hosp_df.shape

(               county                           hospital   latitude  longitude
 0   Warren County, NY               Glens Falls Hospital  43.306548 -73.647001
 1   Warren County, NY      Glens Falls Hospital Snuggery  43.305721 -73.647749
 2  Wyoming County, NY  Wyoming County Community Hospital  42.754591 -78.131291
 3   Queens County, NY           Elmhurst Hospital Center  40.745244 -73.885536
 4   Queens County, NY        NewYork-Presbyterian Queens  40.747248 -73.825336,
 (176, 4))

In [48]:
# remove 'parking', 'Vetinarian', 'Snuggery', 'waiting',
ny_hosp_df.drop(ny_hosp_df.loc[ny_hosp_df['hospital'].str.contains('parking')].index, inplace=True)
ny_hosp_df.drop(ny_hosp_df.loc[ny_hosp_df['hospital'].str.contains('Vetinarian')].index, inplace=True)
ny_hosp_df.drop(ny_hosp_df.loc[ny_hosp_df['hospital'].str.contains('Snuggery')].index, inplace=True)
ny_hosp_df.drop(ny_hosp_df.loc[ny_hosp_df['hospital'].str.contains('Waiting')].index, inplace=True)
ny_hosp_df.shape

(170, 4)

ok, it's not the 215 i should have, but hey, its a lot closer than 30!

In [50]:
ny_hosp_count = ny_hosp_df.groupby('county').count()
ny_hosp_count.head()

Unnamed: 0_level_0,hospital,latitude,longitude
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Albany County, NY",16,16,16
"Allegany County, NY",4,4,4
"Bronx County, NY",1,1,1
"Broome County, NY",7,7,7
"Cattaraugus County, NY",1,1,1


kansas


In [51]:
ks_df = geo_df[geo_df.state == 'KS']
ks_points = get_points(ks_df)
ks_points[0]

['Decatur County, KS',
 [[39.88832631088242, -100.46679859703595],
  [39.83495223567467, -100.6253582762825]]]

In [52]:
ks_points_df = points_to_df(ks_points)
ks_points_df.head()


Unnamed: 0,county,latitude,longitude
0,"Decatur County, KS",39.888326,-100.466799
1,"Decatur County, KS",39.834952,-100.625358
2,"Lyon County, KS",38.735745,-96.181838
3,"Lyon County, KS",38.466158,-96.088897
4,"Sumner County, KS",37.458485,-97.264679


In [53]:
ks_hosp_list = get_hospitals(ks_points_df.county, ks_points_df.latitude, ks_points_df.longitude)
ks_hosp_list[0][0]


Cedar Living Center
Sumner County Hospital
Cheyenne County Hospital
Logan County Hospital
Sheridan County Health Complex
St. Catherine Hospital
Leavenworth Va Hospital
Stanton County Hospital
Rice County District Hospital
Rice County Hospital
Lindsborg Hospital
Lindsborg Community Hospital
Mercy Hospital
Morris County Hospital
Hodgeman Co Hospital
Hays Medical Center
Michael E. DeBakey Heart Institute
KVC Wheatland Psychiatric Hospital
Miller Medical Pavilion
Rush County Memorial Hospital
Ransom Memorial Hospital
Atwood Hospital
Coffeyville Hospital
Hutchinson Regional Medical Center
Cotton O'Neil Carbondale
Lawrence Memorial Hospital
LMH IT Department
MedExpress Urgent Care
Horton Community Hospital
Bob Wilson Memorial Hospital
Graham County Hospital
Lincoln County Hospital
BELLEVILLE HEALTH CARE
Morton County Hospital
Greeley County Hospital
Comanche County Hospital
William Newton Hospital
Allen County Regional Hospital
Allen County Hospital
Salina Regional Health Center
COMCARE
Safe

('Decatur County, KS', 'Cedar Living Center', 39.822856, -100.532265)

In [84]:
ks_hosp_df = hospital_list_to_df(ks_hosp_list)
ks_hosp_df.head(), ks_hosp_df.shape


(                county                        hospital   latitude   longitude
 0   Decatur County, KS             Cedar Living Center  39.822856 -100.532265
 1    Sumner County, KS          Sumner County Hospital  37.026218  -97.609818
 2  Cheyenne County, KS        Cheyenne County Hospital  39.771112 -101.803468
 3     Logan County, KS           Logan County Hospital  39.127211 -100.862950
 4  Sheridan County, KS  Sheridan County Health Complex  39.354177 -100.432527,
 (160, 4))

In [85]:
# remove Waiting, Living

ks_hosp_df.drop(ks_hosp_df.loc[ks_hosp_df['hospital'].str.contains('Waiting')].index, inplace=True)
ks_hosp_df.drop(ks_hosp_df.loc[ks_hosp_df['hospital'].str.contains('Living')].index, inplace=True)
ks_hosp_df.shape

(156, 4)

In [86]:
ks_hosp_count = ks_hosp_df.groupby('county').count()
ks_hosp_count.head()

Unnamed: 0_level_0,hospital,latitude,longitude
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Allen County, KS",2,2,2
"Anderson County, KS",1,1,1
"Barton County, KS",2,2,2
"Brown County, KS",1,1,1
"Butler County, KS",1,1,1


In [87]:
county_df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
county_df.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [88]:
county_df = county_df[['county', 'state', 'cases', 'deaths']]

In [89]:
county_df.columns = ['county_name', 'state_name', 'cases', 'deaths']
county_df.head()

Unnamed: 0,county_name,state_name,cases,deaths
0,Snohomish,Washington,1,0.0
1,Snohomish,Washington,1,0.0
2,Snohomish,Washington,1,0.0
3,Cook,Illinois,1,0.0
4,Snohomish,Washington,1,0.0


In [90]:
county_df = county_df[(county_df['state_name'] == 'New York') | (county_df['state_name'] == 'Kansas')]
county_df.head()

Unnamed: 0,county_name,state_name,cases,deaths
416,New York City,New York,1,0.0
448,New York City,New York,1,0.0
482,New York City,New York,2,0.0
518,New York City,New York,2,0.0
519,Westchester,New York,9,0.0


In [91]:
conditions = [(county_df['state_name'] == 'New York'),
              (county_df['state_name'] == 'Kansas')]
values = ['NY', 'KS']

county_df['state'] = np.select(conditions, values)
county_df['county'] = county_df['county_name'] + ', ' + county_df['state']
county_df.head()


Unnamed: 0,county_name,state_name,cases,deaths,state,county
416,New York City,New York,1,0.0,NY,"New York City, NY"
448,New York City,New York,1,0.0,NY,"New York City, NY"
482,New York City,New York,2,0.0,NY,"New York City, NY"
518,New York City,New York,2,0.0,NY,"New York City, NY"
519,Westchester,New York,9,0.0,NY,"Westchester, NY"


In [92]:
county_df['case_fatality_rate'] = round((county_df.deaths/county_df.cases)*1000, 2)

In [93]:
county_pop = pd.read_csv('US_county_pop.csv')
county_pop.head()

Unnamed: 0,state,county,state_name,geo_id,population,pop_density
0,AL,Autauga,Alabama,1001,55601,93.534505
1,AL,Baldwin,Alabama,1003,218022,137.138546
2,AL,Barbour,Alabama,1005,24881,28.11406
3,AL,Bibb,Alabama,1007,22400,35.986169
4,AL,Blount,Alabama,1009,57840,89.697955


In [94]:
county_pop['county'] = county_pop['county'] + ', ' + county_pop['state']
county_pop = county_pop[(county_pop['state'] == 'NY') | (county_pop['state'] == 'KS')]
county_pop = county_pop[['county', 'population']]
county_pop.head()

Unnamed: 0,county,population
888,"Allen, KS",12444
889,"Anderson, KS",7878
890,"Atchison, KS",16193
891,"Barber, KS",4472
892,"Barton, KS",26111


In [95]:
counties = pd.merge(county_df, county_pop, on='county')
counties.head()

Unnamed: 0,county_name,state_name,cases,deaths,state,county,case_fatality_rate,population
0,Westchester,New York,9,0.0,NY,"Westchester, NY",0.0,967612
1,Westchester,New York,17,0.0,NY,"Westchester, NY",0.0,967612
2,Westchester,New York,33,0.0,NY,"Westchester, NY",0.0,967612
3,Westchester,New York,69,0.0,NY,"Westchester, NY",0.0,967612
4,Westchester,New York,81,0.0,NY,"Westchester, NY",0.0,967612


In [96]:
ny_hosp_count.head()

Unnamed: 0_level_0,hospital,latitude,longitude
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Albany County, NY",16,16,16
"Allegany County, NY",4,4,4
"Bronx County, NY",1,1,1
"Broome County, NY",7,7,7
"Cattaraugus County, NY",1,1,1


In [97]:
ks_hosp_count.head()

Unnamed: 0_level_0,hospital,latitude,longitude
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Allen County, KS",2,2,2
"Anderson County, KS",1,1,1
"Barton County, KS",2,2,2
"Brown County, KS",1,1,1
"Butler County, KS",1,1,1


In [99]:
hosps = pd.concat([ny_hosp_count, ks_hosp_count])
hosps.head

<bound method NDFrame.head of                         hospital  latitude  longitude
county                                               
Albany County, NY             16        16         16
Allegany County, NY            4         4          4
Bronx County, NY               1         1          1
Broome County, NY              7         7          7
Cattaraugus County, NY         1         1          1
...                          ...       ...        ...
Trego County, KS               1         1          1
Washington County, KS          1         1          1
Wichita County, KS             1         1          1
Wilson County, KS              1         1          1
Wyandotte County, KS           5         5          5

[118 rows x 3 columns]>

In [102]:
# can't get them to merge

Unnamed: 0,county_name,state_name,cases,deaths,state,county,case_fatality_rate,population,hospital


'{"type": MultiPolygon"Polygon", "coordinates":0      [[[[-1.241956526315958, 54.722452909315834], [...\n1      [[[[-1.197048037027911, 54.58178034589915], [-...\n2      [[[[-1.200988774224933, 54.57758968504015], [-...\n3      [[[[-1.303874364736467, 54.56513718726291], [-...\n4      [[[-1.637990666809223, 54.617194817277564], [-...\n                             ...                        \n321    [[[-0.166455224660887, 51.38969284414564], [-0...\n322    [[[-0.02947478006328, 51.543029451145436], [-0...\n323    [[[-0.0082177840334, 51.64360739434634], [-0.0...\n324    [[[-0.126363701043512, 51.48446590586321], [-0...\n325    [[[-0.174141477648115, 51.538199616490886], [-...\nName: geometry.coordinates, Length: 326, dtype: object}'