1. get_results(): (Google Maps API) takes a string location, an int radius, and one category, and outputs all results of that category within the radius, along with each result's distance from the input location

2. map_covid_vax(): assign a neighborhood (ZIP code from the location objects) to each result, map COVID and vaccination data accordingly, and compute the risk level/score

3. rank_results(): apply any filters to the results, including risk score(covid positive #, positive %, vax %), rating score, distance, … 

4. Master function: pass user input to all functions and return json locations(w/ info/scores/risk levels) in ranked order


In [None]:
# Packages preparation
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import googlemaps
from datetime import datetime
import json

# Lookup tables keyed by the 'modzcta' column; map_covid_vax() joins them onto result ZIP codes.
# last7 supplies 'people_positive'; vax_data supplies '%_full_vax' and '%_positive'.
last7 = pd.read_csv("/work/last7days-by-modzcta.csv")
vax_data = pd.read_csv("/work/final_data.csv")

### 1. get_results(): (Google Maps API) takes a string location, an int radius, and one category, and outputs all results of that category within the radius, along with each result's distance from the input location

In [None]:
def _extract_zip_code(address_components):
    """Return the postal code found in Place Details address components, or None."""
    for component in address_components:
        if 'postal_code' in component.get('types', []):
            return component['short_name']
    # No component is tagged as a postal code: keep the historical behaviour of
    # reading the last component (which may be a postal-code suffix).
    if address_components:
        return address_components[-1]['short_name']
    return None


def get_results(address, category, radius=100):
    """
    This function extracts a list of currently open places using the Google Maps API

    Parameters: {
        address: string, free-form address used as the search origin
        category: string, place type passed to the nearby-places search
        radius: (optional) int, search radius passed to the nearby-places search
    }
    Requires: network access and a valid Google Maps API key
    Returns: dataframe with one row per place and the columns
        name, geolocation, rating, address, distance, duration, zip_code, reviews;
        an empty dataframe with those columns when the address cannot be
        geocoded or no place matches
    """
    import os

    columns = ['name', 'geolocation', 'rating', 'address',
               'distance', 'duration', 'zip_code', 'reviews']

    # NOTE(review): the fallback key is committed in source control. Set the
    # GOOGLE_MAPS_API_KEY environment variable to override it, and rotate the
    # committed key.
    api_key = os.environ.get('GOOGLE_MAPS_API_KEY',
                             'AIzaSyC5iZcLzCj7VONadthvLMekcGCVWo-VmKw')
    gmaps = googlemaps.Client(key=api_key)

    # Geocoding an address
    geocode_result = gmaps.geocode(address)
    if not geocode_result:
        return pd.DataFrame(columns=columns)
    origin = geocode_result[0]['geometry']['location']

    # Search nearby open places in a specified category within a radius
    places = gmaps.places_nearby(location=origin, radius=radius,
                                 type=category, open_now=True)['results']
    print("Number of results: ", len(places))
    if not places:
        # Nothing to measure distances to; skip the distance-matrix request.
        return pd.DataFrame(columns=columns)

    # get a list of destination geocodes and compute distances to origin
    geocodes = [(place['geometry']['location']['lat'],
                 place['geometry']['location']['lng']) for place in places]
    distances = gmaps.distance_matrix(
        origins=origin, destinations=geocodes)['rows'][0]['elements']

    res_list = []
    for place, geocode, element in zip(places, geocodes, distances):
        # get zip code and reviews using place details api
        place_details = gmaps.place(place['place_id'])['result']

        res_list.append({
            'name': place['name'],
            'geolocation': geocode,
            # Not every place has a rating or a vicinity; leave those missing
            # instead of failing the whole search.
            'rating': place.get('rating'),
            'address': place.get('vicinity'),
            # An element without a route carries no distance/duration entries.
            'distance': element.get('distance', {}).get('value'),
            'duration': element.get('duration', {}).get('text'),
            'zip_code': _extract_zip_code(
                place_details.get('address_components', [])),
            # One {author: text} dict per review, as before.
            'reviews': [{review['author_name']: review['text']}
                        for review in place_details.get('reviews', [])],
        })

    return pd.DataFrame(res_list, columns=columns)
            


### 2. map_covid_vax(): assign a neighborhood (ZIP code from the location objects) to each result, map COVID and vaccination data accordingly, and compute the risk level/score

In [None]:
def map_covid_vax(map_result, covid_data=None, vaccination_data=None):
    """
    This function maps COVID case and vaccination data onto candidate locations by ZIP code

    Parameters: {
        map_result: dataframe of candidate locations with a 'zip_code' column
        covid_data: (optional) dataframe with 'modzcta' and 'people_positive'
            columns; defaults to the module-level last7 table
        vaccination_data: (optional) dataframe with 'modzcta', '%_full_vax' and
            '%_positive' columns; defaults to the module-level vax_data table
    }
    Requires: rows whose zip_code is not a number >= 10001 are dropped;
        remaining rows whose ZIP code is absent from the lookup tables get NaN
    Returns: new dataframe (the input is not modified) with the added columns
        positive_cases, percent_positive and full_vax
    """
    if covid_data is None:
        covid_data = last7
    if vaccination_data is None:
        vaccination_data = vax_data

    # Parse ZIP codes once; values that are not numeric become NaN and are
    # filtered out below instead of raising.
    zip_numbers = pd.to_numeric(map_result['zip_code'], errors='coerce')
    keep = zip_numbers >= 10001

    # Work on an explicit copy so that adding columns neither touches the
    # caller's frame nor triggers pandas' SettingWithCopyWarning.
    mapped = map_result[keep].copy()
    zip_keys = zip_numbers[keep].astype(int)

    mapping_pos = dict(covid_data[['modzcta', 'people_positive']].values)
    mapping_vax = dict(vaccination_data[['modzcta', '%_full_vax']].values)
    mapping_percent_pos = dict(vaccination_data[['modzcta', '%_positive']].values)

    mapped['positive_cases'] = zip_keys.map(mapping_pos)
    mapped['percent_positive'] = zip_keys.map(mapping_percent_pos)
    mapped['full_vax'] = zip_keys.map(mapping_vax)

    return mapped

### 3. rank_results(): apply any filters to the results, including risk score(covid positive #, positive %, vax %), rating score, distance, …


In [None]:
def _zscore(column):
    """Standardize a column to zero mean / unit std; a constant column becomes all zeros."""
    # With at most one distinct value the standard deviation is zero (or NaN,
    # or floating-point noise), so dividing by it would yield NaN or arbitrary
    # large values. Such a column cannot discriminate between rows, so it
    # contributes nothing. Missing entries stay NaN.
    if column.nunique(dropna=True) <= 1:
        return column * 0.0
    return (column - column.mean()) / column.std()


def rank_results(data, min_rating=0.0):
    """
    This function scores the candidate locations and returns them in ranked order

    The score is a weighted sum of standardized columns:
    +15 * full_vax, -7 * percent_positive, +5 * rating, -5 * distance.
    Columns are standardized over all rows of data, before the rating filter.

    Parameters: {
        data: dataframe of candidate locations with the columns
            full_vax, percent_positive, rating and distance
        min_rating: float, results with lower ratings will be dropped, results w/o ratings will be excluded
    }
    Requires:
    Returns: new dataframe (the input is not modified) with an added 'score'
        column, sorted by score in descending order with missing scores last
    """
    # Copy first so the caller's dataframe does not silently gain a column.
    ranked = data.copy()

    # compute weighted score from the normalized columns
    ranked['score'] = (
        _zscore(ranked['full_vax']) * 15.0
        - _zscore(ranked['percent_positive']) * 7.0
        + _zscore(ranked['rating']) * 5.0
        - _zscore(ranked['distance']) * 5.0
    )

    # sort by score
    ranked = ranked.sort_values(by='score', ascending=False, na_position='last')
    # drop results below min_rating (a missing rating compares False and is dropped too)
    ranked = ranked[ranked['rating'] >= min_rating]

    return ranked
    

### 4. Master function: pass user input to all functions and return json locations(w/ info/scores/risk levels) in ranked order

In [None]:
def get_covid_data(address, category, radius, min_rating):
    """
    This function runs the whole pipeline for one user query and returns the ranked results as json

    Parameters: {
        address: string, search origin passed to get_results()
        category: string, place category passed to get_results()
        radius: int, search radius passed to get_results()
        min_rating: float, minimum rating passed to rank_results()
    }
    Requires:
    Returns: dict parsed from the ranked dataframe's column-oriented json,
        i.e. {column name: {row label: value}}
    """
    # Forward the caller's radius and min_rating; they used to be overridden
    # by the hard-coded values 100 and 0.0.
    result = get_results(address, category, radius=radius)
    mapped_result = map_covid_vax(result)
    ranked_result = rank_results(mapped_result, min_rating=min_rating)

    json_data = ranked_result.to_json(orient="columns")

    return json.loads(json_data)

### Test

In [None]:
# Manual check: search for restaurants around the test address with the default radius.
test_addr = "20 W 34th St, New York, NY 10001"
results = get_results(test_addr, 'restaurant')
# Bare expression so the notebook displays the dataframe.
results

Number of results:  16


Unnamed: 0,name,geolocation,rating,address,distance,duration,zip_code,reviews
0,Jack Demsey's,"(40.74845930000001, -73.98687129999999)",4.2,"36 West 33rd Street, New York",349,2 mins,10001,[{'John Vadseth': 'Great place around the corn...
1,Chipotle Mexican Grill,"(40.7482436, -73.9851073)",4.1,"350 5th Avenue Lbby 6, New York",135,1 min,10118,[{'Shourya Dharmale': 'It was my first time or...
2,Football Factory at Legends,"(40.7479328, -73.9857111)",4.4,"6 West 33rd Street, New York",236,1 min,3321,[{'Vince Meyer': 'Best place to watch soccer i...
3,Gammeeok 감미옥,"(40.74771519999999, -73.9860561)",3.8,"9 West 32nd Street 2nd floor, New York",965,6 mins,10001,[{'J': 'Soul food and sweet and good 깍둑이 I cam...
4,Five Senses (Koreatown),"(40.747715, -73.98605599999999)",4.3,"9 West 32nd Street, New York",965,6 mins,10001,[{'Quoc Pham (Q)': 'It's mad pricey but I unde...
5,Turntable Chicken Jazz,"(40.74818880000001, -73.9863847)",4.3,"20 West 33rd Street, New York",299,1 min,3305,[{'Michael Keefe': 'Delicious food - had the c...
6,Cajun Sea,"(40.7485067, -73.9862098)",4.5,"23 West 33rd Street, New York",304,1 min,10001,[{'christopher werner': 'Nice site for quality...
7,Tacombi,"(40.7483996, -73.9861986)",4.4,"23 West 33rd Street, New York",297,1 min,10118,[{'Legna Rivera': 'My husband and I spent the ...
8,J & H Eatery,"(40.7482182, -73.98643799999999)",2.0,"22 West 33rd Street, New York",304,1 min,3302,[{'Tim Burke': 'Some of the rudest staff in th...
9,Shabu Garden,"(40.74825269999999, -73.98647079999999)",4.5,"24 West 33rd Street, New York",309,1 min,10001,[{'Jin Lee': 'Best all you can eat Shabu/hotpo...


In [None]:
# Manual check: attach COVID/vaccination data to the results, then rank with min_rating=4.0.
mapp = map_covid_vax(results)
results_filtered = rank_results(mapp, 4.0)
# Bare expression so the notebook displays the ranked dataframe.
results_filtered

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Unnamed: 0,name,geolocation,rating,address,distance,duration,zip_code,reviews,positive_cases,percent_positive,full_vax,score
6,Cajun Sea,"(40.7485067, -73.9862098)",4.5,"23 West 33rd Street, New York",304,1 min,10001,[{'christopher werner': 'Nice site for quality...,53.0,0.038462,0.27858,-11.994518
9,Shabu Garden,"(40.74825269999999, -73.98647079999999)",4.5,"24 West 33rd Street, New York",309,1 min,10001,[{'Jin Lee': 'Best all you can eat Shabu/hotpo...,53.0,0.038462,0.27858,-12.066035
10,Dim Sum Palace,"(40.7485991, -73.98676139999999)",4.5,"33 West 33rd Street, New York",349,2 mins,10001,[{'Tomomi Ohta': 'This place provides unbeliev...,53.0,0.038462,0.27858,-12.638176
11,Seoulmenu,"(40.7477348, -73.9861939)",4.8,"11w West 32nd Street, New York",954,5 mins,10001,"[{'Felix': 'disappointing taste, but the price...",53.0,0.038462,0.27858,-16.364954
0,Jack Demsey's,"(40.74845930000001, -73.98687129999999)",4.2,"36 West 33rd Street, New York",349,2 mins,10001,[{'John Vadseth': 'Great place around the corn...,53.0,0.038462,0.27858,-17.565027
15,Pelicana Chicken Koreatown,"(40.74773509999999, -73.9862098)",4.3,"11 West 32nd Street #3819, New York",953,5 mins,10001,"[{'Samantha Ng': 'It's okay. Not my favorite, ...",53.0,0.038462,0.27858,-24.562068
4,Five Senses (Koreatown),"(40.747715, -73.98605599999999)",4.3,"9 West 32nd Street, New York",965,6 mins,10001,[{'Quoc Pham (Q)': 'It's mad pricey but I unde...,53.0,0.038462,0.27858,-24.733711
13,NewYork Roll'n Katsu,"(40.7477348, -73.9861939)",4.2,"11 West 32nd Street, New York",954,5 mins,10001,[{'Claire J': 'I am blown away by the quality ...,53.0,0.038462,0.27858,-26.218656
12,Noona Noodles,"(40.7477348, -73.9861939)",4.0,"11 West 32nd Street, New York",954,5 mins,10001,[{'admin admin': 'soo gooddddd. If they made a...,53.0,0.038462,0.27858,-29.503223
1,Chipotle Mexican Grill,"(40.7482436, -73.9851073)",4.1,"350 5th Avenue Lbby 6, New York",135,1 min,10118,[{'Shourya Dharmale': 'It was my first time or...,,,,


In [None]:
# Manual check: run the full pipeline for the test address and show the resulting dict.
data_json = get_covid_data(test_addr, 'restaurant', 100, 2.0)
data_json

Number of results:  16
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/

{'name': {'6': 'Cajun Sea',
  '9': 'Shabu Garden',
  '10': 'Dim Sum Palace',
  '11': 'Seoulmenu',
  '0': "Jack Demsey's",
  '15': 'Pelicana Chicken Koreatown',
  '4': 'Five Senses (Koreatown)',
  '13': "NewYork Roll'n Katsu",
  '12': 'Noona Noodles',
  '3': 'Gammeeok 감미옥',
  '14': 'Jian Bing Man 煎饼侠',
  '1': 'Chipotle Mexican Grill',
  '7': 'Tacombi'},
 'geolocation': {'6': [40.7485067, -73.9862098],
  '9': [40.7482527, -73.9864708],
  '10': [40.7485991, -73.9867614],
  '11': [40.7477348, -73.9861939],
  '0': [40.7484593, -73.9868713],
  '15': [40.7477351, -73.9862098],
  '4': [40.747715, -73.986056],
  '13': [40.7477348, -73.9861939],
  '12': [40.7477348, -73.9861939],
  '3': [40.7477152, -73.9860561],
  '14': [40.7477351, -73.9862097],
  '1': [40.7482436, -73.9851073],
  '7': [40.7483996, -73.9861986]},
 'rating': {'6': 4.5,
  '9': 4.5,
  '10': 4.5,
  '11': 4.8,
  '0': 4.2,
  '15': 4.3,
  '4': 4.3,
  '13': 4.2,
  '12': 4.0,
  '3': 3.8,
  '14': 3.7,
  '1': 4.1,
  '7': 4.4},
 'address'

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=09a12e0c-06d7-4999-ad39-39e13a535978' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>