# Turning Queries into Functions

## First, let's setup the database engine like we did last week

### Load DB credentials

## TODO: 
* add readonly credentials to canvas for download
* Strip out some of the queries so we can build them as a class

In [1]:
import json

# TODO: make sure to download credentials from https://canvas.upenn.edu/files/89654914/download?download_frd=1
# save them to the base directory for this repo
with open("pg-credentials.json") as creds:
    creds = json.load(creds)

PASSWORD = creds["PASSWORD"]
HOST = creds["HOST"]
USERNAME = creds["USERNAME"]
DATABASE = creds["DATABASE"]
PORT = creds["PORT"]

### Create DB engine

In [2]:
from sqlalchemy import create_engine

engine = create_engine(f"postgresql://{USERNAME}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

## Parameterizing Queries

With SQLAlchemy, you can parametrize queries, called 'bound parameters', by putting them with a special templating notation: `:variable_name`. Read more here: <https://docs.sqlalchemy.org/en/13/core/tutorial.html#using-textual-sql>. First we need to wrap a query template in a SQLAlchemy function called `text`.

In [3]:
from sqlalchemy.sql import text

engine.execute(text("SELECT :num"), num=2).fetchall()

[(2,)]

In [4]:
query = text("""
    SELECT i
    FROM generate_series(:minval, :maxval) as s(i)
""")
engine.execute(query, minval=1, maxval=10).fetchall()

[(1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,), (10,)]

## As a function

In [5]:
def hello_person(name):
    return engine.execute(text("SELECT 'HELLO ' || :name As hello_text"), name=name).fetchone()

In [6]:
hello_person("Andy")

('HELLO Andy',)

### Give me first five rows of a table (similar to what Carto does on data page)

In [7]:
def fetch_n_rows(n_rows=5):
    """Return first n_rows of a """
    query = text("""
        SELECT name, totaldocks, docksavailable
        FROM indego_station_status LIMIT :n_rows
    """)
    return engine.execute(query, n_rows=n_rows).fetchall()

In [8]:
fetch_n_rows()

[('11th & Poplar, John F. Street Community Center', 15, 11),
 ('Amtrak 30th Street Station', 21, 20),
 ('43rd & Chester, Clark Park', 19, 14),
 ('Front & Carpenter', 25, 22),
 ('Philadelphia Zoo', 12, 9)]

## How Many Covid Tests?

Data from Philadelphia Open Data Portal: <https://www.opendataphilly.org/dataset/covid-cases>

In [9]:
from sqlalchemy.sql import bindparam
from sqlalchemy import String, Integer

# specify data type will help the query be formatted correctly.

def get_covid_test_numbers(zip_code):
    query = text("""
    -- Let's do this together
    """)
    query.bindparams(bindparam('zip_code', type_=String))
    resp = engine.execute(query, zip_code=zip_code).fetchone()
    
    return resp['num_tests_negative'], resp['num_tests_positive']

In [10]:
get_covid_test_numbers('19102')

(2340, 85)

### Get the five closest zip codes' testing

In [11]:
def get_five_closest_case_numbers(zip_code):
    query = text("""
    -- Let's do this together
    """)
    resp = engine.execute(query, zip_code=zip_code).fetchall()
    
    return [(row['zip_code'], row['num_tests_negative'], row['num_tests_positive'], row['distance_apart']) for row in resp]

In [12]:
get_five_closest_case_numbers('19102')

[('19123', 6774, 455, 0.0),
 ('19109', 12, 0, 0.0),
 ('19107', 5639, 320, 0.0),
 ('19103', 9206, 340, 0.0),
 ('19130', 11440, 536, 0.0)]

### Get Closest Testing Numbers

In [13]:
def get_closest_case_numbers(zip_code, num_nearest=10):
    query = text("""
    -- Let's do this together
    """)
    resp = engine.execute(query, zip_code=zip_code, num_nearest=num_nearest).fetchall()
    
    return [(row['zip_code'], row['num_tests_negative'], row['num_tests_positive'], row['distance_apart']) for row in resp]

In [14]:
get_closest_case_numbers('19102', num_nearest=20)

[('19146', 15706, 792, 0.0),
 ('19103', 9206, 340, 0.0),
 ('19109', 12, 0, 0.0),
 ('19123', 6774, 455, 0.0),
 ('19107', 5639, 320, 0.0),
 ('19130', 11440, 536, 0.0),
 ('19147', 13622, 559, 0.0),
 ('19106', 4899, 137, 857.11855593),
 ('19104', 14274, 921, 963.05874068),
 ('19121', 10193, 994, 1336.14649559),
 ('19122', 7518, 657, 1336.00328719),
 ('19145', 14188, 1090, 1645.19440292),
 ('19148', 14416, 1233, 1648.08329682),
 ('19132', 8535, 866, 3003.31539157),
 ('19133', 5888, 808, 3006.36405143),
 ('19125', 8537, 427, 2422.53041855),
 ('19131', 13260, 1297, 2853.72954238),
 ('19143', 19795, 1608, 3106.3499909),
 ('19153', 2594, 302, 4027.10403053),
 ('19139', 13754, 1178, 3685.39186301)]

## Fetching data from BigQuery

In [15]:
from google.cloud import bigquery
import geopandas as gpd
from shapely import wkt

# NOTE: you need to setup a service account (or use another auth method)
bqclient = bigquery.Client.from_service_account_json("MUSA-509-3337814ad805.json")

In [16]:
from shapely import wkt

query = f"""
SELECT (select value from unnest(all_tags) WHERE key = 'amenity') as amenity_type,
       COUNT(*) as num_amenities
  FROM `bigquery-public-data.geo_openstreetmap.planet_features`
 WHERE 'amenity' IN (SELECT key FROM UNNEST(all_tags))
 AND ST_INTERSECTSBOX(ST_Centroid(geometry), -75.280298,39.867005,-74.955831,40.137959)
GROUP BY 1
ORDER BY 2 DESC
"""
response = bqclient.query(query)

# print the rows
for row in response:
    print(row['amenity_type'].ljust(17), row['num_amenities'])

parking           3733
place_of_worship  1401
school            1054
restaurant        1033
fast_food         600
bench             554
social_facility   408
bank              273
fuel              244
cafe              241
parking_entrance  202
bar               192
fire_station      185
waste_basket      182
pharmacy          181
car_sharing       167
shelter           142
library           122
bicycle_parking   113
post_office       113
post_box          109
pub               88
marketplace       87
toilets           76
fountain          69
clinic            67
community_centre  65
theatre           64
atm               63
kindergarten      63
police            62
childcare         53
university        48
hospital          46
recycling         42
grave_yard        37
ice_cream         34
doctors           33
car_wash          33
college           33
dentist           32
car_rental        31
bicycle_rental    26
vending_machine   24
cinema            23
drinking_water    22
waste_dis

* [Parameterize queries](https://cloud.google.com/bigquery/docs/parameterized-queries) to avoid SQL Injection

BigQuery uses `@variable_name` notation for templating/parametrizing names in queries.

It makes use of the `QueryJobConfig` object in Python: <https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.QueryJobConfig.html>

In [17]:
def get_nearest_cafes(lng, lat, distance, amenity_type="cafe"):
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("poi_category", "STRING", amenity_type),
            bigquery.ScalarQueryParameter("lng", "FLOAT", lng),
            bigquery.ScalarQueryParameter("lat", "FLOAT", lat),
            bigquery.ScalarQueryParameter("distance", "FLOAT", distance)
        ]
    )
    query = f"""
        SELECT (select value from unnest(all_tags) WHERE key = 'name') as amenity_name, 
               (select value from unnest(all_tags) WHERE key = 'amenity') as amenity_type,
               (select value from unnest(all_tags) WHERE key = 'addr:street') as address,
               (select value from unnest(all_tags) WHERE key = 'phone') as phone_number,
               CAST(round(ST_Distance(ST_GeogPoint(@lng, @lat), ST_Centroid(geometry))) AS int64) as distance_away_meters,
               geometry
          FROM `bigquery-public-data.geo_openstreetmap.planet_features`
         WHERE ('amenity', @poi_category) IN (SELECT (key, value) FROM UNNEST(all_tags))
         and ST_DWithin(ST_GeogPoint(@lng, @lat), ST_Centroid(geometry), @distance)
         ORDER BY distance_away_meters ASC
    """
    response = bqclient.query(query, job_config=job_config)
    return response

In [22]:
meyerson_latlng = (39.9522139, -75.1927795)
response = get_nearest_cafes(meyerson_latlng[1], meyerson_latlng[0], 1000, 'cafe')

In [23]:
for row in response:
    description = f"{row['amenity_name']} is {row['distance_away_meters']} meters away"
    if row['address'] is not None:
        description = description + f" on {row['address']}"
    print(description + '\n')

Starbucks is 98 meters away on Walnut Street

United By Blue is 102 meters away on Walnut Street

Avril 50 is 144 meters away on Sansom Street

Starbucks is 262 meters away

Kiwi Frozen Yougurt is 327 meters away

Saxby's is 377 meters away on Chestnut Street

Joe Coffee is 421 meters away on Chestnut

Starbucks is 510 meters away

Starbucks is 551 meters away

Starbucks is 564 meters away

Saxbys - Drexel is 604 meters away on North 34th Street

World Cafe Live is 650 meters away on Walnut Street

Starbucks is 678 meters away

Saxbys is 879 meters away on Locust Street

Green Line Cafe is 922 meters away on S 41st St

Metropolitan Bakery Café is 932 meters away on Walnut Street



In [24]:
from cartoframes.viz import Layer

cafes = gpd.GeoDataFrame(response.to_dataframe(), geometry=[wkt.loads(row.geometry).centroid for row in response], crs="epsg:4326")

Layer(cafes)

### Add input validation

In [27]:
query = f"""
SELECT DISTINCT (select value from unnest(all_tags) WHERE key = 'amenity') as amenity_type
  FROM `bigquery-public-data.geo_openstreetmap.planet_features`
 WHERE 'amenity' IN (SELECT key FROM UNNEST(all_tags))
 AND ST_INTERSECTSBOX(ST_Centroid(geometry), -75.280298,39.867005,-74.955831,40.137959)
"""
response = bqclient.query(query)

In [28]:
poi_valid_set = set([row['amenity_type'] for row in response])
poi_valid_set

{'animal_shelter',
 'animal_training',
 'arts_centre',
 'atm',
 'atm;pharmacy',
 'bank',
 'bar',
 'bar;pub',
 'bbq',
 'bench',
 'bicycle_parking',
 'bicycle_rental',
 'bicycle_repair_station',
 'biergarten',
 'boat_rental',
 'bureau_de_change',
 'bus_station',
 'cafe',
 'car_rental',
 'car_sharing',
 'car_wash',
 'casino',
 'charging_station',
 'childcare',
 'cinema',
 'clinic',
 'clock',
 'college',
 'community_centre',
 'compressed_air',
 'conference_centre',
 'courthouse',
 'coworking_space',
 'dentist',
 'disused',
 'doctors',
 'dojo',
 'drinking_water',
 'driving_school',
 'events_venue',
 'fast_food',
 'ferry_terminal',
 'fire_station',
 'food_court',
 'fountain',
 'fraternity',
 'fuel',
 'gazebo',
 'grave_yard',
 'gym',
 'hobby_shop',
 'hospital',
 'ice cream',
 'ice_cream',
 'internet_cafe',
 'kindergarten',
 'language_school',
 'letter_box',
 'library',
 'loading_dock',
 'marketplace',
 'monastery',
 'money_transfer',
 'motorcycle_parking',
 'music_school',
 'music_venue',
 'n

In [30]:
def validate_poi_input(category):
    if category not in poi_valid_set:
        raise ValueError(f"`{category}` is not valid entry. Try one of {', '.join(poi_valid_set)}")

In [31]:
validate_poi_input('hi')

ValueError: `hi` is not valid entry. Try one of fountain, hobby_shop, compressed_air, dojo, police, cafe, driving_school, music_venue, vending_machine, ranger_station, music_school, parking_entrance, library, arts_centre, public_building, waste_disposal, kindergarten, food_court, bbq, bench, toilets, bar;pub, loading_dock, prison, nursing_home, fuel, bureau_de_change, public_bookcase, conference_centre, animal_training, swimming_pool, soaicl_facility, veterinary, bank, telephone, taxi, charging_station, fraternity, bar, post_office, bicycle_repair_station, grave_yard, childcare, biergarten, social_facility, marketplace, car_sharing, clinic, hospital, letter_box, ice cream, boat_rental, atm, fire_station, cinema, ferry_terminal, courthouse, trailer_park, college, ice_cream, casino, restaurant, nightclub, university, motorcycle_parking, atm;pharmacy, monastery, drinking_water, pub, studio, community_centre, animal_shelter, coworking_space, bicycle_rental, waste_container, language_school, school, recycling, doctors, townhall, trolley_bay, prep_school, post_box, stripclub, shelter, car_rental, clock, events_venue, place_of_worship, public_bath, money_transfer, internet_cafe, theatre, bicycle_parking, pharmacy, waste_basket, bus_station, gym, dentist, parking, disused, fast_food, parking_space, waste_transfer_station, gazebo, car_wash

In [32]:
def get_nearest_cafes(lng, lat, distance, amenity_type="cafe"):
    validate_poi_input(amenity_type)
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("poi_category", "STRING", amenity_type),
            bigquery.ScalarQueryParameter("lng", "FLOAT", lng),
            bigquery.ScalarQueryParameter("lat", "FLOAT", lat),
            bigquery.ScalarQueryParameter("distance", "FLOAT", distance)
        ]
    )
    query = f"""
        SELECT (select value from unnest(all_tags) WHERE key = 'name') as amenity_name, 
               (select value from unnest(all_tags) WHERE key = 'amenity') as amenity_type,
               (select value from unnest(all_tags) WHERE key = 'addr:street') as address,
               (select value from unnest(all_tags) WHERE key = 'phone') as phone_number,
               CAST(round(ST_Distance(ST_GeogPoint(@lng, @lat), ST_Centroid(geometry))) AS int64) as distance_away_meters,
               geometry
          FROM `bigquery-public-data.geo_openstreetmap.planet_features`
         WHERE ('amenity', @poi_category) IN (SELECT (key, value) FROM UNNEST(all_tags))
         and ST_DWithin(ST_GeogPoint(@lng, @lat), ST_Centroid(geometry), @distance)
         ORDER BY distance_away_meters ASC
    """
    response = bqclient.query(query, job_config=job_config)
    return response

In [33]:
get_nearest_cafes(meyerson_latlng[1], meyerson_latlng[0], 1000, 'playground')

ValueError: `playground` is not valid entry. Try one of fountain, hobby_shop, compressed_air, dojo, police, cafe, driving_school, music_venue, vending_machine, ranger_station, music_school, parking_entrance, library, arts_centre, public_building, waste_disposal, kindergarten, food_court, bbq, bench, toilets, bar;pub, loading_dock, prison, nursing_home, fuel, bureau_de_change, public_bookcase, conference_centre, animal_training, swimming_pool, soaicl_facility, veterinary, bank, telephone, taxi, charging_station, fraternity, bar, post_office, bicycle_repair_station, grave_yard, childcare, biergarten, social_facility, marketplace, car_sharing, clinic, hospital, letter_box, ice cream, boat_rental, atm, fire_station, cinema, ferry_terminal, courthouse, trailer_park, college, ice_cream, casino, restaurant, nightclub, university, motorcycle_parking, atm;pharmacy, monastery, drinking_water, pub, studio, community_centre, animal_shelter, coworking_space, bicycle_rental, waste_container, language_school, school, recycling, doctors, townhall, trolley_bay, prep_school, post_box, stripclub, shelter, car_rental, clock, events_venue, place_of_worship, public_bath, money_transfer, internet_cafe, theatre, bicycle_parking, pharmacy, waste_basket, bus_station, gym, dentist, parking, disused, fast_food, parking_space, waste_transfer_station, gazebo, car_wash

## OpenStreetMap Editing

Are you interested in OSM for your project? There are many ways to get OSM data, including semi-yearly updates on BigQuery. There are daily extracts for regions of the world at [GeoFabrik](https://download.geofabrik.de/). The shapefiles can be big and hard to get down to the region of interest.

### Is OSM lacking in a region you want? Start adding your house, your parents house, etc.

<https://www.openstreetmap.org/#map=17/39.95484/-75.20505>