This Jupyter notebook extracts data from multiple sources: trip data, occupancy data, and parking-lot data.

In [1]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import numpy as np
import os
import folium
from pathlib import Path
import boto3

# import from src
# The working directory is switched to ../src before importing the project
# modules below. NOTE: this also means every relative path used later in the
# notebook (e.g. '../data') is resolved against src/, not the notebook's folder.
os.chdir('../src')
from trips_data_retrieval import get_agg_trips_by_market, get_agg_trips
import inrix_data_science_utils.maps.quadkey as qkey

### Extracting Trips

In [2]:
# Folder holding cached extracts; relative to the current working directory.
data_path = Path('..') / 'data'

In [3]:
# ---------------------------------------------------------------------------
# Parameters to edit for each extraction
# ---------------------------------------------------------------------------
# qk_list = ['02123003']  # seattle area
qk_list = [
    '021230032110033003',
    '021230032110033012',
    '021230032110033021',
    '021230032110033030',
]  # QFC North Bend
start_date = pd.to_datetime('2023-01-01 00:00:00')
end_date = pd.to_datetime('2023-06-30 00:01:00')
trip_type = 'dest'
filename_description = 'QFC_North_Bend'

# ---------------------------------------------------------------------------
# Derived values -- do not edit below this line
# ---------------------------------------------------------------------------
# Zero-padded date components, kept as individual strings.
start_year, end_year = f'{start_date.year}', f'{end_date.year}'
start_month, end_month = f'{start_date.month:02d}', f'{end_date.month:02d}'
start_day, end_day = f'{start_date.day:02d}', f'{end_date.day:02d}'

start_stamp = '-'.join([start_year, start_month, start_day])
end_stamp = '-'.join([end_year, end_month, end_day])

# NOTE(review): there is no separator between the end date and the description
# (e.g. "..._to_2023-06-30QFC_North_Bend.csv"). Kept as-is so previously cached
# files are still found; confirm whether an underscore was intended.
out_filename = 'trips_' + start_stamp + '_to_' + end_stamp + filename_description + '.csv'
out_file_path = data_path / out_filename
print('Will save the csv to ', out_file_path)

# True only when trips are filtered by their origin quadkey.
origin_qk = trip_type == 'orig'

# Data-source settings passed through to the trips query.
table_name = 'tripdata_na_restricted'
s3_tapp_data_dir = 'data-restricted'
s3_tapp_region = 'NA'

Will save the csv to  ..\data\trips_2023-01-01_to_2023-06-30QFC_North_Bend.csv


In [4]:
# Reuse the cached CSV when it is already in the data folder; otherwise run the
# trips query (which is handed `out_file_path` as its aggregate file path).
if out_file_path.exists():
    df = pd.read_csv(out_file_path)
else:
    trips_query_kwargs = dict(
        agg_file_path=out_file_path,
        echo_query=True,
        table_name=table_name,
        start_date=start_date,
        end_date=end_date,
        qk_filter_list=qk_list,
        s3_tapp_data_dir=s3_tapp_data_dir,
        s3_tapp_region=s3_tapp_region,
        origin_qk=origin_qk,
    )
    df = get_agg_trips(**trips_query_kwargs)
    print('Saved the csv to ', out_file_path)

In [5]:
# Preview the trips table; for a frame this long the rendering elides the
# middle rows, so only the head and tail are shown.
display(df)

Unnamed: 0,year,month,day,hour,minute,second,orig_qk17,dest_qk17,start_lat,start_lon,end_lat,end_lon,count,trip_id,is_moving
0,2023,5,9,12,4,36,2123003211021130,2123003211003300,47.483290,-121.776148,47.493282,-121.780980,1,bc8ca58e3494c3c51013f6d943fe5e6f,1
1,2023,3,15,0,22,40,2123003211211013,2123003211003302,47.454651,-121.740032,47.492775,-121.780713,1,2b29375756044ba7bdd227c8c9f089cf,1
2,2023,5,19,19,29,20,2123003211122002,2123003211003300,47.470064,-121.728046,47.493163,-121.780790,1,3062d6f9c16509612a98fb526759e429,1
3,2023,1,14,22,49,18,2123003022002231,2123003211003300,47.608591,-122.334015,47.493202,-121.781222,1,324c223b87ff6e3b3982790329312022,1
4,2023,4,3,13,41,23,2123003211030110,2123003211003301,47.486475,-121.753957,47.493133,-121.780636,1,4b7fbaa9e3f94ae90e8c3d54f377de04,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11815,2023,4,13,19,31,25,2123003211003220,2123003211003302,47.489462,-121.791918,47.492970,-121.780951,1,1e71364d1997b5707899e41d54714297,1
11816,2023,3,20,21,7,7,2123003211030110,2123003211003302,47.486587,-121.753874,47.492730,-121.781418,1,ed14404a9cc7b9c5ab3e24d06462ab40,1
11817,2023,5,13,21,36,12,2123003211003210,2123003211003302,47.494398,-121.787044,47.493018,-121.780943,1,f78879a0549b5c433180a0c57783a3fb,1
11818,2023,5,13,17,33,12,2123003211003231,2123003211003303,47.490072,-121.784258,47.492469,-121.780279,1,9f960eb88c80b76a4f26486db8c12977,1


In [6]:
# Assemble a single timestamp column from the separate date/time component columns.
datetime_parts = ['year', 'month', 'day', 'hour', 'minute', 'second']
df['date'] = pd.to_datetime(df[datetime_parts])
display(df)

Unnamed: 0,year,month,day,hour,minute,second,orig_qk17,dest_qk17,start_lat,start_lon,end_lat,end_lon,count,trip_id,is_moving,date
0,2023,5,9,12,4,36,2123003211021130,2123003211003300,47.483290,-121.776148,47.493282,-121.780980,1,bc8ca58e3494c3c51013f6d943fe5e6f,1,2023-05-09 12:04:36
1,2023,3,15,0,22,40,2123003211211013,2123003211003302,47.454651,-121.740032,47.492775,-121.780713,1,2b29375756044ba7bdd227c8c9f089cf,1,2023-03-15 00:22:40
2,2023,5,19,19,29,20,2123003211122002,2123003211003300,47.470064,-121.728046,47.493163,-121.780790,1,3062d6f9c16509612a98fb526759e429,1,2023-05-19 19:29:20
3,2023,1,14,22,49,18,2123003022002231,2123003211003300,47.608591,-122.334015,47.493202,-121.781222,1,324c223b87ff6e3b3982790329312022,1,2023-01-14 22:49:18
4,2023,4,3,13,41,23,2123003211030110,2123003211003301,47.486475,-121.753957,47.493133,-121.780636,1,4b7fbaa9e3f94ae90e8c3d54f377de04,1,2023-04-03 13:41:23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11815,2023,4,13,19,31,25,2123003211003220,2123003211003302,47.489462,-121.791918,47.492970,-121.780951,1,1e71364d1997b5707899e41d54714297,1,2023-04-13 19:31:25
11816,2023,3,20,21,7,7,2123003211030110,2123003211003302,47.486587,-121.753874,47.492730,-121.781418,1,ed14404a9cc7b9c5ab3e24d06462ab40,1,2023-03-20 21:07:07
11817,2023,5,13,21,36,12,2123003211003210,2123003211003302,47.494398,-121.787044,47.493018,-121.780943,1,f78879a0549b5c433180a0c57783a3fb,1,2023-05-13 21:36:12
11818,2023,5,13,17,33,12,2123003211003231,2123003211003303,47.490072,-121.784258,47.492469,-121.780279,1,9f960eb88c80b76a4f26486db8c12977,1,2023-05-13 17:33:12


In [7]:
# Count distinct trip timestamps (missing values, if any, count as one value).
n_unique_timestamps = df['date'].nunique(dropna=False)
print(n_unique_timestamps, 'unique timestamps')

11810 unique timestamps


In [8]:
# Plot a thinned sample of trip end points as small red dots.
m = folium.Map(zoom_start=10)

N = 11  # plot every N-th trip to keep the map light
sampled_trips = df.iloc[::N]
for end_lat, end_lon in zip(sampled_trips['end_lat'].values,
                            sampled_trips['end_lon'].values):
    marker = folium.CircleMarker(location=[end_lat, end_lon], color='red', radius=1)
    marker.add_to(m)

# Zoom the view to the extent of the plotted markers.
m.fit_bounds(m.get_bounds())

m

In [9]:
# Outline each configured quadkey tile in blue.
#
# * The map object is named `qk_map` rather than `map`, so the builtin `map()`
#   is not shadowed for the rest of the kernel session.
# * `qk_list` is taken from the parameters cell instead of being re-declared
#   here, so the outlines always match the tiles that were actually queried.
qk_map = folium.Map(zoom_start=10)
for q_string in qk_list:
    q = qkey.QuadKey(q_string)
    folium.GeoJson(
        q.get_bounds_as_geojson(),
        # Same style for every feature; the feature argument is unused.
        style_function=lambda _feature: {'color': 'blue', 'weight': 1},
    ).add_to(qk_map)

# Zoom the view to the extent of the drawn tiles.
qk_map.fit_bounds(qk_map.get_bounds())
qk_map

### Extract the Six Flags Polygon Shapes

In [10]:
from polygon_retrieval import get_polygons

In [11]:
# SafeGraph brand identifier for Six Flags; used to filter the polygon query.
brand_id = 'SG_BRAND_0dd52fbf1cd77fc38e06650435ada07d'

In [12]:
# Fetch the polygons for `brand_id` as a DataFrame; echo_query=True prints the
# SQL that is executed. Per that SQL, POI rows are joined to parking polygons
# (column `parking_wkt`), so the same placekey can appear on several rows.
polygon_df = get_polygons(brand_id, format='df', echo_query=True)


        select poi.*, prk.polygon_wkt as parking_wkt
        from sg_poi.poi_staging_pg_partitions_202404 poi
        join sg_poi.parking_staging_pg_partitions prk on poi.placekey = prk.related_poi[1]
        where cardinality(related_poi) > 0 and cardinality(brands) > 0
        and brands[1].safegraph_brand_id = 'SG_BRAND_0dd52fbf1cd77fc38e06650435ada07d'
    
Query executed successfully.


In [15]:
# Lift the column-count limit so wide frames render every column.
# This is a session-wide pandas option and stays in effect for later cells.
pd.options.display.max_columns = None
display(polygon_df.head(5))

Unnamed: 0,placekey,parent_placekey,location_name,brands,top_category,sub_category,naics_code,latitude,longitude,street_address,city,region,postal_code,open_hours,category_tags,opened_on,closed_on,tracking_closed_since,geometry_type,polygon_wkt,polygon_class,enclosed,phone_number,is_synthetic,includes_parking_lot,iso_country_code,wkt_area_sq_meters,related_parking,tz,year,month,pg_id,parking_wkt
0,zzw-222@5qs-zjz-52k,,Six Flags,"[{safegraph_brand_name=Six Flags, safegraph_br...",Amusement Parks and Arcades,Amusement and Theme Parks,713110,35.463453,-97.589371,3908 W Reno Ave,Oklahoma City,OK,73107,,"[Playground, Tubing, Water Park]",NaT,NaT,2019-07-01,POLYGON,"POLYGON ((-97.5869388850767 35.46049300255516,...",OWNED_POLYGON,False,14059439687,False,True,US,93191,"[zzz-222@5qs-zjy-3qz, zzz-224@5qs-zjy-3kf, zzz...",America/Chicago,2024,4,21,POLYGON ((-97.58799110886325 35.46405161940424...
1,zzw-222@5qs-zjz-52k,,Six Flags,"[{safegraph_brand_name=Six Flags, safegraph_br...",Amusement Parks and Arcades,Amusement and Theme Parks,713110,35.463453,-97.589371,3908 W Reno Ave,Oklahoma City,OK,73107,,"[Playground, Tubing, Water Park]",NaT,NaT,2019-07-01,POLYGON,"POLYGON ((-97.5869388850767 35.46049300255516,...",OWNED_POLYGON,False,14059439687,False,True,US,93191,"[zzz-222@5qs-zjy-3qz, zzz-224@5qs-zjy-3kf, zzz...",America/Chicago,2024,4,21,POLYGON ((-97.58968135229813 35.46350545719704...
2,zzw-222@5qs-zjz-52k,,Six Flags,"[{safegraph_brand_name=Six Flags, safegraph_br...",Amusement Parks and Arcades,Amusement and Theme Parks,713110,35.463453,-97.589371,3908 W Reno Ave,Oklahoma City,OK,73107,,"[Playground, Tubing, Water Park]",NaT,NaT,2019-07-01,POLYGON,"POLYGON ((-97.5869388850767 35.46049300255516,...",OWNED_POLYGON,False,14059439687,False,True,US,93191,"[zzz-222@5qs-zjy-3qz, zzz-224@5qs-zjy-3kf, zzz...",America/Chicago,2024,4,21,POLYGON ((-97.58799110886325 35.46405161940424...
3,zzw-222@5qs-zjz-52k,,Six Flags,"[{safegraph_brand_name=Six Flags, safegraph_br...",Amusement Parks and Arcades,Amusement and Theme Parks,713110,35.463453,-97.589371,3908 W Reno Ave,Oklahoma City,OK,73107,,"[Playground, Tubing, Water Park]",NaT,NaT,2019-07-01,POLYGON,"POLYGON ((-97.5869388850767 35.46049300255516,...",OWNED_POLYGON,False,14059439687,False,True,US,93191,"[zzz-222@5qs-zjy-3qz, zzz-224@5qs-zjy-3kf, zzz...",America/Chicago,2024,4,21,POLYGON ((-97.58968135229813 35.46350545719704...
4,222-222@5sb-94m-3nq,,Six Flags,"[{safegraph_brand_name=Six Flags, safegraph_br...",Amusement Parks and Arcades,Amusement and Theme Parks,713110,42.245289,-88.964116,7820 Cherryvale N Blvd,Cherry Valley,IL,61016,,"[Playground, Tubing, Water Park]",NaT,NaT,2019-07-01,POLYGON,"POLYGON ((-88.9618243393233 42.24699729743403,...",OWNED_POLYGON,False,18472494636,False,True,US,91897,"[zzz-222@5sb-94k-zpv, zzz-222@5sb-94m-2tv, zzz...",America/Chicago,2024,4,18,"POLYGON ((-88.9653561 42.248064, -88.9645666 4..."


In [19]:
# Print every column of the first row in full, one "name : value" line each.
# `.iloc[0]` selects the first row by position, so this keeps working even if
# the frame's index is not the default 0..n-1 (e.g. after filtering/concat),
# where a label lookup with `[0]` would raise or return several rows.
for col_name in polygon_df.columns:
    print(col_name, ':', polygon_df[col_name].iloc[0])

placekey : zzw-222@5qs-zjz-52k
parent_placekey : nan
location_name : Six Flags
brands : [{safegraph_brand_name=Six Flags, safegraph_brand_id=SG_BRAND_0dd52fbf1cd77fc38e06650435ada07d}]
top_category : Amusement Parks and Arcades
sub_category : Amusement and Theme Parks
naics_code : 713110
latitude : 35.463453
longitude : -97.589371
street_address : 3908 W Reno Ave
city : Oklahoma City
region : OK
postal_code : 73107
open_hours : nan
category_tags : [Playground, Tubing, Water Park]
opened_on : NaT
closed_on : NaT
tracking_closed_since : 2019-07-01 00:00:00
geometry_type : POLYGON
polygon_wkt : POLYGON ((-97.5869388850767 35.46049300255516, -97.59002747531933 35.460460427199685, -97.59003656475501 35.464079172386775, -97.5879855335948 35.46403956548003, -97.58788373191521 35.46221244435525, -97.58692025173359 35.46220356035862, -97.5869388850767 35.46049300255516))
polygon_class : OWNED_POLYGON
enclosed : False
phone_number : +14059439687
is_synthetic : False
includes_parking_lot : True
i