This Jupyter notebook extracts data from multiple sources: trip data, occupancy data, and parking lot data.

In [1]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import numpy as np
import os
import folium
from pathlib import Path
import boto3

# import from src
# os.chdir switches the kernel's working directory to ../src before the import
# below (presumably so that trips_data_retrieval, a project module, can be found).
# Side effect: every relative path used later in the notebook is resolved
# against ../src rather than against the notebook's own folder.
os.chdir('../src')
from trips_data_retrieval import get_agg_trips_by_market, get_agg_trips

### Extracting Trips

In [2]:
# Base directory for data files. The path is relative, so it is resolved
# against the kernel's current working directory at the time it is used.
data_path = Path('../data')

In [3]:
# Change these as needed
qk_list = ['02123003']  # quadkeys the trips are filtered on
start_date = pd.to_datetime('2023-05-08 00:00:00')
end_date = pd.to_datetime('2023-05-11 00:01:00')
out_filename = 'tripdata.csv'
trip_type = 'dest'  # 'orig' = filter on trip origin, 'dest' = filter on trip destination

# don't change these
# Fail fast on a mistyped trip_type: without this check any value other than
# 'orig' (e.g. 'origin', 'Orig') would silently be treated as a destination filter.
if trip_type not in ('orig', 'dest'):
    raise ValueError(f"trip_type must be 'orig' or 'dest', got {trip_type!r}")

out_file_path = data_path / out_filename  # full path handed to get_agg_trips as agg_file_path
origin_qk = (trip_type == 'orig')  # boolean form of trip_type expected by get_agg_trips

# Source table and S3 settings forwarded unchanged to get_agg_trips.
table_name = 'tripdata_na_restricted'
s3_tapp_data_dir = 'data-restricted'
s3_tapp_region = 'NA'


In [6]:
# Retrieve the trips for the configured quadkeys and date range.
# With echo_query=True the generated SQL is shown in the cell output.
df = get_agg_trips(
    # where the data comes from
    table_name=table_name,
    s3_tapp_data_dir=s3_tapp_data_dir,
    s3_tapp_region=s3_tapp_region,
    # what to select
    start_date=start_date,
    end_date=end_date,
    qk_filter_list=qk_list,
    origin_qk=origin_qk,
    # output / diagnostics
    agg_file_path=out_file_path,
    echo_query=True,
)


            WITH qk_counts AS(
            SELECT start_time, provider, start_lat, start_lon, end_lat, end_lon, 
                    BING_TILE_QUADKEY(BING_TILE_AT(end_lat, end_lon, 17)) AS dest_qk17,
                    BING_TILE_QUADKEY(BING_TILE_AT(start_lat, start_lon, 17)) AS orig_qk17,
                    year, month, day, SUBSTR(start_time,12, 2) AS hour, trip_id, is_moving

            FROM "inrixdatascience"."tripdata_na_restricted"
            WHERE
                qk = '02123'
                AND year IN ('2023', '2023')
                AND month IN ('05', '05')
                AND CAST(day AS INT) BETWEEN 8 AND 11
                AND provider IN ('475', '458', '470')
                AND REGEXP_LIKE(
                        BING_TILE_QUADKEY(BING_TILE_AT(end_lat, end_lon, 8)),
                        '(02123003)'
                    )
                    
                )
    SELECT year, month, day, hour, orig_qk17, dest_qk17, start_lat, start_lon, 
            end_lat, e

In [7]:
# Sanity check: report (rows, columns), then preview the first five rows.
# NOTE(review): in the preview, orig_qk17 / dest_qk17 show 16 digits, i.e. the
# leading '0' of the level-17 quadkey seems to be lost (numeric dtype?) —
# confirm these columns are kept as strings before joining on them.
print(df.shape)
df.head(n=5)

(618213, 13)


Unnamed: 0,year,month,day,hour,orig_qk17,dest_qk17,start_lat,start_lon,end_lat,end_lon,count,trip_id,is_moving
0,2023,5,10,22,2123003000203112,2123003000322120,47.914219,-122.305076,47.882606,-122.243643,1,573d98ce66e303dca0fb561ba362403f,1
1,2023,5,10,23,2123003000213033,2123003002312303,47.90982,-122.266921,47.789106,-122.198045,1,661c0601b3bb0ea1dd66cc8356152ec9,1
2,2023,5,10,23,2123003000230231,2123003000030320,47.889296,-122.288991,47.947945,-122.288348,1,0f68c94b2e4b5d65a6e08254777b8c37,1
3,2023,5,10,23,2123003000332303,2123003000332213,47.87613,-122.197032,47.876675,-122.201666,1,a236898306d5f042f16544b513a73e4b,1
4,2023,5,10,23,2123003001030023,2123003001211030,47.954128,-122.118543,47.925987,-122.096079,1,1bac95d7331a1af112831e1db4752ed8,1
