In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Year and month of the trip-record file to analyse.
year, month = 2019, 1

# The month is zero-padded to two digits in the file name.
parquet_path = f'{year}/{year}-{month:02d}.parquet'
monthly_data = pd.read_parquet(parquet_path)

In [None]:
from calendar import monthrange

# Columns whose extreme upper tail is trimmed, and the quantile used as cut-off.
filter_columns = ['fare_amount', 'tip_amount', 'total_amount']
percentile = 0.9999

# Half-open window [period_start, period_end) spanning the whole month.
# The end is the first instant of the following month, so trips on the last
# day of the month are kept (a "<= last day" bound would stop at midnight at
# the start of that day and discard them).
period_start = pd.Timestamp(year=year, month=month, day=1)
period_end = period_start + pd.Timedelta(days=monthrange(year, month)[1])

# Every clause must hold for a row to be kept; `@name` refers to the Python
# variables defined above rather than to columns of the frame.
filter_string = ' & '.join(
    (
        # Pick-up and drop-off both fall inside the selected month.
        '(@period_start <= tpep_pickup_datetime) & (tpep_pickup_datetime < @period_end)',
        '(@period_start <= tpep_dropoff_datetime) & (tpep_dropoff_datetime < @period_end)',
        # A trip cannot end before it starts.
        '(tpep_pickup_datetime <= tpep_dropoff_datetime)',
        # Accepted value ranges for the remaining trip fields.
        '(1 <= passenger_count) & (passenger_count <= 6)',
        '(0 < trip_distance) & (trip_distance <= 330)',
        '(1 <= PULocationID) & (PULocationID <= 263)',
        '(1 <= DOLocationID) & (DOLocationID <= 263)',
        '(2.5 <= fare_amount)',
        '(extra in (0, 0.5, 1))',
        '(0 <= tip_amount)',
        '(fare_amount <= total_amount)',
        # Drop the extreme upper tail of each monetary column; the quantile is
        # evaluated on the frame being queried, i.e. before any row is removed.
        *(f'({column} <= {column}.quantile({percentile}))' for column in filter_columns)
    )
)

monthly_data = monthly_data.query(filter_string)

# Only the pick-up and drop-off zone IDs are needed from here on.
monthly_data = monthly_data[['PULocationID', 'DOLocationID']]

In [None]:
# Number of trips starting / ending in each zone, indexed by zone ID.
pickup_counts = monthly_data.groupby(by='PULocationID').size()
dropoff_counts = monthly_data.groupby(by='DOLocationID').size()

# Building the frame from a dict of Series aligns them on the union of their
# indices, so a zone that only appears as a drop-off (or only as a pick-up)
# still gets a row, with NaN in the column where it has no trips.
# Frequencies are stored on a natural-log scale.
frequency = pd.DataFrame(
    {
        'PUFrequency': np.log(pickup_counts),
        'DOFrequency': np.log(dropoff_counts),
    }
)

In [None]:
# Zone geometries, read from the shapefile next to the notebook.
zones_path = 'taxi_zones/taxi_zones.shp'
data = gpd.read_file(zones_path)

In [None]:
# Attach the per-zone log-frequencies to the zone geometries.
# `frequency` is indexed by zone ID (values in 1..263), while `data` keeps the
# positional index it was loaded with, so an index-to-index join would pair
# each polygon with a different zone's numbers. Match on the zone ID column.
# NOTE(review): assumes the shapefile exposes a 'LocationID' column holding
# the same IDs as PULocationID / DOLocationID — confirm against the file.
coloured_data = data.join(other=frequency, on='LocationID')

# Zones without any recorded trip have no frequency row; show them as 0.
frequency_columns = ['PUFrequency', 'DOFrequency']
coloured_data[frequency_columns] = coloured_data[frequency_columns].fillna(value=0)

In [None]:
# Two stacked maps: pick-up frequency on top, drop-off frequency below.
fig, ax = plt.subplots(2, 1, figsize=(13, 26))

# (column to colour by, panel title) for each axis, top to bottom.
panels = (
    ('PUFrequency', 'Pick Up Frequency'),
    ('DOFrequency', 'Drop Off Frequency'),
)

for axis, (column_name, heading) in zip(ax, panels):
    # Both panels share the same colour map and upper limit so their colours
    # are directly comparable.
    coloured_data.plot(cmap='hot', vmax=13.5, column=column_name, ax=axis, legend=True)

    # Hide the axis ticks on both axes.
    axis.set_xticks([])
    axis.set_yticks([])

    axis.title.set_text(heading)

plt.show()