In [1]:
import os
# Make sure the 'output' folder exists before anything tries to write into it.
os.makedirs('output', exist_ok=True)

import sys
# Add the parent directory to the import search path
# (presumably where the local `utils` module lives - TODO confirm).
sys.path.append('../')

# Project-local helpers called in later cells.
from utils import load_walksheds, overlay_wks

import pandas as pd
import numpy as np
import geopandas as gpd
import warnings
from fuzzywuzzy import process
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

* Load the walksheds at 0.5 and 0.75 miles

* Read IPCD data to geopandas https://data-usdot.opendata.arcgis.com/datasets/usdot::intermodal-passenger-connectivity-database-ipcd/explore?filters=eyJzdGF0ZSI6WyJEQyIsIk1EIiwiVkEiXX0%3D&location=41.815415%2C-121.815535%2C3.99&showTable=true
  
* Read in Amtrak stations dataset https://data-usdot.opendata.arcgis.com/datasets/amtrak-stations/explore

* Read in Amtrak ridership data for DC, MD, and VA from https://www.amtrak.com/state-fact-sheets

* Read in MTA MARC data https://data.imap.maryland.gov/datasets/e476dcb6dc154683ab63f23472bed5d6/about

* Read in WMATA bus ridership data 


In [2]:
# Walkshed layers from the project helper (two objects, unpacked in order).
wksp5, wksp75 = load_walksheds()

# Amtrak: ridership workbook plus station points restricted to MD, DC and VA.
amtrak = pd.read_excel('../../Data/dmv_amtrak_ridership.xlsx', engine='openpyxl')
amtrak_geocode = gpd.read_file('../../Data/Amtrak_Stations.geojson')
amtrak_geocode = amtrak_geocode[
    amtrak_geocode['state'].isin(['MD', 'DC', 'VA'])
].reset_index(drop=True)

# IPCD and MARC station layers.
ipcd = gpd.read_file('../../Data/Intermodal_Passenger_Connectivity_Database_(IPCD).geojson')
marc = gpd.read_file('../../Data/MTA MARC train stations.geojson')

# Jan 2017 ridership file: only the station name and the raw 'sum' column are kept.
bus = pd.read_csv('../../Data/Jan 2017 metro ridership.csv')
bus = bus[['stations_name', 'sum']]

# bus2022: total COUNT per START_PLACE_NAME from the combined ridership workbook.
bus2022 = (
    pd.read_excel('../All Ridership/output/all_ridership.xlsx', engine='openpyxl')
    .groupby('START_PLACE_NAME')
    .agg({'COUNT': 'sum'})
    .reset_index()
)

# Station layer whose 'Name' column is used later for name matching.
stn = gpd.read_file('../../Data/stations_all.geojson')



### Bus mode Ridership

##### Redundant code: superseded by the more recent April - May 2022 data below. This section uses Jan 2017 WMATA ridership data.

In [3]:
# Turn the comma-formatted 'sum' strings into integers, discard the raw column,
# and total the ridership per station name.
bus = (
    bus.assign(ridership=bus['sum'].str.replace(',', '').astype(np.int64))
    .drop('sum', axis=1)
    .groupby('stations_name')
    .agg({'ridership': 'sum'})
    .reset_index()
)

# Minimum fuzzy-match score passed to extractOne as score_cutoff.
threshold = 85
matches = bus['stations_name'].apply(
    lambda raw_name: process.extractOne(raw_name, stn['Name'], score_cutoff=threshold)
)

# First element of each match result is the matched station name; None when nothing matched.
bus['matched_key'] = [hit[0] if hit else None for hit in matches]


In [4]:
# Keep only the matched station name and its ridership total.
bus = bus.loc[:, ['matched_key', 'ridership']]

In [5]:
# Use 'Name_1' as the station-name column, matching the other ridership tables.
bus = bus.rename({'matched_key': 'Name_1'}, axis='columns')

##### April - May 2022 WMATA ridership data 

-

In [6]:
# Align the column names with the other ridership tables.
bus2022 = bus2022.rename(
    {'START_PLACE_NAME': 'Name_1', 'COUNT': 'ridership'},
    axis='columns',
)

In [7]:
# Upper-case the station names.
bus2022 = bus2022.assign(Name_1=bus2022['Name_1'].str.upper())

In [8]:
# Preview the first rows of the renamed, upper-cased table.
bus2022.head()

Unnamed: 0,Name_1,ridership
0,ADDISON ROAD,19259
1,ANACOSTIA,40970
2,ARCHIVES-NAVY MEMORIAL,76961
3,ARLINGTON CEMETERY,27714
4,BALLSTON,87634


### Air mode Ridership

* Flight data for 2022, obtained from: https://www.transtats.bts.gov/Data_Elements.aspx?Data=1
* Create a dataframe with the airport names and their respective flight ridership

In [9]:
# Read the two airport workbooks in order: DCA first, then Dulles.
dca, dulles = (
    pd.read_excel(workbook, engine='openpyxl')
    for workbook in (
        '../../Data/Airport DCA.xlsx',
        '../../Data/aiport (Dulles).xlsx',
    )
)

In [10]:
# One row per airport; ridership is the column total of each workbook
# (the two workbooks spell the column differently: 'total' vs 'Total').
flight = pd.DataFrame(
    {
        'Name_1': [
            'Reagan Washington National Airport',
            'Washington Dulles International Airport',
        ],
        'ridership': [
            dca['total'].sum(),
            dulles['Total'].sum(),
        ],
    }
)

In [11]:
# Upper-case the airport names, then display the table.
flight = flight.assign(Name_1=flight['Name_1'].str.upper())
flight

Unnamed: 0,Name_1,ridership
0,REAGAN WASHINGTON NATIONAL AIRPORT,11540151
1,WASHINGTON DULLES INTERNATIONAL AIRPORT,10250900



### Rail mode Ridership 

* Process ridership for Amtrak and MARC
* Concatenate bus, rail, and flight mode ridership
* Rename KING STREET to KING ST-OLD TOWN
* Export to CSV

In [12]:
# Overlay the MARC stations with the project helper; it returns two frames
# (presumably the 0.5- and 0.75-mile walkshed results - TODO confirm).
marc_metro_p5, marc_metro_p75 = overlay_wks(marc)

# For each frame: keep the needed columns, fill gaps with 0, add weekday and
# weekend averages into one ridership column, then drop the two inputs.
marc_metro_p5 = (
    marc_metro_p5[['Name_1', 'Avg_Wkdy', 'Avg_Wknd', 'geometry']]
    .fillna(0)
    .assign(ridership_p5=lambda frame: frame['Avg_Wkdy'] + frame['Avg_Wknd'])
    .drop(columns=['Avg_Wkdy', 'Avg_Wknd'])
)
marc_metro_p75 = (
    marc_metro_p75[['Name_1', 'Avg_Wkdy', 'Avg_Wknd', 'geometry']]
    .fillna(0)
    .assign(ridership_p75=lambda frame: frame['Avg_Wkdy'] + frame['Avg_Wknd'])
    .drop(columns=['Avg_Wkdy', 'Avg_Wknd'])
)

In [13]:
# Preview the first two rows of the MARC p75 table.
marc_metro_p75.head(2)

Unnamed: 0,Name_1,geometry,ridership_p75
0,NEW CARROLLTON,POINT (-76.87231 38.94814),322.0
1,JUDICIARY SQUARE,POINT (-77.00616 38.89781),1452.0


In [14]:
# Keep only the text before the first comma of each station name.
amtrak_geocode['stationnam'] = amtrak_geocode['stationnam'].str.split(',').str.get(0)

In [15]:
# Inner-join the station points to the ridership table (station name == City);
# rows without an exact match on either side are dropped by the join.
amtrak_met = amtrak_geocode.merge(
    amtrak,
    how='inner',
    left_on='stationnam',
    right_on='City',
)
amtrak_met = amtrak_met[['stationnam', 'Ridership', 'geometry']]

In [16]:
# Overlay the merged Amtrak stations with the project helper; it returns two frames
# (presumably the 0.5- and 0.75-mile walkshed results - TODO confirm).
amtrak_metro_p5, amtrak_metro_p75 = overlay_wks(amtrak_met)

In [17]:
# Remove the columns that are not needed for the ridership tables.
amtrak_metro_p5 = amtrak_metro_p5.drop(
    columns=['Acres', 'Shape_Leng', 'Shape_Area', 'StnCode', 'stationnam']
)
amtrak_metro_p75 = amtrak_metro_p75.drop(
    columns=['Acres', 'Shape_Leng', 'Shape_Area', 'StnCode', 'stationnam']
)

In [18]:
# Give each frame its own ridership column name (p5 / p75).
amtrak_metro_p5 = amtrak_metro_p5.rename({'Ridership': 'ridership_p5'}, axis='columns')
amtrak_metro_p75 = amtrak_metro_p75.rename({'Ridership': 'ridership_p75'}, axis='columns')

In [19]:
# Stack the Amtrak rows on top of the MARC rows; the original row labels are kept.
rail_p5 = pd.concat(objs=[amtrak_metro_p5, marc_metro_p5], axis=0)
rail_p75 = pd.concat(objs=[amtrak_metro_p75, marc_metro_p75], axis=0)

In [20]:
# Display the stacked Amtrak + MARC table for p5.
rail_p5

Unnamed: 0,ridership_p5,Name_1,geometry
0,211971.0,KING ST-OLD TOWN,POINT (-77.06233 38.80651)
1,169699.0,NEW CARROLLTON,POINT (-76.87150 38.94811)
2,4058.0,ROCKVILLE,POINT (-77.14600 39.08455)
3,3631677.0,UNION STATION,POINT (-77.00642 38.89700)
0,322.0,NEW CARROLLTON,POINT (-76.87231 38.94814)
1,1452.0,UNION STATION,POINT (-77.00616 38.89781)
2,91.0,UNION STATION,POINT (-77.00616 38.89781)
3,107.0,UNION STATION,POINT (-77.00617 38.89781)
4,9.0,GREENBELT,POINT (-76.91259 39.00994)
5,12.0,COLLEGE PARK-U OF MD,POINT (-76.92874 38.97794)


In [21]:
# Total the rail ridership per station name and turn 'Name_1' back into a column.
# The first positional argument of reset_index is `level`, not `drop`, so the
# earlier reset_index(False) only worked because False was accepted as level 0.
# A plain reset_index() yields the same table without that accident.
rail_p5 = rail_p5.groupby('Name_1').agg({'ridership_p5': 'sum'}).reset_index()
rail_p75 = rail_p75.groupby('Name_1').agg({'ridership_p75': 'sum'}).reset_index()

In [22]:
# Stack the airport rows on top of the bus2022 rows with a fresh 0..n-1 index.
bus_flight = pd.concat([flight, bus2022], ignore_index=True)

In [23]:
# Sum per station name, keeping 'Name_1' as a regular column.
bus_flight = bus_flight.groupby('Name_1', as_index=False).sum()

In [24]:
# p5 table: label the bus/flight totals as ridership_p5 and stack the rail totals under them.
bus_flight = bus_flight.rename({'ridership': 'ridership_p5'}, axis='columns')
intercity_p5 = pd.concat([bus_flight, rail_p5], ignore_index=True)

# p75 table: the same bus/flight totals, relabelled, stacked with the p75 rail totals.
bus_flight = bus_flight.rename({'ridership_p5': 'ridership_p75'}, axis='columns')
intercity_p75 = pd.concat([bus_flight, rail_p75], ignore_index=True)

In [25]:
# Relabel 'KING STREET' rows as 'KING ST-OLD TOWN' in both tables so the later
# per-name totals treat them as one station.
for _hub_table in (intercity_p5, intercity_p75):
    _hub_table.loc[_hub_table['Name_1'] == 'KING STREET', 'Name_1'] = 'KING ST-OLD TOWN'

In [26]:
# Collapse rows that share a station name into one total per name and turn
# 'Name_1' back into a column.
# The first positional argument of reset_index is `level`, not `drop`, so the
# earlier reset_index(False) only worked because False was accepted as level 0.
# A plain reset_index() yields the same table without that accident.
intercity_p5 = intercity_p5.groupby('Name_1').agg({'ridership_p5': 'sum'}).reset_index()
intercity_p75 = intercity_p75.groupby('Name_1').agg({'ridership_p75': 'sum'}).reset_index()

In [27]:
# Write both intercity tables to the output folder without the row index.
for _table, _csv_path in (
    (intercity_p5, 'output/inter_cityhub_p5.csv'),
    (intercity_p75, 'output/inter_cityhub_p75.csv'),
):
    _table.to_csv(_csv_path, index=False)