In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import openpyxl
import warnings
warnings.filterwarnings("ignore")

# AM Dataframe

* passengers includes the number of passengers in the desired rider class
* fare_track_miles includes the fare per track mile
* auto_tt_minpmile includes the interpolated auto travel times
* bus_tt_minpmile includes the bus travel times per mile
* households_half_mile includes the proportional number of households within each walkshed
* am_parking_users includes the number of parking users per OD pair
* num_bus_lines contains the number of bus lines and stops
* jobs_half_mile contains the proportional number of jobs within each walkshed
* trains_per_hour contains the number of trains in each time period (only AM will be used)
* terminal_station contains a dummy variable of whether or not a station is at the end of a line (1) or not (0)
* dist_to_core contains the distance from a station to the Metro Center station in miles
* parking_capacity contains the number of parking spots available per station
* mstns contains the station names and ID's used for merging the variables together

In [262]:
passengers = pd.read_excel("../Data Preprocessing/Ridership Data/output/full_fare_ridership.xlsx")
fare_track_miles = pd.read_excel("../Data Preprocessing/Fare per Track Mile/output/railOD_peak_fare_per_mile.xlsx")
auto_tt_minpmile = pd.read_csv("../Data Preprocessing/Interpolated Auto Travel Times/output/am_interpolated_auto_times.csv")
bus_tt_minpmile = pd.read_excel("../Data Preprocessing/Bus Travel Time/output/busttpermile_ML.xlsx") 
households_half_mile = pd.read_excel("../Data Preprocessing/Proportion of Households/output/walkshed_proportional_households_stations.xlsx") 
am_parking_users = pd.read_csv("../Data Preprocessing/Parking Users/output/am_parking_updated.csv")
num_bus_lines = pd.read_csv("../Data Preprocessing/Bus Lines and Stops/output/bus_line_stop.csv")
jobs_half_mile = pd.read_excel("../Data Preprocessing/Proportion of Jobs/output/proportional_jobs_stations.xlsx")
trains_per_hour = pd.read_excel("../Data/avgTrainThroughput_byPeriod_PeakOnly.xlsx") 
terminal_station = pd.read_csv("../Data/metro_ternimal_dummy.csv")
track_miles = pd.read_excel("../Data/railOD_trackMiles_spring2022.xlsx", header=1, index_col=0)
dist_to_core = pd.read_excel("../Data Preprocessing/Distance to the Core/output/stations.xlsx")
parking_capacity = pd.read_excel("../Data/parkingCapacityByStation.xlsx")
mstns = pd.read_excel("../Data/mstn_id_to_stn_name.xlsx")

# updated variables. 
hh_incomebelowpoverty = pd.read_csv("../Data Preprocessing/HH_IncomeBelowPoverty/output/HH_Income_povertylevel.csv")
hotels = pd.read_csv("../Data Preprocessing/Hotels Restaurants Entertainment/output/hotels_by_station.csv").drop('Unnamed: 0', axis=1)
entertainment = pd.read_csv("../Data Preprocessing/Hotels Restaurants Entertainment/output/entertainment_by_station.csv").drop('Unnamed: 0', axis=1)
restaurant = pd.read_csv("../Data Preprocessing/Hotels Restaurants Entertainment/output/restaurants_by_station.csv").drop('Unnamed: 0', axis=1)
inter_city = pd.read_csv("../Data Preprocessing/Intercity hubs/output/inter_cityhub.csv")
intersection_density = pd.read_csv("../Data Preprocessing/Intersection Density/output/proportional_intersection_density.csv")
job_access_by_transit = pd.read_csv("../Data Preprocessing/Job Accessibility 45mins by transit and Auto/job_auto_and_transit_accessibity45mins.csv")
convenience_store = pd.read_csv("../Data Preprocessing/number of convenience stores/output/convenience_by_station.csv").drop('Unnamed: 0', axis=1)
highschool_enrollment = pd.read_csv("../Data Preprocessing/Number of education institutions/output/Enrollment_highschool.csv")
college_enrollment = pd.read_csv("../Data Preprocessing/Number of education institutions/output/Enrollment_colleges.csv")
hh_0_car = pd.read_csv("../Data Preprocessing/Number of Households with no car or one car/victor-no of household w no car.csv")
hh_1_car = pd.read_csv("../Data Preprocessing/Number of Households with no car or one car/victor-no of household w 1 car.csv")
hh_morethan1_car = pd.read_csv("../Data Preprocessing/Number of Households with no car or one car/victor-no of household w more than 1cars.csv")
public_admin_jobs = pd.read_csv("../Data Preprocessing/Public Admin Jobs/output/proportional_publicadmin_workers_stations.csv")
senior_housing_afford = pd.read_csv("../Data Preprocessing/seniorhousing_affordability/senior_housing_facilitiesDMV.csv").drop('Unnamed: 0', axis=1)
bike_tt = pd.read_csv("../Data Preprocessing/bike travel time/output/bike_tt.csv").drop('count', axis=1)
bikeshare_cap = pd.read_csv("../Data Preprocessing/bikeshare capacity/output/bikeshare_capacity.csv")
bus_comp_index = pd.read_csv("../Data Preprocessing/bus competitiveness/bus_competativeness_index.csv")
ctpp_jobs = pd.read_csv("../Data Preprocessing/CTPP Jobs Data/output/CTPP_jobs.csv")

# All riders
all_riders = pd.read_csv("../Data Preprocessing/All Ridership/output/WMATA_entrances_exits_all_riders.csv").drop('Unnamed: 0', axis=1)

#### Merge the recent variables in a single df

* Sort the station names alphabetically
* Make the mstn file have columns of alternatively formatted station names to help with merging

In [3]:
mstns.sort_values(by = 'PRIMARY_NAME_FY23',inplace=True)
mstns['walkshed_filename'] = ['ADDISON ROAD-SEAT PLEASANT','ANACOSTIA','ARCHIVES-NAVY MEMORIAL-PENN QUARTER','ARLINGTON CEMETERY','Ashburn','BALLSTON-MU',
                              'BENNING ROAD','BETHESDA','BRADDOCK ROAD','BRANCH AVE','BROOKLAND-CUA','CAPITOL HEIGHTS','CAPITOL SOUTH','CHEVERLY',
                              'CLARENDON','CLEVELAND PARK','COLLEGE PARK-U OF MD','COLUMBIA HEIGHTS','CONGRESS HEIGHTS','COURT HOUSE','CRYSTAL CITY',
                              'DEANWOOD','LARGO TOWN CENTER','Washington Dulles International Airport', 'DUNN LORING-MERRIFIELD','DUPONT CIRCLE','EAST FALLS CHURCH','EASTERN MARKET','EISENHOWER AVENUE','FARRAGUT NORTH',
                              'FARRAGUT WEST','FEDERAL CENTER SW','FEDERAL TRIANGLE','FOGGY BOTTOM-GWU','FOREST GLEN','FORT TOTTEN','FRANCONIA-SPRINGFIELD',
                              'FRIENDSHIP HEIGHTS','GALLERY PL-CHINATOWN','GEORGIA AVE-PETWORTH','GLENMONT','GREENBELT','GREENSBORO','GROSVENOR-STRATHMORE',
                              'Herndon','HUNTINGTON',"PRINCE GEORGE'S PLAZA",'Innovation Center','JUDICIARY SQUARE','KING ST-OLD TOWN',"L'ENFANT PLAZA",'LANDOVER',
                              'Loudoun Gateway','MCLEAN','MCPHERSON SQUARE','MEDICAL CENTER','METRO CENTER','MINNESOTA AVE','MORGAN BOULEVARD',
                              'MT VERNON SQ 7TH ST-CONVENTION CENTER','NAVY YARD-BALLPARK','NAYLOR ROAD','NEW CARROLLTON','NOMA-GALLAUDET','WHITE FLINT','PENTAGON',
                              'PENTAGON CITY','POTOMAC AVE', "",'Reston Town Center','RHODE ISLAND AVE-BRENTWOOD','ROCKVILLE','RONALD REAGAN WASHINGTON NATIONAL AIRPORT',
                              'ROSSLYN','SHADY GROVE','SHAW-HOWARD U','SILVER SPRING','SMITHSONIAN','SOUTHERN AVENUE','SPRING HILL',
                              'STADIUM-ARMORY','SUITLAND','TAKOMA','TENLEYTOWN-AU','TWINBROOK','TYSONS CORNER','U STREET/AFRICAN-AMER CIVIL WAR MEMORIAL/CARDOZO',
                              'UNION STATION','VAN DORN STREET','VAN NESS-UDC','VIENNA/FAIRFAX-GMU','VIRGINIA SQUARE-GMU','WATERFRONT','WEST FALLS CHURCH-VT/UVA',
                              'WEST HYATTSVILLE','WHEATON','WIEHLE-RESTON EAST','WOODLEY PARK-ZOO/ADAMS MORGAN',]

mstns['WMATA_filename'] = ['Addison Road', 'Anacostia', 'Archives-Navy Memorial',
       'Arlington Cemetery', "", 'Ballston', 'Benning Road', 'Bethesda',
       'Braddock Road', 'Branch Avenue', 'Brookland', 'Capitol Heights',
       'Capitol South', 'Cheverly', 'Clarendon', 'Cleveland Park',
       'College Park-U of MD', 'Columbia Heights', 'Congress Heights',
       'Court House', 'Crystal City', 'Deanwood','Largo Town Center', "", 'Dunn Loring',
       'Dupont Circle', 'East Falls Church', 'Eastern Market',
       'Eisenhower Avenue', 'Farragut North', 'Farragut West',
       'Federal Center SW', 'Federal Triangle', 'Foggy Bottom',
       'Forest Glen', 'Fort Totten', 'Franconia-Springfield',
       'Friendship Heights', 'Gallery Place-Chinatown',
       'Georgia Avenue-Petworth', 'Glenmont', 'Greenbelt', 'Greensboro',
       'Grosvenor', "", 'Huntington',"Prince George's Plaza", "", 'Judiciary Square', 'King Street',
       "L'Enfant Plaza", 'Landover', "", 'McLean',
       'McPherson Square', 'Medical Center', 'Metro Center',
       'Minnesota Avenue', 'Morgan Blvd.', 'Mt. Vernon Square-UDC',
       'Navy Yard', 'Naylor Road', 'New Carrollton', 'New York Ave','White Flint',
       'Pentagon', 'Pentagon City', 'Potomac Avenue', "", "",'Rhode Island Avenue','Rockville', 
        'Reagan Washington National Airport',
        'Rosslyn', 'Shady Grove',
       'Shaw-Howard University', 'Silver Spring', 'Smithsonian',
       'Southern Avenue', 'Spring Hill', 'Stadium-Armory', 'Suitland',
       'Takoma', 'Tenleytown-AU', 'Twinbrook', 'Tysons Corner',
       'U Street-Cardozo', 'Union Station', 'Van Dorn Street',
       'Van Ness-UDC', 'Vienna', 'Virginia Square-GMU', 'Waterfront',
       'West Falls Church', 'West Hyattsville', 'Wheaton', 
       'Wiehle', 'Woodley Park-Zoo']

mstns['track_miles_names'] = ['Addison Road', 'Anacostia', 'Archives', 'Arlington Cemetery', "",
       'Ballston-MU', 'Benning Road', 'Bethesda', 'Braddock Road',
       'Branch Ave', 'Brookland-CUA', 'Capitol Heights', 'Capitol South',
       'Cheverly', 'Clarendon', 'Cleveland Park', 'College Park-U of Md',
       'Columbia Heights', 'Congress Heights', 'Court House',
       'Crystal City', 'Deanwood', 'Largo Town Center', "", 'Dunn Loring', 'Dupont Circle',
       'East Falls Church', 'Eastern Market', 'Eisenhower Ave',
       'Farragut North', 'Farragut West', 'Federal Center SW',
       'Federal Triangle', 'Foggy Bottom-GWU', 'Forest Glen',
       'Fort Totten', 'Franconia-Springfield', 'Friendship Heights',
       'Gallery Place', 'Georgia Ave-Petworth', 'Glenmont', 'Greenbelt',
       'Greensboro', 'Grosvenor-Strathmore', "",'Huntington',"Prince George's Plaza", "",
       'Judiciary Square', 'King St-Old Town', "L'Enfant Plaza",
       'Landover',"", 'McLean', 'McPherson Square',
       'Medical Center', 'Metro Center', 'Minnesota Ave',
       'Morgan Boulevard', 'Mt Vernon Sq', 'Navy Yard-Ballpark',
       'Naylor Road', 'New Carrollton', 'NoMa-Gallaudet U','White Flint', 'Pentagon',
       'Pentagon City', 'Potomac Ave', "","",
       'Rhode Island Ave', 'Rockville',
       'Ronald Reagan Washington National Airport', 'Rosslyn',
       'Shady Grove', 'Shaw-Howard Univ', 'Silver Spring', 'Smithsonian',
       'Southern Ave', 'Spring Hill', 'Stadium-Armory', 'Suitland',
       'Takoma', 'Tenleytown-AU', 'Twinbrook', 'Tysons Corner',
       'U Street', 'Union Station', 'Van Dorn Street', 'Van Ness-UDC',
       'Vienna', 'Virginia Square-GMU', 'Waterfront', 'West Falls Church',
       'West Hyattsville', 'Wheaton',  'Wiehle',
       'Woodley Park']
mstns['auto_filename'] = ['ADDISON ROAD-SEAT PLEASANT', 'ANACOSTIA',
       'ARCHIVES-NAVY MEMORIAL-PENN QUARTER', 'ARLINGTON CEMETERY',
       'ASHBURN', 'BALLSTON-MU', 'BENNING ROAD', 'BETHESDA',
       'BRADDOCK ROAD', 'BRANCH AVE', 'BROOKLAND-CUA', 'CAPITOL HEIGHTS',
       'CAPITOL SOUTH', 'CHEVERLY', 'CLARENDON', 'CLEVELAND PARK',
       'COLLEGE PARK-U OF MD', 'COLUMBIA HEIGHTS', 'CONGRESS HEIGHTS',
       'COURT HOUSE', 'CRYSTAL CITY', 'DEANWOOD', 'DOWNTOWN LARGO', 'WASHINGTON DULLES INTERNATIONAL AIRPORT',
       'DUNN LORING-MERRIFIELD', 'DUPONT CIRCLE', 'EAST FALLS CHURCH',
       'EASTERN MARKET', 'EISENHOWER AVENUE', 'FARRAGUT NORTH',
       'FARRAGUT WEST', 'FEDERAL CENTER SW', 'FEDERAL TRIANGLE',
       'FOGGY BOTTOM-GWU', 'FOREST GLEN', 'FORT TOTTEN',
       'FRANCONIA-SPRINGFIELD', 'FRIENDSHIP HEIGHTS',
       'GALLERY PLACE-CHINATOWN', 'GEORGIA AVENUE-PETWORTH', 'GLENMONT',
       'GREENBELT', 'GREENSBORO', 'GROSVENOR-STRATHMORE', 'HERNDON',
       'HUNTINGTON', 'HYATTSVILLE CROSSING', 'INNOVATION CENTER',
       'JUDICIARY SQUARE', 'KING STREET-OLD TOWN', "L'ENFANT PLAZA",
       'LANDOVER', 'LOUDOUN GATEWAY', 'MCLEAN', 'MCPHERSON SQUARE',
       'MEDICAL CENTER', 'METRO CENTER', 'MINNESOTA AVENUE',
       'MORGAN BOULEVARD', 'MT VERNON SQ 7TH ST-CONVENTION CENTER',
       'NAVY YARD-BALLPARK', 'NAYLOR ROAD', 'NEW CARROLLTON',
       'NOMA-GALLAUDET U', 'NORTH BETHESDA', 'PENTAGON', 'PENTAGON CITY',
       'POTOMAC AVE', "",'RESTON', 'RHODE ISLAND AVE-BRENTWOOD', 'ROCKVILLE',
       'RONALD REAGAN WASHINGTON NATIONAL AIRPORT', 'ROSSLYN',
       'SHADY GROVE', 'SHAW-HOWARD UNIVERSITY', 'SILVER SPRING',
       'SMITHSONIAN', 'SOUTHERN AVENUE', 'SPRING HILL', 'STADIUM-ARMORY',
       'SUITLAND', 'TAKOMA', 'TENLEYTOWN-AU', 'TWINBROOK', 'TYSONS',
       'U STREET/AFRICAN-AMER CIVIL WAR MEMORIAL/CARDOZO',
       'UNION STATION', 'VAN DORN STREET', 'VAN NESS-UDC',
       'VIENNA/FAIRFAX-GMU', 'VIRGINIA SQUARE-GMU',
       'WATERFRONT',
       'WEST FALLS CHURCH-VT/UVA', 'WEST HYATTSVILLE', 'WHEATON',
       'WIEHLE-RESTON EAST', 'WOODLEY PARK-ZOO/ADAMS MORGAN']

## AM Passenger Miles

* Filter for the AM trip passengers
* Format track_miles into OD pairs
* Give station names in track_miles proper MSTN IDs
* Create an od_pair key column in both the passengers and track miles dataframes
* merge track miles to passengers in the am, multiply the track miles by passengers, consolidate the columns, rename to passenger column

In [4]:
am_passengers = passengers[passengers['period']=='AM Peak']

In [5]:
track_miles = track_miles.stack().rename_axis(('O',"D")).reset_index(name='track_miles')

In [6]:
track_miles2 = mstns.merge(track_miles, left_on ='track_miles_names' , right_on= 'O')
track_miles2 = mstns.merge(track_miles2, left_on ='track_miles_names' , right_on='D' )

In [7]:
am_passengers['pairs'] = am_passengers.apply(lambda x: str(x['ID'])+str(0)+str(x['ID_1']), axis=1)
track_miles2['pairs'] = track_miles2.apply(lambda x: str(x['ID_y'])+str(0)+str(x['ID_x']), axis=1)


In [8]:
milesmerge = track_miles2.merge(am_passengers, left_on='pairs', right_on='pairs')
milesmerge['riders_miles'] = milesmerge['track_miles'] * milesmerge['COUNT']
am_dataframe_new = milesmerge[['O', 'D','walkshed_filename_x', 'walkshed_filename_y', 'track_miles', 'ID_x', 'ID_y','pairs', 'COUNT', 'riders_miles', ]]
am_dataframe_new.rename(columns={'COUNT':'passengers'}, inplace=True)
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15
1,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,8,114.16
2,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,56,137.76
3,Bethesda,Addison Road,ADDISON ROAD-SEAT PLEASANT,BETHESDA,16.36,MSTN_062,MSTN_052,MSTN_0520MSTN_062,7,114.52
4,Braddock Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BRADDOCK ROAD,15.0,MSTN_062,MSTN_078,MSTN_0780MSTN_062,4,60.0


## AM Trains per Hour

* Filter only for weekday services and in the AM peak
* Groupby and get the sum of trains per hour
* Merge into the main AM dataframe

In [9]:
trains_per_hour2 = trains_per_hour[(trains_per_hour['ARRIVAL_PERIOD']=='AM Peak') & (trains_per_hour['SERVICETYPE']=='Weekday')]
trains_per_hour2.head()

Unnamed: 0,STATION_ID,STOP_ID,DIR_ID,SERVICETYPE,ARRIVAL_PERIOD,AVG_TRAINS
8,MSTN_001,F06,1,Weekday,AM Peak,17
10,MSTN_001,F06,2,Weekday,AM Peak,16
20,MSTN_002,F02,1,Weekday,AM Peak,31
22,MSTN_002,F02,2,Weekday,AM Peak,31
32,MSTN_003,G01,1,Weekday,AM Peak,24


In [10]:
trains_per_hour2 = trains_per_hour2.groupby(['STATION_ID']).sum().reset_index()
trains_per_hour2 = trains_per_hour2[['STATION_ID','AVG_TRAINS']]
trains_per_hour2.head()

Unnamed: 0,STATION_ID,AVG_TRAINS
0,MSTN_001,33
1,MSTN_002,62
2,MSTN_003,51
3,MSTN_004,56
4,MSTN_005,78


In [11]:
am_dataframe_new = am_dataframe_new.merge(trains_per_hour2, left_on='ID_x', right_on='STATION_ID')
am_dataframe_new = am_dataframe_new.merge(trains_per_hour2, left_on='ID_y', right_on='STATION_ID')

In [12]:
am_dataframe_new.drop(['STATION_ID_x', 'STATION_ID_y'], axis=1, inplace=True)

## Peak Fare & Fare per Track Mile

* Subset only the station ID columns from fare_track_miles
* Create a unique OD pair column
* Merge into the main AM dataframe

In [13]:
fare_track_miles= fare_track_miles[['O_MSTN_ID', 'D_MSTN_ID', 'peak_fare_per_mile2']]

In [14]:
fare_track_miles['pairs'] = fare_track_miles.apply(lambda x: str(x['O_MSTN_ID'])+str(0)+str(x['D_MSTN_ID']), axis=1)

In [15]:
am_dataframe_new = am_dataframe_new.merge(fare_track_miles)
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,AVG_TRAINS_x,AVG_TRAINS_y,O_MSTN_ID,D_MSTN_ID,peak_fare_per_mile2
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,53,33,MSTN_001,MSTN_062,0.36715
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,MSTN_001,MSTN_002,0.712025
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,27,33,MSTN_001,MSTN_065,0.472756
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,53,33,MSTN_001,MSTN_068,0.411184
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,51,33,MSTN_001,MSTN_003,0.39924


## Auto Travel Time & Auto Travel Time in Minutes/Mile

* Merge the interpolated auto travel times into the main AM dataframe

In [16]:
auto_tt_minpmile.head()

Unnamed: 0.1,Unnamed: 0,pairs,new_auto_tt2,new_auto_tt_per_mile2
0,0,MSTN_0620MSTN_062,6.699843,66.998432
1,1,MSTN_0010MSTN_062,13.18,1.641345
2,2,MSTN_0020MSTN_062,13.08,1.718791
3,3,MSTN_0650MSTN_062,30.537346,2.681066
4,4,MSTN_0680MSTN_062,40.669998,3.116475


In [17]:
am_dataframe_new = am_dataframe_new.merge(auto_tt_minpmile, on='pairs')
am_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,AVG_TRAINS_x,AVG_TRAINS_y,O_MSTN_ID,D_MSTN_ID,peak_fare_per_mile2,Unnamed: 0,new_auto_tt2,new_auto_tt_per_mile2
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,53,33,MSTN_001,MSTN_062,0.36715,1,13.18,1.641345
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,MSTN_001,MSTN_002,0.712025,183,13.160672,4.16477
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,27,33,MSTN_001,MSTN_065,0.472756,274,19.663729,3.151239
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,53,33,MSTN_001,MSTN_068,0.411184,365,38.189999,4.896153
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,51,33,MSTN_001,MSTN_003,0.39924,456,23.147509,2.933778


## Bus Travel Time & Bus Travel Time in Minutes/Mile

* Create a unique OD pair column
* Subset only the pairs, bus travel time and bus travel time per mile columns
* Merge into the main AM dataframe

In [18]:
bus_tt_minpmile['pairs'] = bus_tt_minpmile.apply(lambda x: str(x['O_MSTN_ID'])+str(0)+str(x['D_MSTN_ID']), axis=1)

In [19]:
bus_tt_minpmile2 = bus_tt_minpmile[['pairs', 'Travel Time', 'bus_tt_per_mile']]
bus_tt_minpmile2.rename(columns={'Travel Time':'bus_tt'}, inplace=True)

In [20]:
am_dataframe_new = am_dataframe_new.merge(bus_tt_minpmile2, on='pairs')
am_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,AVG_TRAINS_x,AVG_TRAINS_y,O_MSTN_ID,D_MSTN_ID,peak_fare_per_mile2,Unnamed: 0,new_auto_tt2,new_auto_tt_per_mile2,bus_tt,bus_tt_per_mile
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,53,33,MSTN_001,MSTN_062,0.36715,1,13.18,1.641345,82.0,12.304868
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,MSTN_001,MSTN_002,0.712025,183,13.160672,4.16477,31.0,10.305395
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,27,33,MSTN_001,MSTN_065,0.472756,274,19.663729,3.151239,,
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,53,33,MSTN_001,MSTN_068,0.411184,365,38.189999,4.896153,95.0,12.088109
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,51,33,MSTN_001,MSTN_003,0.39924,456,23.147509,2.933778,44.0,10.595066


## Households per Half Mile

* Subset only the ID, proportion of houses and the total number of households. Rename ID to MSTN
* Merge once based on the destinations and again based on the origins
* Drop duplicate columns

In [21]:
households_half_mile = households_half_mile[['ID', 'proportionhouses', 'Total Households', ]]
households_half_mile.rename(columns={'ID':'MSTN'}, inplace=True)

In [22]:
households_half_mile

Unnamed: 0,MSTN,proportionhouses,Total Households
0,MSTN_062,528.921138,1663
1,MSTN_001,2075.704342,3639
2,MSTN_002,2214.692242,3007
3,MSTN_065,0.003184,1
4,MSTN_097,555.593560,936
...,...,...,...
92,MSTN_046,2712.232190,3879
93,MSTN_045,4011.345203,6176
94,MSTN_091,1226.261747,3915
95,MSTN_040,5365.381132,7549


In [23]:
am_dataframe_new = am_dataframe_new.merge(households_half_mile, left_on='ID_x', right_on='MSTN')
am_dataframe_new = am_dataframe_new.merge(households_half_mile, left_on='ID_y', right_on='MSTN')
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,new_auto_tt2,new_auto_tt_per_mile2,bus_tt,bus_tt_per_mile,MSTN_x,proportionhouses_x,Total Households_x,MSTN_y,proportionhouses_y,Total Households_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,13.18,1.641345,82.0,12.304868,MSTN_062,528.921138,1663,MSTN_001,2075.704342,3639
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,13.160672,4.16477,31.0,10.305395,MSTN_002,2214.692242,3007,MSTN_001,2075.704342,3639
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,...,19.663729,3.151239,,,MSTN_065,0.003184,1,MSTN_001,2075.704342,3639
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,...,38.189999,4.896153,95.0,12.088109,MSTN_068,11603.906894,13574,MSTN_001,2075.704342,3639
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,...,23.147509,2.933778,44.0,10.595066,MSTN_003,3304.80553,4621,MSTN_001,2075.704342,3639


In [24]:
am_dataframe_new.drop(['O_MSTN_ID','D_MSTN_ID','MSTN_x','MSTN_y', 'Unnamed: 0'], axis =1, inplace=True)

## AM Parking Users

* Give station names for the am_parking_users proper MSTN IDs
* Create a unique OD pairs column
* Subset only the pairs and the parking_user column
* Merge into the main AM dataframe

In [25]:
am_parking_users2 = mstns.merge(am_parking_users, left_on = 'WMATA_filename', right_on= 'START_PLACE_NAME')
am_parking_users2 = mstns.merge(am_parking_users2, left_on = 'WMATA_filename' , right_on='END_PLACE_NAME')
am_parking_users2.head()

Unnamed: 0.1,ID_x,PRIMARY_NAME_FY23_x,walkshed_filename_x,WMATA_filename_x,track_miles_names_x,auto_filename_x,ID_y,PRIMARY_NAME_FY23_y,walkshed_filename_y,WMATA_filename_y,track_miles_names_y,auto_filename_y,Unnamed: 0,START_PLACE_NAME,END_PLACE_NAME,parking_user
0,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_001,Anacostia,ANACOSTIA,Anacostia,Anacostia,ANACOSTIA,87,Anacostia,Addison Road,0.0
1,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_068,Ballston-MU,BALLSTON-MU,Ballston,Ballston-MU,BALLSTON-MU,258,Ballston,Addison Road,0.0
2,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_003,Benning Road,BENNING ROAD,Benning Road,Benning Road,BENNING ROAD,348,Benning Road,Addison Road,0.0
3,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_052,Bethesda,BETHESDA,Bethesda,Bethesda,BETHESDA,437,Bethesda,Addison Road,0.0
4,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_078,Braddock Road,BRADDOCK ROAD,Braddock Road,Braddock Road,BRADDOCK ROAD,521,Braddock Road,Addison Road,0.0


In [26]:
am_parking_users2['pairs'] = am_parking_users2.apply(lambda x: str(x['ID_y'])+str(0)+str(x['ID_x']), axis=1)

In [27]:
am_parking_users2 = am_parking_users2[['pairs', 'parking_user']]

In [28]:
am_dataframe_new = am_dataframe_new.merge(am_parking_users2, on='pairs', how='left')
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,peak_fare_per_mile2,new_auto_tt2,new_auto_tt_per_mile2,bus_tt,bus_tt_per_mile,proportionhouses_x,Total Households_x,proportionhouses_y,Total Households_y,parking_user
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,0.36715,13.18,1.641345,82.0,12.304868,528.921138,1663,2075.704342,3639,0.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,0.712025,13.160672,4.16477,31.0,10.305395,2214.692242,3007,2075.704342,3639,17.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,...,0.472756,19.663729,3.151239,,,0.003184,1,2075.704342,3639,0.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,...,0.411184,38.189999,4.896153,95.0,12.088109,11603.906894,13574,2075.704342,3639,0.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,...,0.39924,23.147509,2.933778,44.0,10.595066,3304.80553,4621,2075.704342,3639,0.0


## Parking Capacity

* Drop the station column from parking_capacity and rename the MSTN_ID column to avoid errors while merging
* Merge into the AM dataframe first based on the origins and again based on the destinations
* Fill NAs in parking capacity with 0

In [29]:
parking_capacity.drop('STATION', axis=1, inplace=True)
parking_capacity.rename(columns = {'MSTN_ID':'mstn1'}, inplace=True)

In [30]:
am_dataframe_new = am_dataframe_new.merge(parking_capacity, left_on='ID_x', right_on='mstn1', how='left')
am_dataframe_new = am_dataframe_new.merge(parking_capacity, left_on='ID_y', right_on='mstn1', how='left')
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,bus_tt_per_mile,proportionhouses_x,Total Households_x,proportionhouses_y,Total Households_y,parking_user,mstn1_x,PARKING_CAPACITY_x,mstn1_y,PARKING_CAPACITY_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,12.304868,528.921138,1663,2075.704342,3639,0.0,MSTN_062,1268.0,MSTN_001,808.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,10.305395,2214.692242,3007,2075.704342,3639,17.0,,,MSTN_001,808.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,...,,0.003184,1,2075.704342,3639,0.0,,,MSTN_001,808.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,...,12.088109,11603.906894,13574,2075.704342,3639,0.0,,,MSTN_001,808.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,...,10.595066,3304.80553,4621,2075.704342,3639,0.0,,,MSTN_001,808.0


In [31]:
am_dataframe_new[['PARKING_CAPACITY_x','PARKING_CAPACITY_y',]] = am_dataframe_new[['PARKING_CAPACITY_x','PARKING_CAPACITY_y',]].fillna(0)

## Number of Bus Lines

* Rename the bus line count and bus stop columns to distinguish them
* Merge into the main AM dataframe, first based on destinations and again based on origins
* Drop columns that are redundant

In [32]:
num_bus_lines.rename(columns={'line_count':'bus_line_count','stop_count':'bus_stop_count'}, inplace=True)
num_bus_lines = num_bus_lines[['MSTN','bus_line_count','bus_stop_count']]

In [33]:
am_dataframe_new = am_dataframe_new.merge(num_bus_lines, left_on='ID_x', right_on='MSTN')
am_dataframe_new = am_dataframe_new.merge(num_bus_lines, left_on='ID_y', right_on='MSTN')
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,mstn1_x,PARKING_CAPACITY_x,mstn1_y,PARKING_CAPACITY_y,MSTN_x,bus_line_count_x,bus_stop_count_x,MSTN_y,bus_line_count_y,bus_stop_count_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,MSTN_062,1268.0,MSTN_001,808.0,MSTN_062,36.0,18.0,MSTN_001,64.0,36.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,,0.0,MSTN_001,808.0,MSTN_002,86.0,50.0,MSTN_001,64.0,36.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,...,,0.0,MSTN_001,808.0,MSTN_065,9.0,0.0,MSTN_001,64.0,36.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,...,,0.0,MSTN_001,808.0,MSTN_068,35.0,30.0,MSTN_001,64.0,36.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,...,,0.0,MSTN_001,808.0,MSTN_003,30.0,34.0,MSTN_001,64.0,36.0


In [34]:
am_dataframe_new.drop(['MSTN_x','MSTN_y', 'mstn1_x','mstn1_y'], axis =1, inplace=True)

## Jobs per Half Mile

* Merge Jobs into the AM dataframe first based on the origins and again based on the destinations

In [35]:
am_dataframe_new = am_dataframe_new.merge(jobs_half_mile, left_on='walkshed_filename_x', right_on='Name_1')
am_dataframe_new = am_dataframe_new.merge(jobs_half_mile, left_on='walkshed_filename_y', right_on='Name_1')

In [36]:
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,PARKING_CAPACITY_x,PARKING_CAPACITY_y,bus_line_count_x,bus_stop_count_x,bus_line_count_y,bus_stop_count_y,Name_1_x,All_Jobs_x,Name_1_y,All_Jobs_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,1268.0,808.0,36.0,18.0,64.0,36.0,ADDISON ROAD-SEAT PLEASANT,205.510639,ANACOSTIA,2332.571897
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,0.0,808.0,86.0,50.0,64.0,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,2332.571897
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,...,0.0,808.0,9.0,0.0,64.0,36.0,ARLINGTON CEMETERY,122.300629,ANACOSTIA,2332.571897
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,...,0.0,808.0,35.0,30.0,64.0,36.0,BALLSTON-MU,51873.732661,ANACOSTIA,2332.571897
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,...,0.0,808.0,30.0,34.0,64.0,36.0,BENNING ROAD,1301.51626,ANACOSTIA,2332.571897


## Distance to the Core

* Give the station names in dist_to_core proper formatting to merge with mstns
* Give station names proper MSTN IDs
* Subset only the ID and distance_miles columns. Rename both for easy merging and clarity
* Merge into the AM dataframe first based on destinations, then based on origins

In [37]:
dist_to_core['STATIONNAM']=dist_to_core['STATIONNAM'].str.replace(' : 0 - 2640','')
dist_to_core['STATIONNAM']=dist_to_core['STATIONNAM'].str.replace(' : 0 - 22.4525758392805','')
dist_to_core['STATIONNAM'] = dist_to_core['STATIONNAM'].str.title()
dist_to_core['STATIONNAM'] = dist_to_core['STATIONNAM'].replace({"Addison Road-Seat Pleasant": 'Addison Road', "Archives-Navy Mem'L-Penn Quarter":"Archives", "Ballston-Mu": "Ballston-MU", "Brookland-Cua":"Brookland-CUA", "Dunn Loring-Merrifield": "Dunn Loring", "Largo Town Center":"Downtown Largo", "Eisenhower Avenue":"Eisenhower Ave", 'Federal Center Sw':'Federal Center SW', "Foggy Bottom-Gwu":"Foggy Bottom-GWU","Gallery Pl-Chinatown":"Gallary Place", "Mclean":"McLean", "Mcpherson Square":"McPherson Sq", "Mt Vernon Sq/7Th St-Convention Center":"Mt Vernon Sq", "Noma-Gallaudet U-New York Ave":"NoMa-Gallaudet U", "Rhode Island Ave-Brentwood":"Rhode Island Ave",  "Prince George'S Plaza":"Hyattsville Crossing", "Tenleytown-Au": "Tenleytown-AU", "Tysons Corner":"Tysons", "U St/African-Amer Civil War Memorial/Cardozo": "U Street", "Southern Avenue": "Southern Ave", "Van Ness-Udc":"Van Ness-UDC", "Virginia Square-Gmu":"Virginia Sq-GMU", "Vienna/Fairfax-Gmu":"Vienna", "Washington Dulles International Airport":"Dulles Airport", "West Falls Church-Vt/Uva":"West Falls Church", "Woodley Park-Zoo/Adams Morgan":"Woodley Park", "White Flint":"North Bethesda" })
dist_to_core['STATIONNAM'] = dist_to_core['STATIONNAM'].replace({"College Park-U Of Md": 'College Park-U of Md', "Gallary Place": "Gallery Place", "Shaw-Howard Univ":"Shaw-Howard U", "Innovation":"Innovation Center", "Reston":"Reston Town Center"})

In [38]:
dist_to_core2 = mstns.merge(dist_to_core, left_on = 'PRIMARY_NAME_FY23', right_on= 'STATIONNAM')

In [39]:
dist_to_core2 = dist_to_core2[['ID','distance_miles']]
dist_to_core2.rename(columns={'ID':'id1', 'distance_miles':'distance_to_core'}, inplace=True)

In [40]:
am_dataframe_new = am_dataframe_new.merge(dist_to_core2, left_on='ID_x', right_on='id1')
am_dataframe_new = am_dataframe_new.merge(dist_to_core2, left_on='ID_y', right_on='id1')
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,bus_line_count_y,bus_stop_count_y,Name_1_x,All_Jobs_x,Name_1_y,All_Jobs_y,id1_x,distance_to_core_x,id1_y,distance_to_core_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,64.0,36.0,ADDISON ROAD-SEAT PLEASANT,205.510639,ANACOSTIA,2332.571897,MSTN_062,7.181143,MSTN_001,3.007895
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,64.0,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,2332.571897,MSTN_002,0.435964,MSTN_001,3.007895
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,...,64.0,36.0,ARLINGTON CEMETERY,122.300629,ANACOSTIA,2332.571897,MSTN_065,2.127338,MSTN_001,3.007895
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,...,64.0,36.0,BALLSTON-MU,51873.732661,ANACOSTIA,2332.571897,MSTN_068,4.652737,MSTN_001,3.007895
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,...,64.0,36.0,BENNING ROAD,1301.51626,ANACOSTIA,2332.571897,MSTN_003,4.917774,MSTN_001,3.007895


## Terminal Station Dummy

* Subset only the dummy column and the station IDs
* Merge into the AM dataframe once based on destinations and again based on origins

In [41]:
terminal_station2 = terminal_station[['MSTN','terminal_dummy_2023']]

In [42]:
am_dataframe_new = am_dataframe_new.merge(terminal_station2, left_on='ID_x', right_on='MSTN')
am_dataframe_new = am_dataframe_new.merge(terminal_station2, left_on='ID_y', right_on='MSTN')

## Bike travel time in minutes

In [43]:
bike_tt.head()

Unnamed: 0,name_O,name_D,median
0,ANACOSTIA,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,22.166667
1,ANACOSTIA,CAPITOL SOUTH,11.683333
2,ANACOSTIA,EASTERN MARKET,11.808333
3,ANACOSTIA,FARRAGUT NORTH,29.066667
4,ANACOSTIA,FARRAGUT WEST,29.066667


In [44]:
bike_tt['bike_traveltime'] = bike_tt['median']
bike_tt = bike_tt.drop('median', axis=1)

In [45]:
am_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Name_1_y,All_Jobs_y,id1_x,distance_to_core_x,id1_y,distance_to_core_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,ANACOSTIA,2332.571897,MSTN_062,7.181143,MSTN_001,3.007895,MSTN_062,,MSTN_001,
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,ANACOSTIA,2332.571897,MSTN_002,0.435964,MSTN_001,3.007895,MSTN_002,,MSTN_001,
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,...,ANACOSTIA,2332.571897,MSTN_065,2.127338,MSTN_001,3.007895,MSTN_065,,MSTN_001,
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,...,ANACOSTIA,2332.571897,MSTN_068,4.652737,MSTN_001,3.007895,MSTN_068,,MSTN_001,
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,12,94.68,...,ANACOSTIA,2332.571897,MSTN_003,4.917774,MSTN_001,3.007895,MSTN_003,,MSTN_001,


In [46]:
am_dataframe_new = am_dataframe_new.merge(bike_tt, left_on=['walkshed_filename_y', 'walkshed_filename_x'], 
                       right_on=['name_O', 'name_D'], how='left').drop(['name_O', 'name_D'], axis=1)

In [47]:
am_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,All_Jobs_y,id1_x,distance_to_core_x,id1_y,distance_to_core_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,2332.571897,MSTN_062,7.181143,MSTN_001,3.007895,MSTN_062,,MSTN_001,,
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,2332.571897,MSTN_002,0.435964,MSTN_001,3.007895,MSTN_002,,MSTN_001,,22.166667


## Bikeshare Capacity

In [48]:
bikeshare_cap.head()

Unnamed: 0,Name_1,CAPACITY
0,ANACOSTIA,30
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,324
2,BALLSTON-MU,133
3,BENNING ROAD,19
4,BETHESDA,90


In [49]:
bikeshare_cap['bike_cap'] = bikeshare_cap['CAPACITY']
bikeshare_cap = bikeshare_cap.drop('CAPACITY', axis=1)

In [50]:
am_dataframe_new = am_dataframe_new.merge(bikeshare_cap, left_on='walkshed_filename_x', right_on='Name_1', how='left')
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(bikeshare_cap, left_on='walkshed_filename_y', right_on='Name_1', how='left')

In [51]:
am_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,id1_y,distance_to_core_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime,bike_cap_x,Name_1,bike_cap_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,MSTN_001,3.007895,MSTN_062,,MSTN_001,,,,ANACOSTIA,30.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,MSTN_001,3.007895,MSTN_002,,MSTN_001,,22.166667,324.0,ANACOSTIA,30.0


## Bus competitveness Index

In [52]:
bus_comp_index = bus_comp_index[['station_name_o', 'station_name_d', 'bus_competativeness_index']].copy()

In [53]:
bus_comp_index.head()

Unnamed: 0,station_name_o,station_name_d,bus_competativeness_index
0,Anacostia,Anacostia,1.0
1,Anacostia,Archives,0.783972
2,Anacostia,Archives,0.26455
3,Anacostia,Benning Road,0.479705
4,Anacostia,Brookland-CUA,0.413793


In [54]:
am_dataframe_new = am_dataframe_new.merge(bus_comp_index, left_on=['O', 'D'], 
                       right_on=['station_name_o', 'station_name_d'], how='left').drop(['station_name_o', 'station_name_d'], axis=1)

In [55]:
am_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,distance_to_core_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime,bike_cap_x,Name_1,bike_cap_y,bus_competativeness_index
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,3.007895,MSTN_062,,MSTN_001,,,,ANACOSTIA,30.0,0.865438
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,3.007895,MSTN_002,,MSTN_001,,22.166667,324.0,ANACOSTIA,30.0,0.783972


## CTPP Jobs

In [56]:
# pd.set_option('display.max_rows', None)
# pd.options.display.max_rows = None
# pd.reset_option('all')
ctpp_jobs = ctpp_jobs[['Name_1', 'proportionjobs' ]].rename(columns={'proportionjobs': 'ctpp_jobs'}).copy()

In [57]:
ctpp_jobs.head(2)

Unnamed: 0,Name_1,ctpp_jobs
0,ADDISON ROAD-SEAT PLEASANT,116.616376
1,ANACOSTIA,1763.747494


In [58]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(ctpp_jobs, left_on='walkshed_filename_x', right_on='Name_1').drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(ctpp_jobs, left_on='walkshed_filename_y', right_on='Name_1')

In [59]:
am_dataframe_new#.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime,bike_cap_x,bike_cap_y,bus_competativeness_index,ctpp_jobs_x,Name_1,ctpp_jobs_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,,MSTN_001,,,,30.0,0.865438,116.616376,ANACOSTIA,1763.747494
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,,MSTN_001,,22.166667,324.0,30.0,0.783972,56594.206356,ANACOSTIA,1763.747494
2,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,,MSTN_001,,22.166667,324.0,30.0,0.264550,56594.206356,ANACOSTIA,1763.747494
3,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.20,...,,MSTN_001,,,,30.0,0.774713,35.889042,ANACOSTIA,1763.747494
4,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.40,...,,MSTN_001,,,133.0,30.0,0.361587,7944.309562,ANACOSTIA,1763.747494
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8519,Arlington Cemetery,Ronald Reagan Washington National Airport,RONALD REAGAN WASHINGTON NATIONAL AIRPORT,ARLINGTON CEMETERY,3.21,MSTN_073,MSTN_065,MSTN_0650MSTN_073,1,3.21,...,,MSTN_065,,,19.0,,0.691443,889.107083,ARLINGTON CEMETERY,35.889042
8520,Arlington Cemetery,Rosslyn,ROSSLYN,ARLINGTON CEMETERY,0.94,MSTN_060,MSTN_065,MSTN_0650MSTN_060,2,1.88,...,,MSTN_065,,,153.0,,0.245399,2977.370309,ARLINGTON CEMETERY,35.889042
8521,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,,MSTN_065,,,192.0,,0.792303,9891.356261,ARLINGTON CEMETERY,35.889042
8522,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,,MSTN_065,,,192.0,,0.355150,9891.356261,ARLINGTON CEMETERY,35.889042


## Household Income below 50k

In [60]:
hh_incomebelowpoverty.head(2)

Unnamed: 0,Name_1,HH_below_50k
0,ADDISON ROAD-SEAT PLEASANT,922.4
1,ANACOSTIA,2932.3


In [61]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(hh_incomebelowpoverty, left_on='walkshed_filename_x', right_on='Name_1').drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(hh_incomebelowpoverty, left_on='walkshed_filename_y', right_on='Name_1')


In [62]:
am_dataframe_new

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,terminal_dummy_2023_y,bike_traveltime,bike_cap_x,bike_cap_y,bus_competativeness_index,ctpp_jobs_x,ctpp_jobs_y,HH_below_50k_x,Name_1,HH_below_50k_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,,,,30.0,0.865438,116.616376,1763.747494,922.4,ANACOSTIA,2932.3
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,,22.166667,324.0,30.0,0.783972,56594.206356,1763.747494,548.4,ANACOSTIA,2932.3
2,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,,22.166667,324.0,30.0,0.264550,56594.206356,1763.747494,548.4,ANACOSTIA,2932.3
3,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.20,...,,,,30.0,0.774713,35.889042,1763.747494,0.0,ANACOSTIA,2932.3
4,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.40,...,,,133.0,30.0,0.361587,7944.309562,1763.747494,933.3,ANACOSTIA,2932.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8519,Arlington Cemetery,Ronald Reagan Washington National Airport,RONALD REAGAN WASHINGTON NATIONAL AIRPORT,ARLINGTON CEMETERY,3.21,MSTN_073,MSTN_065,MSTN_0650MSTN_073,1,3.21,...,,,19.0,,0.691443,889.107083,35.889042,2.7,ARLINGTON CEMETERY,0.0
8520,Arlington Cemetery,Rosslyn,ROSSLYN,ARLINGTON CEMETERY,0.94,MSTN_060,MSTN_065,MSTN_0650MSTN_060,2,1.88,...,,,153.0,,0.245399,2977.370309,35.889042,611.6,ARLINGTON CEMETERY,0.0
8521,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,,,192.0,,0.792303,9891.356261,35.889042,756.3,ARLINGTON CEMETERY,0.0
8522,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,,,192.0,,0.355150,9891.356261,35.889042,756.3,ARLINGTON CEMETERY,0.0


## Number of hotels within the walkshed

In [63]:
hotels.head(2)

Unnamed: 0,Name_1,hotelcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,4
1,BETHESDA,1


In [64]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(hotels, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(hotels, left_on='walkshed_filename_y', right_on='Name_1', how='left')

In [65]:
am_dataframe_new

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,bike_cap_x,bike_cap_y,bus_competativeness_index,ctpp_jobs_x,ctpp_jobs_y,HH_below_50k_x,HH_below_50k_y,hotelcount_x,Name_1,hotelcount_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,,30.0,0.865438,116.616376,1763.747494,922.4,2932.3,,,
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,324.0,30.0,0.783972,56594.206356,1763.747494,548.4,2932.3,4.0,,
2,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,324.0,30.0,0.264550,56594.206356,1763.747494,548.4,2932.3,4.0,,
3,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.20,...,,30.0,0.774713,35.889042,1763.747494,0.0,2932.3,,,
4,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.40,...,133.0,30.0,0.361587,7944.309562,1763.747494,933.3,2932.3,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8519,Arlington Cemetery,Ronald Reagan Washington National Airport,RONALD REAGAN WASHINGTON NATIONAL AIRPORT,ARLINGTON CEMETERY,3.21,MSTN_073,MSTN_065,MSTN_0650MSTN_073,1,3.21,...,19.0,,0.691443,889.107083,35.889042,2.7,0.0,,,
8520,Arlington Cemetery,Rosslyn,ROSSLYN,ARLINGTON CEMETERY,0.94,MSTN_060,MSTN_065,MSTN_0650MSTN_060,2,1.88,...,153.0,,0.245399,2977.370309,35.889042,611.6,0.0,,,
8521,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,192.0,,0.792303,9891.356261,35.889042,756.3,0.0,1.0,,
8522,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,192.0,,0.355150,9891.356261,35.889042,756.3,0.0,1.0,,


## Number of entertainment locations within the walkshed

In [66]:
entertainment.head()

Unnamed: 0,Name_1,entertainmentcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8
1,BALLSTON-MU,1
2,BETHESDA,3
3,BROOKLAND-CUA,4
4,CAPITOL SOUTH,1


In [67]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(entertainment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(entertainment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of restaurants located within each walksheds

In [68]:
restaurant.head(2)

Unnamed: 0,Name_1,restaurantcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,4
1,BETHESDA,1


In [69]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(restaurant, left_on='walkshed_filename_x', right_on='Name_1', how='left').drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(restaurant, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Station is an Inter-city hub (by ridership)

In [70]:
inter_city.head(2)

Unnamed: 0,Name_1,ridership
0,ADDISON ROAD,14657.0
1,ANACOSTIA,25523.0


In [71]:
inter_city['intercityhub'] = inter_city['ridership']
inter_city.drop('ridership', axis=1, inplace=True)

In [72]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(inter_city, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(inter_city, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Intersection density

In [73]:
intersection_density.head(2)

Unnamed: 0,Name_1,proportional street density
0,ADDISON ROAD-SEAT PLEASANT,87.782487
1,ANACOSTIA,71.775767


In [74]:
intersection_density['prop_str_dens'] = intersection_density['proportional street density']
intersection_density.drop('proportional street density', axis=1, inplace=True)

In [75]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(intersection_density, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(intersection_density, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Job Accessibility by Transit

In [76]:
job_access_by_transit.head(2)

Unnamed: 0,Name_1,Jobs_auto,Jobs_transit
0,ADDISON ROAD-SEAT PLEASANT,22677332.0,15074627.0
1,ANACOSTIA,41573746.0,47484669.0


In [77]:
job_access_by_transit = job_access_by_transit[['Name_1', 'Jobs_transit']]

In [78]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(job_access_by_transit, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(job_access_by_transit, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of convenience stores within each walksheds

In [79]:
convenience_store.head(2)

Unnamed: 0,Name_1,convenience_storecount
0,ANACOSTIA,3
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,1


In [80]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(convenience_store, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(convenience_store, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## High School Enrollment

In [81]:
highschool_enrollment.head(2)

Unnamed: 0,Name_1,TOTAL
0,ANACOSTIA,1593.0
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,728.0


In [82]:
highschool_enrollment['highschoolenroll'] = highschool_enrollment['TOTAL']
highschool_enrollment.drop('TOTAL', axis=1, inplace=True)

In [83]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(highschool_enrollment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(highschool_enrollment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

In [84]:
am_dataframe_new

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,intercityhub_y,prop_str_dens_x,prop_str_dens_y,Jobs_transit_x,Jobs_transit_y,convenience_storecount_x,convenience_storecount_y,highschoolenroll_x,Name_1,highschoolenroll_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,...,25523.0,87.782487,71.775767,15074627.0,47484669.0,,3.0,,ANACOSTIA,1593.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,25523.0,170.832426,71.775767,13524248.0,47484669.0,1.0,3.0,728.0,ANACOSTIA,1593.0
2,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,...,25523.0,170.832426,71.775767,13524248.0,47484669.0,1.0,3.0,728.0,ANACOSTIA,1593.0
3,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.20,...,25523.0,54.509764,71.775767,0.0,47484669.0,,3.0,,ANACOSTIA,1593.0
4,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.40,...,25523.0,1439.739714,71.775767,113915773.0,47484669.0,4.0,3.0,2436.0,ANACOSTIA,1593.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8519,Arlington Cemetery,Ronald Reagan Washington National Airport,RONALD REAGAN WASHINGTON NATIONAL AIRPORT,ARLINGTON CEMETERY,3.21,MSTN_073,MSTN_065,MSTN_0650MSTN_073,1,3.21,...,12263.0,16.724355,54.509764,0.0,0.0,1.0,,,,
8520,Arlington Cemetery,Rosslyn,ROSSLYN,ARLINGTON CEMETERY,0.94,MSTN_060,MSTN_065,MSTN_0650MSTN_060,2,1.88,...,12263.0,375.201493,54.509764,74526768.0,0.0,2.0,,0.0,,
8521,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,12263.0,121.718152,54.509764,0.0,0.0,,,,,
8522,Arlington Cemetery,Smithsonian,SMITHSONIAN,ARLINGTON CEMETERY,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,...,12263.0,121.718152,54.509764,0.0,0.0,,,,,


## College Enrollment within the walkshed

In [85]:
college_enrollment.head()

Unnamed: 0,Name_1,TOT_ENROLL
0,BROOKLAND-CUA,5790.0
1,CLARENDON,8.0
2,COURT HOUSE,4797.0
3,CRYSTAL CITY,580.0
4,DUPONT CIRCLE,340.0


In [86]:
college_enrollment['college_enroll'] = college_enrollment['TOT_ENROLL']
college_enrollment.drop('TOT_ENROLL', axis=1, inplace=True)

In [87]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(college_enrollment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(college_enrollment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Household with zero cars

In [88]:
hh_0_car.head(2)

Unnamed: 0,Name_1,Num of Household with No Car
0,ADDISON ROAD-SEAT PLEASANT,16.0
1,ANACOSTIA,9.0


In [89]:
hh_0_car['HH_0_car'] = hh_0_car['Num of Household with No Car']
hh_0_car.drop('Num of Household with No Car', axis=1, inplace=True)

In [90]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(hh_0_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(hh_0_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Households with exactly 1 car

In [91]:
hh_1_car.head(2)

Unnamed: 0,Name_1,Num of Household with 1 Car
0,ADDISON ROAD-SEAT PLEASANT,182.0
1,ANACOSTIA,62.5


In [92]:
hh_1_car['HH_1_car'] = hh_1_car['Num of Household with 1 Car']
hh_1_car.drop('Num of Household with 1 Car', axis=1, inplace=True)

In [93]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(hh_1_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(hh_1_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Households with more than 1 car 

In [94]:
hh_morethan1_car.head(2)

Unnamed: 0,Name_1,Num of Household with More than One Car
0,ADDISON ROAD-SEAT PLEASANT,214.0
1,ANACOSTIA,14.5


In [95]:
hh_morethan1_car['HH_more1_car'] = hh_morethan1_car['Num of Household with More than One Car']
hh_morethan1_car.drop('Num of Household with More than One Car', axis=1, inplace=True)

In [96]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(hh_morethan1_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(hh_morethan1_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Public Admin jobs

In [97]:
public_admin_jobs.head(2)

Unnamed: 0,Name_1,CNS20
0,ADDISON ROAD-SEAT PLEASANT,0.0
1,ANACOSTIA,352.027012


In [98]:
public_admin_jobs['pub_admin_jobs'] = public_admin_jobs['CNS20']
public_admin_jobs.drop('CNS20', axis=1, inplace=True)

In [99]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(public_admin_jobs, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(public_admin_jobs, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of Affordable Housing Units/Senior housing facilities within each walksheds

In [100]:
senior_housing_afford.head(2)

Unnamed: 0,Name_1,TotalUnits
0,ANACOSTIA,1200
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,214


In [101]:
senior_housing_afford['housing_units_afford'] = senior_housing_afford['TotalUnits']
senior_housing_afford.drop('TotalUnits', axis=1, inplace=True)

In [102]:
am_dataframe_new.drop('Name_1', axis=1, inplace=True)
am_dataframe_new = am_dataframe_new.merge(senior_housing_afford, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
am_dataframe_new = am_dataframe_new.merge(senior_housing_afford, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Log of Variables and Export

* Turn column names with _x or _y to _O and _D
* Drop redundant columns
* Fill 0s and NAs with 0.01 to prevent errors with taking the natural log
* Fill terminal dummy columns with 0

In [103]:
am_dataframe_new.columns = am_dataframe_new.columns.str.replace('_x','_D')
am_dataframe_new.columns = am_dataframe_new.columns.str.replace('_y','_O')
am_dataframe_new.columns

Index(['O', 'D', 'walkshed_filename_D', 'walkshed_filename_O', 'track_miles',
       'ID_D', 'ID_O', 'pairs', 'passengers', 'riders_miles', 'AVG_TRAINS_D',
       'AVG_TRAINS_O', 'peak_fare_per_mile2', 'new_auto_tt2',
       'new_auto_tt_per_mile2', 'bus_tt', 'bus_tt_per_mile',
       'proportionhouses_D', 'Total Households_D', 'proportionhouses_O',
       'Total Households_O', 'parking_user', 'PARKING_CAPACITY_D',
       'PARKING_CAPACITY_O', 'bus_line_count_D', 'bus_stop_count_D',
       'bus_line_count_O', 'bus_stop_count_O', 'Name_1_D', 'All_Jobs_D',
       'Name_1_O', 'All_Jobs_O', 'id1_D', 'distance_to_core_D', 'id1_O',
       'distance_to_core_O', 'MSTN_D', 'terminal_dummy_2023_D', 'MSTN_O',
       'terminal_dummy_2023_O', 'bike_traveltime', 'bike_cap_D', 'bike_cap_O',
       'bus_competativeness_index', 'ctpp_jobs_D', 'ctpp_jobs_O',
       'HH_below_50k_D', 'HH_below_50k_O', 'hotelcount_D', 'hotelcount_O',
       'entertainmentcount_D', 'entertainmentcount_O', 'restaurantcount_

In [104]:
am_dataframe_new.drop(['Name_1', 'Name_1_O','Name_1_D','id1_O','id1_D', 'MSTN_O', 'MSTN_D', 'walkshed_filename_D','walkshed_filename_O'], axis =1, inplace=True)

In [105]:
am_dataframe_new.head(2)

Unnamed: 0,O,D,track_miles,ID_D,ID_O,pairs,passengers,riders_miles,AVG_TRAINS_D,AVG_TRAINS_O,...,HH_0_car_D,HH_0_car_O,HH_1_car_D,HH_1_car_O,HH_more1_car_D,HH_more1_car_O,pub_admin_jobs_D,pub_admin_jobs_O,housing_units_afford_D,housing_units_afford_O
0,Anacostia,Addison Road,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,53,33,...,16.0,9.0,182.0,62.5,214.0,14.5,0.0,352.027012,,1200.0
1,Anacostia,Archives,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,...,0.0,9.0,0.0,62.5,0.0,14.5,0.0,352.027012,214.0,1200.0


In [106]:
am_dataframe_new.columns

Index(['O', 'D', 'track_miles', 'ID_D', 'ID_O', 'pairs', 'passengers',
       'riders_miles', 'AVG_TRAINS_D', 'AVG_TRAINS_O', 'peak_fare_per_mile2',
       'new_auto_tt2', 'new_auto_tt_per_mile2', 'bus_tt', 'bus_tt_per_mile',
       'proportionhouses_D', 'Total Households_D', 'proportionhouses_O',
       'Total Households_O', 'parking_user', 'PARKING_CAPACITY_D',
       'PARKING_CAPACITY_O', 'bus_line_count_D', 'bus_stop_count_D',
       'bus_line_count_O', 'bus_stop_count_O', 'All_Jobs_D', 'All_Jobs_O',
       'distance_to_core_D', 'distance_to_core_O', 'terminal_dummy_2023_D',
       'terminal_dummy_2023_O', 'bike_traveltime', 'bike_cap_D', 'bike_cap_O',
       'bus_competativeness_index', 'ctpp_jobs_D', 'ctpp_jobs_O',
       'HH_below_50k_D', 'HH_below_50k_O', 'hotelcount_D', 'hotelcount_O',
       'entertainmentcount_D', 'entertainmentcount_O', 'restaurantcount_D',
       'restaurantcount_O', 'intercityhub_D', 'intercityhub_O',
       'prop_str_dens_D', 'prop_str_dens_O', 'Jobs_tra

In [107]:
cols_transform = am_dataframe_new.columns[6:]

for col in cols_transform:
    am_dataframe_new[f'log_{col}'] = np.log1p(am_dataframe_new[col])

In [108]:
am_dataframe_new

Unnamed: 0,O,D,track_miles,ID_D,ID_O,pairs,passengers,riders_miles,AVG_TRAINS_D,AVG_TRAINS_O,...,log_HH_0_car_D,log_HH_0_car_O,log_HH_1_car_D,log_HH_1_car_O,log_HH_more1_car_D,log_HH_more1_car_O,log_pub_admin_jobs_D,log_pub_admin_jobs_O,log_housing_units_afford_D,log_housing_units_afford_O
0,Anacostia,Addison Road,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,53,33,...,2.833213,2.302585,5.209486,4.15104,5.370638,2.74084,0.000000,5.866545,,7.09091
1,Anacostia,Archives,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,...,0.000000,2.302585,0.000000,4.15104,0.000000,2.74084,0.000000,5.866545,5.370638,7.09091
2,Anacostia,Archives,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,...,0.000000,2.302585,0.000000,4.15104,0.000000,2.74084,0.000000,5.866545,5.370638,7.09091
3,Anacostia,Arlington Cemetery,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.20,27,33,...,0.000000,2.302585,0.000000,4.15104,0.000000,2.74084,0.000000,5.866545,,7.09091
4,Anacostia,Ballston-MU,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.40,53,33,...,0.000000,2.302585,4.166665,4.15104,3.850148,2.74084,5.344562,5.866545,6.347389,7.09091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8519,Arlington Cemetery,Ronald Reagan Washington National Airport,3.21,MSTN_073,MSTN_065,MSTN_0650MSTN_073,1,3.21,56,27,...,3.258097,0.000000,4.007333,0.00000,2.674149,0.00000,1.185425,0.000000,,
8520,Arlington Cemetery,Rosslyn,0.94,MSTN_060,MSTN_065,MSTN_0650MSTN_060,2,1.88,79,27,...,0.000000,0.000000,4.795791,0.00000,3.332205,0.00000,6.536193,0.000000,6.338594,
8521,Arlington Cemetery,Smithsonian,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,78,27,...,0.000000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.000000,5.370638,
8522,Arlington Cemetery,Smithsonian,4.14,MSTN_032,MSTN_065,MSTN_0650MSTN_032,5,20.70,78,27,...,0.000000,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.000000,5.370638,


In [109]:
# am_dataframe_new[['log_passengers', 'log_riders_miles', 'log_AVG_TRAINS_O',
#        'log_AVG_TRAINS_D', 'log_peak_fare_per_mile',
#        'log_auto_tt2', 'log_auto_tt_per_mile_2',
#         'log_bus_tt_per_mile', 'log_bus_tt','log_proportionhouses_O',
#        'log_Total Households_O', 'log_proportionhouses_D', 'log_Total Households_D',
#        'log_parking_user', 'log_PARKING_CAPACITY_O', 'log_PARKING_CAPACITY_D',
#        'log_bus_line_count_O', 'log_bus_stop_count_O', 'log_bus_line_count_D',
#        'log_bus_stop_count_D', 'log_All_Jobs_O', 'log_All_Jobs_D', 'log_distance_to_core_O',
#        'log_distance_to_core_D']] = np.log(am_dataframe_new[['passengers', 'riders_miles', 'AVG_TRAINS_O',
#        'AVG_TRAINS_D', 'peak_fare_per_mile2',
#        'new_auto_tt2', 'new_auto_tt_per_mile2',
#         'bus_tt_per_mile', 'bus_tt', 'proportionhouses_O',
#        'Total Households_O', 'proportionhouses_D', 'Total Households_D',
#        'parking_user', 'PARKING_CAPACITY_O', 'PARKING_CAPACITY_D',
#        'bus_line_count_O', 'bus_stop_count_O', 'bus_line_count_D',
#        'bus_stop_count_D', 'All_Jobs_O', 'All_Jobs_D', 'distance_to_core_O',
#        'distance_to_core_D']].replace({0 : 0.01, np.nan : 0.01}))


In [110]:
am_dataframe_new[['terminal_dummy_2023_O', 'terminal_dummy_2023_D' ]] = am_dataframe_new[['terminal_dummy_2023_O', 'terminal_dummy_2023_D' ]].fillna(0)

In [111]:
am_dataframe_new.head()

Unnamed: 0,O,D,track_miles,ID_D,ID_O,pairs,passengers,riders_miles,AVG_TRAINS_D,AVG_TRAINS_O,...,log_HH_0_car_D,log_HH_0_car_O,log_HH_1_car_D,log_HH_1_car_O,log_HH_more1_car_D,log_HH_more1_car_O,log_pub_admin_jobs_D,log_pub_admin_jobs_O,log_housing_units_afford_D,log_housing_units_afford_O
0,Anacostia,Addison Road,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,53,33,...,2.833213,2.302585,5.209486,4.15104,5.370638,2.74084,0.0,5.866545,,7.09091
1,Anacostia,Archives,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,...,0.0,2.302585,0.0,4.15104,0.0,2.74084,0.0,5.866545,5.370638,7.09091
2,Anacostia,Archives,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,...,0.0,2.302585,0.0,4.15104,0.0,2.74084,0.0,5.866545,5.370638,7.09091
3,Anacostia,Arlington Cemetery,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,27,33,...,0.0,2.302585,0.0,4.15104,0.0,2.74084,0.0,5.866545,,7.09091
4,Anacostia,Ballston-MU,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,53,33,...,0.0,2.302585,4.166665,4.15104,3.850148,2.74084,5.344562,5.866545,6.347389,7.09091


In [112]:
am_dataframe_new.to_csv("Outputs/am_dataframe_new.csv")

# PM Dataframe

* pm_parking_users contains the number of passengers that used parking in the PM peak period
* pm_auto_tt contains the interpolated auto travel times for the PM peak period

In [113]:
pm_parking_users = pd.read_csv("../Data Preprocessing/Parking Users/output/pm_parking_updated.csv")
pm_auto_tt = pd.read_csv("../Data Preprocessing/Interpolated Auto Travel Times/output/pm_interpolated_auto_times.csv")

## PM Passenger Miles

* Filter for only passengers in the PM Peak
* Create an od_pair key column
* Merge track miles to passengers in the am, multiply the track miles by passengers, consolidate the columns, rename to passenger column

In [114]:
pm_passengers = passengers[passengers['period']=='PM Peak']
pm_passengers['pairs'] = pm_passengers.apply(lambda x: str(x['ID'])+str(0)+str(x['ID_1']), axis=1)

In [115]:
pmmilesmerge = track_miles2.merge(pm_passengers, left_on='pairs', right_on='pairs')
pmmilesmerge['riders_miles'] = pmmilesmerge['track_miles'] * pmmilesmerge['COUNT']
pm_dataframe_new = pmmilesmerge[['O', 'D','walkshed_filename_x', 'walkshed_filename_y', 'track_miles', 'ID_x', 'ID_y','pairs', 'COUNT', 'riders_miles', ]]
pm_dataframe_new.rename(columns={'COUNT':'passengers'}, inplace=True)
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4
1,Archives,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8.31,MSTN_062,MSTN_002,MSTN_0020MSTN_062,69,573.39
2,Arlington Cemetery,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARLINGTON CEMETERY,11.39,MSTN_062,MSTN_065,MSTN_0650MSTN_062,4,45.56
3,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,56,799.12
4,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,88,216.48


## Peak Fare & Fare per Track Mile

* Merge fare_track_miles into the main PM dataframe

In [116]:
pm_dataframe_new = pm_dataframe_new.merge(fare_track_miles)
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,O_MSTN_ID,D_MSTN_ID,peak_fare_per_mile2
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,MSTN_001,MSTN_062,0.36715
1,Archives,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8.31,MSTN_062,MSTN_002,MSTN_0020MSTN_062,69,573.39,MSTN_002,MSTN_062,0.445247
2,Arlington Cemetery,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARLINGTON CEMETERY,11.39,MSTN_062,MSTN_065,MSTN_0650MSTN_062,4,45.56,MSTN_065,MSTN_062,0.390694
3,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,56,799.12,MSTN_068,MSTN_062,0.367905
4,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,88,216.48,MSTN_003,MSTN_062,0.914634


## Auto Travel Time & Auto Travel Time in Minutes/Mile

* Merge PM auto travel times into the main PM dataframe

In [117]:
pm_dataframe_new = pm_dataframe_new.merge(pm_auto_tt, on='pairs')
pm_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,O_MSTN_ID,D_MSTN_ID,peak_fare_per_mile2,Unnamed: 0,new_auto_tt2,new_auto_tt_per_mile2
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,MSTN_001,MSTN_062,0.36715,1,13.18,1.641345
1,Archives,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8.31,MSTN_062,MSTN_002,MSTN_0020MSTN_062,69,573.39,MSTN_002,MSTN_062,0.445247,2,13.08,1.718791
2,Arlington Cemetery,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARLINGTON CEMETERY,11.39,MSTN_062,MSTN_065,MSTN_0650MSTN_062,4,45.56,MSTN_065,MSTN_062,0.390694,3,30.466196,2.67482
3,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,56,799.12,MSTN_068,MSTN_062,0.367905,4,40.669998,3.116475
4,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,88,216.48,MSTN_003,MSTN_062,0.914634,5,4.9,2.008197


## Bus Travel Time & Bus Travel Time in Minutes/Mile

* Merge bus travel times into the main PM dataframe

In [118]:
pm_dataframe_new = pm_dataframe_new.merge(bus_tt_minpmile2, on='pairs')
pm_dataframe_new

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,O_MSTN_ID,D_MSTN_ID,peak_fare_per_mile2,Unnamed: 0,new_auto_tt2,new_auto_tt_per_mile2,bus_tt,bus_tt_per_mile
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.40,MSTN_001,MSTN_062,0.367150,1,13.180000,1.641345,82.0,12.304868
1,Archives,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8.31,MSTN_062,MSTN_002,MSTN_0020MSTN_062,69,573.39,MSTN_002,MSTN_062,0.445247,2,13.080000,1.718791,84.0,11.415660
2,Arlington Cemetery,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARLINGTON CEMETERY,11.39,MSTN_062,MSTN_065,MSTN_0650MSTN_062,4,45.56,MSTN_065,MSTN_062,0.390694,3,30.466196,2.674820,122.0,12.730069
3,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,56,799.12,MSTN_068,MSTN_062,0.367905,4,40.669998,3.116475,180.0,14.449949
4,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,88,216.48,MSTN_003,MSTN_062,0.914634,5,4.900000,2.008197,32.0,12.251781
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7866,Waterfront,Woodley Park,WOODLEY PARK-ZOO/ADAMS MORGAN,WATERFRONT,4.46,MSTN_040,MSTN_039,MSTN_0390MSTN_040,65,289.90,MSTN_039,MSTN_040,0.594170,8275,,3.359744,55.0,13.022452
7867,West Falls Church,Woodley Park,WOODLEY PARK-ZOO/ADAMS MORGAN,WEST FALLS CHURCH-VT/UVA,12.38,MSTN_040,MSTN_059,MSTN_0590MSTN_040,3,37.14,MSTN_059,MSTN_040,0.351373,8276,,3.359744,130.0,13.265092
7868,West Hyattsville,Woodley Park,WOODLEY PARK-ZOO/ADAMS MORGAN,WEST HYATTSVILLE,9.74,MSTN_040,MSTN_055,MSTN_0550MSTN_040,4,38.96,MSTN_055,MSTN_040,0.369610,8277,,3.359744,72.0,12.129653
7869,Wheaton,Woodley Park,WOODLEY PARK-ZOO/ADAMS MORGAN,WHEATON,14.38,MSTN_040,MSTN_046,MSTN_0460MSTN_040,27,388.26,MSTN_046,MSTN_040,0.323366,8278,,3.359744,77.0,8.594437


## Households per Half Mile

* Merge into the PM dataframe first based on the origins and again based on the destinations
* Drop redundant columns

In [119]:
pm_dataframe_new = pm_dataframe_new.merge(households_half_mile, left_on='ID_x', right_on='MSTN')
pm_dataframe_new = pm_dataframe_new.merge(households_half_mile, left_on='ID_y', right_on='MSTN')
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,new_auto_tt2,new_auto_tt_per_mile2,bus_tt,bus_tt_per_mile,MSTN_x,proportionhouses_x,Total Households_x,MSTN_y,proportionhouses_y,Total Households_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,13.18,1.641345,82.0,12.304868,MSTN_062,528.921138,1663,MSTN_001,2075.704342,3639
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,13.212824,4.181273,31.0,10.305395,MSTN_002,2214.692242,3007,MSTN_001,2075.704342,3639
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,1,6.24,...,19.669736,3.152201,,,MSTN_065,0.003184,1,MSTN_001,2075.704342,3639
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,21,191.52,...,38.189999,4.896153,95.0,12.088109,MSTN_068,11603.906894,13574,MSTN_001,2075.704342,3639
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,20,157.8,...,23.128796,2.931406,44.0,10.595066,MSTN_003,3304.80553,4621,MSTN_001,2075.704342,3639


In [120]:
pm_dataframe_new.drop(['O_MSTN_ID','D_MSTN_ID','MSTN_x','MSTN_y', 'Unnamed: 0'], axis =1, inplace=True)

## PM Trains per Hour

* Filter only for weekday services and in the PM peak
* Groupby and get the sum of trains per hour
* Merge into the PM dataframe first based on the origins and again based on the destinations
* Drop redundant columns

In [121]:
trains_per_hour2 = trains_per_hour[(trains_per_hour['ARRIVAL_PERIOD']=='PM Peak') & (trains_per_hour['SERVICETYPE']=='Weekday')]
trains_per_hour2.head()

Unnamed: 0,STATION_ID,STOP_ID,DIR_ID,SERVICETYPE,ARRIVAL_PERIOD,AVG_TRAINS
9,MSTN_001,F06,1,Weekday,PM Peak,17
11,MSTN_001,F06,2,Weekday,PM Peak,17
21,MSTN_002,F02,1,Weekday,PM Peak,31
23,MSTN_002,F02,2,Weekday,PM Peak,31
33,MSTN_003,G01,1,Weekday,PM Peak,24


In [122]:
trains_per_hour2 = trains_per_hour2.groupby(['STATION_ID']).sum().reset_index()
trains_per_hour2 = trains_per_hour2[['STATION_ID','AVG_TRAINS']]
trains_per_hour2.head()

Unnamed: 0,STATION_ID,AVG_TRAINS
0,MSTN_001,34
1,MSTN_002,62
2,MSTN_003,48
3,MSTN_004,53
4,MSTN_005,74


In [123]:
pm_dataframe_new = pm_dataframe_new.merge(trains_per_hour2, left_on='ID_x', right_on='STATION_ID')
pm_dataframe_new = pm_dataframe_new.merge(trains_per_hour2, left_on='ID_y', right_on='STATION_ID')

In [124]:
pm_dataframe_new.drop(['STATION_ID_x', 'STATION_ID_y'], axis=1, inplace=True)

## PM Parking Users

* Give station names in pm_parking_users proper MSTN IDs
* Create an OD pair column
* Subset only the pairs column and the parking_users
* Merge into the main PM dataframe

In [125]:
pm_parking_users2 = mstns.merge(pm_parking_users, left_on = 'WMATA_filename', right_on= 'START_PLACE_NAME')
pm_parking_users2 = mstns.merge(pm_parking_users2, left_on = 'WMATA_filename' , right_on='END_PLACE_NAME')
pm_parking_users2.head()

Unnamed: 0.1,ID_x,PRIMARY_NAME_FY23_x,walkshed_filename_x,WMATA_filename_x,track_miles_names_x,auto_filename_x,ID_y,PRIMARY_NAME_FY23_y,walkshed_filename_y,WMATA_filename_y,track_miles_names_y,auto_filename_y,Unnamed: 0,START_PLACE_NAME,END_PLACE_NAME,parking_user
0,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_001,Anacostia,ANACOSTIA,Anacostia,Anacostia,ANACOSTIA,76,Anacostia,Addison Road,0.0
1,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_002,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,Archives-Navy Memorial,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,163,Archives-Navy Memorial,Addison Road,14.0
2,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_065,Arlington Cemetery,ARLINGTON CEMETERY,Arlington Cemetery,Arlington Cemetery,ARLINGTON CEMETERY,253,Arlington Cemetery,Addison Road,0.0
3,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_068,Ballston-MU,BALLSTON-MU,Ballston,Ballston-MU,BALLSTON-MU,337,Ballston,Addison Road,1.0
4,MSTN_062,Addison Road,ADDISON ROAD-SEAT PLEASANT,Addison Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,MSTN_003,Benning Road,BENNING ROAD,Benning Road,Benning Road,BENNING ROAD,427,Benning Road,Addison Road,0.0


In [126]:
pm_parking_users2['pairs'] = pm_parking_users2.apply(lambda x: str(x['ID_y'])+str(0)+str(x['ID_x']), axis=1)

In [127]:
pm_parking_users2 = pm_parking_users2[['pairs', 'parking_user']]

In [128]:
pm_dataframe_new = pm_dataframe_new.merge(pm_parking_users2, on='pairs', how='left')
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,new_auto_tt_per_mile2,bus_tt,bus_tt_per_mile,proportionhouses_x,Total Households_x,proportionhouses_y,Total Households_y,AVG_TRAINS_x,AVG_TRAINS_y,parking_user
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,1.641345,82.0,12.304868,528.921138,1663,2075.704342,3639,48,34,0.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,4.181273,31.0,10.305395,2214.692242,3007,2075.704342,3639,62,34,0.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,1,6.24,...,3.152201,,,0.003184,1,2075.704342,3639,24,34,0.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,21,191.52,...,4.896153,95.0,12.088109,11603.906894,13574,2075.704342,3639,50,34,0.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,20,157.8,...,2.931406,44.0,10.595066,3304.80553,4621,2075.704342,3639,48,34,0.0


## Number of Bus Lines

* Merge into the PM dataframe first based on the origins and again based on the destinations
* Drop redundant columns

In [129]:
pm_dataframe_new = pm_dataframe_new.merge(num_bus_lines, left_on='ID_x', right_on='MSTN')
pm_dataframe_new = pm_dataframe_new.merge(num_bus_lines, left_on='ID_y', right_on='MSTN')
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Total Households_y,AVG_TRAINS_x,AVG_TRAINS_y,parking_user,MSTN_x,bus_line_count_x,bus_stop_count_x,MSTN_y,bus_line_count_y,bus_stop_count_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,3639,48,34,0.0,MSTN_062,36.0,18.0,MSTN_001,64.0,36.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,3639,62,34,0.0,MSTN_002,86.0,50.0,MSTN_001,64.0,36.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,1,6.24,...,3639,24,34,0.0,MSTN_065,9.0,0.0,MSTN_001,64.0,36.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,21,191.52,...,3639,50,34,0.0,MSTN_068,35.0,30.0,MSTN_001,64.0,36.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,20,157.8,...,3639,48,34,0.0,MSTN_003,30.0,34.0,MSTN_001,64.0,36.0


In [130]:
pm_dataframe_new.drop(['MSTN_x','MSTN_y'], axis =1, inplace=True)

## Jobs per Half Mile

* Merge into the PM dataframe first based on the origins and again based on the destinations

In [131]:
pm_dataframe_new = pm_dataframe_new.merge(jobs_half_mile, left_on='walkshed_filename_x', right_on='Name_1')
pm_dataframe_new = pm_dataframe_new.merge(jobs_half_mile, left_on='walkshed_filename_y', right_on='Name_1')
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,AVG_TRAINS_y,parking_user,bus_line_count_x,bus_stop_count_x,bus_line_count_y,bus_stop_count_y,Name_1_x,All_Jobs_x,Name_1_y,All_Jobs_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,34,0.0,36.0,18.0,64.0,36.0,ADDISON ROAD-SEAT PLEASANT,205.510639,ANACOSTIA,2332.571897
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,34,0.0,86.0,50.0,64.0,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,2332.571897
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,1,6.24,...,34,0.0,9.0,0.0,64.0,36.0,ARLINGTON CEMETERY,122.300629,ANACOSTIA,2332.571897
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,21,191.52,...,34,0.0,35.0,30.0,64.0,36.0,BALLSTON-MU,51873.732661,ANACOSTIA,2332.571897
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,20,157.8,...,34,0.0,30.0,34.0,64.0,36.0,BENNING ROAD,1301.51626,ANACOSTIA,2332.571897


## Bike travel time in minutes

In [132]:
bike_tt.head()

Unnamed: 0,name_O,name_D,bike_traveltime
0,ANACOSTIA,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,22.166667
1,ANACOSTIA,CAPITOL SOUTH,11.683333
2,ANACOSTIA,EASTERN MARKET,11.808333
3,ANACOSTIA,FARRAGUT NORTH,29.066667
4,ANACOSTIA,FARRAGUT WEST,29.066667


In [133]:
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,AVG_TRAINS_y,parking_user,bus_line_count_x,bus_stop_count_x,bus_line_count_y,bus_stop_count_y,Name_1_x,All_Jobs_x,Name_1_y,All_Jobs_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,34,0.0,36.0,18.0,64.0,36.0,ADDISON ROAD-SEAT PLEASANT,205.510639,ANACOSTIA,2332.571897
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,34,0.0,86.0,50.0,64.0,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,2332.571897
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,1,6.24,...,34,0.0,9.0,0.0,64.0,36.0,ARLINGTON CEMETERY,122.300629,ANACOSTIA,2332.571897
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,21,191.52,...,34,0.0,35.0,30.0,64.0,36.0,BALLSTON-MU,51873.732661,ANACOSTIA,2332.571897
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,20,157.8,...,34,0.0,30.0,34.0,64.0,36.0,BENNING ROAD,1301.51626,ANACOSTIA,2332.571897


In [134]:
pm_dataframe_new = pm_dataframe_new.merge(bike_tt, left_on=['walkshed_filename_y', 'walkshed_filename_x'], 
                       right_on=['name_O', 'name_D'], how='left').drop(['name_O', 'name_D'], axis=1)

In [135]:
pm_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,parking_user,bus_line_count_x,bus_stop_count_x,bus_line_count_y,bus_stop_count_y,Name_1_x,All_Jobs_x,Name_1_y,All_Jobs_y,bike_traveltime
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,0.0,36.0,18.0,64.0,36.0,ADDISON ROAD-SEAT PLEASANT,205.510639,ANACOSTIA,2332.571897,
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,0.0,86.0,50.0,64.0,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,2332.571897,22.166667


## Bikeshare Capacity

In [136]:
bikeshare_cap.head()

Unnamed: 0,Name_1,bike_cap
0,ANACOSTIA,30
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,324
2,BALLSTON-MU,133
3,BENNING ROAD,19
4,BETHESDA,90


In [137]:
pm_dataframe_new = pm_dataframe_new.merge(bikeshare_cap, left_on='walkshed_filename_x', right_on='Name_1', how='left')
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(bikeshare_cap, left_on='walkshed_filename_y', right_on='Name_1', how='left')

In [138]:
pm_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,bus_line_count_y,bus_stop_count_y,Name_1_x,All_Jobs_x,Name_1_y,All_Jobs_y,bike_traveltime,bike_cap_x,Name_1,bike_cap_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,64.0,36.0,ADDISON ROAD-SEAT PLEASANT,205.510639,ANACOSTIA,2332.571897,,,ANACOSTIA,30.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,64.0,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,2332.571897,22.166667,324.0,ANACOSTIA,30.0


## Bus competitveness Index

In [139]:
bus_comp_index.head()

Unnamed: 0,station_name_o,station_name_d,bus_competativeness_index
0,Anacostia,Anacostia,1.0
1,Anacostia,Archives,0.783972
2,Anacostia,Archives,0.26455
3,Anacostia,Benning Road,0.479705
4,Anacostia,Brookland-CUA,0.413793


In [140]:
pm_dataframe_new = pm_dataframe_new.merge(bus_comp_index, left_on=['O', 'D'], 
                       right_on=['station_name_o', 'station_name_d'], how='left').drop(['station_name_o', 'station_name_d'], axis=1)

In [141]:
pm_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,bus_stop_count_y,Name_1_x,All_Jobs_x,Name_1_y,All_Jobs_y,bike_traveltime,bike_cap_x,Name_1,bike_cap_y,bus_competativeness_index
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,36.0,ADDISON ROAD-SEAT PLEASANT,205.510639,ANACOSTIA,2332.571897,,,ANACOSTIA,30.0,0.865438
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,2332.571897,22.166667,324.0,ANACOSTIA,30.0,0.783972


## CTPP Jobs

In [142]:
# pd.set_option('display.max_rows', None)
# pd.options.display.max_rows = None
# pd.reset_option('all')
# ctpp_jobs = ctpp_jobs[['Name_1', 'proportionjobs' ]].rename(columns={'proportionjobs': 'ctpp_jobs'}).copy()

In [143]:
ctpp_jobs.head(2)

Unnamed: 0,Name_1,ctpp_jobs
0,ADDISON ROAD-SEAT PLEASANT,116.616376
1,ANACOSTIA,1763.747494


In [144]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(ctpp_jobs, left_on='walkshed_filename_x', right_on='Name_1').drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(ctpp_jobs, left_on='walkshed_filename_y', right_on='Name_1')

In [145]:
pm_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,All_Jobs_x,Name_1_y,All_Jobs_y,bike_traveltime,bike_cap_x,bike_cap_y,bus_competativeness_index,ctpp_jobs_x,Name_1,ctpp_jobs_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,205.510639,ANACOSTIA,2332.571897,,,30.0,0.865438,116.616376,ANACOSTIA,1763.747494
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,0.0,ANACOSTIA,2332.571897,22.166667,324.0,30.0,0.783972,56594.206356,ANACOSTIA,1763.747494


## Household Income below 50k

In [146]:
hh_incomebelowpoverty.head(2)

Unnamed: 0,Name_1,HH_below_50k
0,ADDISON ROAD-SEAT PLEASANT,922.4
1,ANACOSTIA,2932.3


In [147]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(hh_incomebelowpoverty, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)

pm_dataframe_new = pm_dataframe_new.merge(hh_incomebelowpoverty, left_on='walkshed_filename_y', right_on='Name_1', how='left')


In [148]:
pm_dataframe_new

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,All_Jobs_y,bike_traveltime,bike_cap_x,bike_cap_y,bus_competativeness_index,ctpp_jobs_x,ctpp_jobs_y,HH_below_50k_x,Name_1,HH_below_50k_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.40,...,2332.571897,,,30.0,0.865438,116.616376,1763.747494,922.4,ANACOSTIA,2932.3
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,2332.571897,22.166667,324.0,30.0,0.783972,56594.206356,1763.747494,548.4,ANACOSTIA,2932.3
2,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,2332.571897,22.166667,324.0,30.0,0.264550,56594.206356,1763.747494,548.4,ANACOSTIA,2932.3
3,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,1,6.24,...,2332.571897,,,30.0,0.774713,35.889042,1763.747494,0.0,ANACOSTIA,2932.3
4,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,21,191.52,...,2332.571897,,133.0,30.0,0.361587,7944.309562,1763.747494,933.3,ANACOSTIA,2932.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8996,West Falls Church,Wiehle,WIEHLE-RESTON EAST,WEST FALLS CHURCH-VT/UVA,14.70,MSTN_091,MSTN_059,MSTN_0590MSTN_091,4,58.80,...,388.037340,,41.0,19.0,,1874.160494,496.836388,266.3,WEST FALLS CHURCH-VT/UVA,258.4
8997,West Falls Church,Woodley Park,WOODLEY PARK-ZOO/ADAMS MORGAN,WEST FALLS CHURCH-VT/UVA,12.38,MSTN_040,MSTN_059,MSTN_0590MSTN_040,3,37.14,...,388.037340,,80.0,19.0,0.710445,2435.647230,496.836388,712.2,WEST FALLS CHURCH-VT/UVA,258.4
8998,West Falls Church,Anacostia,ANACOSTIA,WEST FALLS CHURCH-VT/UVA,13.68,MSTN_001,MSTN_059,MSTN_0590MSTN_001,2,27.36,...,388.037340,,30.0,19.0,0.524834,1763.747494,496.836388,2932.3,WEST FALLS CHURCH-VT/UVA,258.4
8999,West Falls Church,Greensboro,GREENSBORO,WEST FALLS CHURCH-VT/UVA,8.20,MSTN_089,MSTN_059,MSTN_0590MSTN_089,2,16.40,...,388.037340,,50.0,19.0,0.688406,4214.167739,496.836388,208.0,WEST FALLS CHURCH-VT/UVA,258.4


## Number of hotels within the walkshed

In [149]:
hotels.head(2)

Unnamed: 0,Name_1,hotelcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,4
1,BETHESDA,1


In [150]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(hotels, left_on='walkshed_filename_x', right_on='Name_1', how='left').drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(hotels, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of entertainment locations within the walkshed

In [151]:
entertainment.head()

Unnamed: 0,Name_1,entertainmentcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8
1,BALLSTON-MU,1
2,BETHESDA,3
3,BROOKLAND-CUA,4
4,CAPITOL SOUTH,1


In [152]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(entertainment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(entertainment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of restaurants located within each walksheds

In [153]:
restaurant.head(2)

Unnamed: 0,Name_1,restaurantcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,4
1,BETHESDA,1


In [154]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(restaurant, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(restaurant, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Station is an Inter-city hub (by ridership)

In [155]:
inter_city.head(2)

Unnamed: 0,Name_1,intercityhub
0,ADDISON ROAD,14657.0
1,ANACOSTIA,25523.0


In [156]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(inter_city, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(inter_city, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Intersection density

In [157]:
intersection_density.head(2)

Unnamed: 0,Name_1,prop_str_dens
0,ADDISON ROAD-SEAT PLEASANT,87.782487
1,ANACOSTIA,71.775767


In [158]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(intersection_density, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(intersection_density, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Job Accessibility by Transit

In [159]:
job_access_by_transit.head(2)

Unnamed: 0,Name_1,Jobs_transit
0,ADDISON ROAD-SEAT PLEASANT,15074627.0
1,ANACOSTIA,47484669.0


In [160]:
job_access_by_transit = job_access_by_transit[['Name_1', 'Jobs_transit']]

In [161]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(job_access_by_transit, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(job_access_by_transit, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of convenience stores within each walksheds

In [162]:
convenience_store.head(2)

Unnamed: 0,Name_1,convenience_storecount
0,ANACOSTIA,3
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,1


In [163]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(convenience_store, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(convenience_store, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## High School Enrollment

In [164]:
highschool_enrollment.head(2)

Unnamed: 0,Name_1,highschoolenroll
0,ANACOSTIA,1593.0
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,728.0


In [165]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(highschool_enrollment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(highschool_enrollment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## College Enrollment within the walkshed

In [166]:
college_enrollment.head()

Unnamed: 0,Name_1,college_enroll
0,BROOKLAND-CUA,5790.0
1,CLARENDON,8.0
2,COURT HOUSE,4797.0
3,CRYSTAL CITY,580.0
4,DUPONT CIRCLE,340.0


In [167]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(college_enrollment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(college_enrollment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Household with zero cars

In [168]:
hh_0_car.head(2)

Unnamed: 0,Name_1,HH_0_car
0,ADDISON ROAD-SEAT PLEASANT,16.0
1,ANACOSTIA,9.0


In [169]:
# hh_0_car['HH_0_car'] = hh_0_car['Num of Household with No Car']
# hh_0_car.drop('Num of Household with No Car', axis=1, inplace=True)

In [170]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(hh_0_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(hh_0_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Households with exactly 1 car

In [171]:
hh_1_car.head(2)

Unnamed: 0,Name_1,HH_1_car
0,ADDISON ROAD-SEAT PLEASANT,182.0
1,ANACOSTIA,62.5


In [172]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(hh_1_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(hh_1_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Households with more than 1 car 

In [173]:
hh_morethan1_car.head(2)

Unnamed: 0,Name_1,HH_more1_car
0,ADDISON ROAD-SEAT PLEASANT,214.0
1,ANACOSTIA,14.5


In [174]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(hh_morethan1_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(hh_morethan1_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Public Admin jobs

In [175]:
public_admin_jobs.head(2)

Unnamed: 0,Name_1,pub_admin_jobs
0,ADDISON ROAD-SEAT PLEASANT,0.0
1,ANACOSTIA,352.027012


In [176]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(public_admin_jobs, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(public_admin_jobs, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of Senior housing facilities within each walksheds

In [177]:
senior_housing_afford.head(2)

Unnamed: 0,Name_1,housing_units_afford
0,ANACOSTIA,1200
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,214


In [178]:
pm_dataframe_new.drop('Name_1', axis=1, inplace=True)
pm_dataframe_new = pm_dataframe_new.merge(senior_housing_afford, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
pm_dataframe_new = pm_dataframe_new.merge(senior_housing_afford, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Terminal Station Dummy

* Merge into the PM dataframe first based on the origins and again based on the destinations

In [179]:
pm_dataframe_new = pm_dataframe_new.merge(terminal_station2, left_on='ID_x', right_on='MSTN')
pm_dataframe_new = pm_dataframe_new.merge(terminal_station2, left_on='ID_y', right_on='MSTN')

## Log of Variables and Export

* Turn column names with _x or _y to _O and _D
* Drop redundant columns
* Fill 0s and NAs with 0.01 to prevent errors with taking the natural log
* Fill terminal dummy columns with 0

In [180]:
pm_dataframe_new.columns = pm_dataframe_new.columns.str.replace('_x','_D')
pm_dataframe_new.columns = pm_dataframe_new.columns.str.replace('_y','_O')
pm_dataframe_new.columns

Index(['O', 'D', 'walkshed_filename_D', 'walkshed_filename_O', 'track_miles',
       'ID_D', 'ID_O', 'pairs', 'passengers', 'riders_miles',
       'peak_fare_per_mile2', 'new_auto_tt2', 'new_auto_tt_per_mile2',
       'bus_tt', 'bus_tt_per_mile', 'proportionhouses_D', 'Total Households_D',
       'proportionhouses_O', 'Total Households_O', 'AVG_TRAINS_D',
       'AVG_TRAINS_O', 'parking_user', 'bus_line_count_D', 'bus_stop_count_D',
       'bus_line_count_O', 'bus_stop_count_O', 'Name_1_D', 'All_Jobs_D',
       'Name_1_O', 'All_Jobs_O', 'bike_traveltime', 'bike_cap_D', 'bike_cap_O',
       'bus_competativeness_index', 'ctpp_jobs_D', 'ctpp_jobs_O',
       'HH_below_50k_D', 'HH_below_50k_O', 'hotelcount_D', 'hotelcount_O',
       'entertainmentcount_D', 'entertainmentcount_O', 'restaurantcount_D',
       'restaurantcount_O', 'intercityhub_D', 'intercityhub_O',
       'prop_str_dens_D', 'prop_str_dens_O', 'Jobs_transit_D',
       'Jobs_transit_O', 'convenience_storecount_D',
       'conve

In [181]:
pm_dataframe_new.drop(['Name_1', 'Name_1_O','Name_1_D', 'MSTN_O', 'MSTN_D', ], axis =1, inplace=True)

In [182]:
pm_dataframe_new.columns[8:]

Index(['passengers', 'riders_miles', 'peak_fare_per_mile2', 'new_auto_tt2',
       'new_auto_tt_per_mile2', 'bus_tt', 'bus_tt_per_mile',
       'proportionhouses_D', 'Total Households_D', 'proportionhouses_O',
       'Total Households_O', 'AVG_TRAINS_D', 'AVG_TRAINS_O', 'parking_user',
       'bus_line_count_D', 'bus_stop_count_D', 'bus_line_count_O',
       'bus_stop_count_O', 'All_Jobs_D', 'All_Jobs_O', 'bike_traveltime',
       'bike_cap_D', 'bike_cap_O', 'bus_competativeness_index', 'ctpp_jobs_D',
       'ctpp_jobs_O', 'HH_below_50k_D', 'HH_below_50k_O', 'hotelcount_D',
       'hotelcount_O', 'entertainmentcount_D', 'entertainmentcount_O',
       'restaurantcount_D', 'restaurantcount_O', 'intercityhub_D',
       'intercityhub_O', 'prop_str_dens_D', 'prop_str_dens_O',
       'Jobs_transit_D', 'Jobs_transit_O', 'convenience_storecount_D',
       'convenience_storecount_O', 'highschoolenroll_D', 'highschoolenroll_O',
       'college_enroll_D', 'college_enroll_O', 'HH_0_car_D', 'HH_0_c

In [183]:
cols_transform = pm_dataframe_new.columns[8:]

for col in cols_transform:
    pm_dataframe_new[f'log_{col}'] = np.log1p(pm_dataframe_new[col])

In [184]:
pm_dataframe_new[[ 'terminal_dummy_2023_O', 'terminal_dummy_2023_D']] = pm_dataframe_new[[ 'terminal_dummy_2023_O', 'terminal_dummy_2023_D' ]].fillna(0)

In [185]:
pm_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_D,walkshed_filename_O,track_miles,ID_D,ID_O,pairs,passengers,riders_miles,...,log_HH_1_car_D,log_HH_1_car_O,log_HH_more1_car_D,log_HH_more1_car_O,log_pub_admin_jobs_D,log_pub_admin_jobs_O,log_housing_units_afford_D,log_housing_units_afford_O,log_terminal_dummy_2023_D,log_terminal_dummy_2023_O
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,24,248.4,...,5.209486,4.15104,5.370638,2.74084,0.0,5.866545,,7.09091,,
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,0.0,4.15104,0.0,2.74084,0.0,5.866545,5.370638,7.09091,,
2,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,99,312.84,...,0.0,4.15104,0.0,2.74084,0.0,5.866545,5.370638,7.09091,,
3,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,1,6.24,...,0.0,4.15104,0.0,2.74084,0.0,5.866545,,7.09091,,
4,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,21,191.52,...,4.166665,4.15104,3.850148,2.74084,5.344562,5.866545,6.347389,7.09091,,


In [186]:
pm_dataframe_new.to_csv("Outputs/pm_dataframe_new.csv")

# Off Peak Dataframe

* off_parking_users contains the number of passengers that used parking in the off peak period
* off_fare_track_miles contains the fare per track mile of each OD pair in the Off-peak period
* nite_wkend_jobs contains the number of night and weekend jobs within a walkshed
* median_income contains the median household income of each walkshed
* off_auto_tt contains the interpolated auto travel times for the off peak period

In [187]:
off_parking_users = pd.read_csv("../Data Preprocessing/Parking Users/output/off_parking_updated.csv")
off_fare_track_miles = pd.read_excel("../Data Preprocessing/Fare per Track Mile/output/railOD_off_peak_fare_per_mile.xlsx")
nite_wkend_jobs = pd.read_excel("../Data Preprocessing/Proportion of Night and Weekend Jobs/output/Proportional_night_weekend_jobs_ML.xlsx")
median_income = pd.read_excel("../Data Preprocessing/Median Household Income/output/proportional_walkshed_household_income_updated.xlsx")
off_auto_tt = pd.read_csv("../Data Preprocessing/Interpolated Auto Travel Times/output/off_interpolated_auto_times.csv")

## Off Peak Passenger Miles

* Filter for only passengers in the Off Peak
* Create an od_pair key column
* Merge track miles to passengers in the am, multiply the track miles by passengers, consolidate the columns, rename to passenger column

In [188]:
off_passengers = passengers[passengers['period']=='Off Peak']
off_passengers['pairs'] = off_passengers.apply(lambda x: str(x['ID'])+str(0)+str(x['ID_1']), axis=1)

In [189]:
offmilesmerge = track_miles2.merge(off_passengers, left_on='pairs', right_on='pairs')
offmilesmerge['riders_miles'] = offmilesmerge['track_miles'] * offmilesmerge['COUNT']
off_dataframe_new = offmilesmerge[['O', 'D','walkshed_filename_x', 'walkshed_filename_y', 'track_miles', 'ID_x', 'ID_y','pairs', 'COUNT', 'riders_miles', ]]
off_dataframe_new.rename(columns={'COUNT':'passengers'}, inplace=True)
off_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2
1,Archives,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8.31,MSTN_062,MSTN_002,MSTN_0020MSTN_062,20,166.2
2,Arlington Cemetery,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARLINGTON CEMETERY,11.39,MSTN_062,MSTN_065,MSTN_0650MSTN_062,2,22.78
3,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,25,356.75
4,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,109,268.14


## Peak Fare & Fare per Track Mile

* Subset only the station ID columns from fare_track_miles
* Create a unique OD pair column
* Merge into the main Off Peak dataframe

In [190]:
off_fare_track_miles= off_fare_track_miles[['pairs2', 'O_MSTN_ID', 'D_MSTN_ID', 'off_peak_fare_per_mile']]
off_fare_track_miles['pairs'] = off_fare_track_miles.apply(lambda x: str(x['O_MSTN_ID'])+str(0)+str(x['D_MSTN_ID']), axis=1)

In [191]:
off_dataframe_new = off_dataframe_new.merge(off_fare_track_miles)
off_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,pairs2,O_MSTN_ID,D_MSTN_ID,off_peak_fare_per_mile
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,1062,MSTN_001,MSTN_062,0.304348
1,Archives,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8.31,MSTN_062,MSTN_002,MSTN_0020MSTN_062,20,166.2,2062,MSTN_002,MSTN_062,0.373045
2,Arlington Cemetery,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARLINGTON CEMETERY,11.39,MSTN_062,MSTN_065,MSTN_0650MSTN_062,2,22.78,65062,MSTN_065,MSTN_062,0.320457
3,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,25,356.75,68062,MSTN_068,MSTN_062,0.269797
4,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,109,268.14,3062,MSTN_003,MSTN_062,0.813008


## Auto Travel Time & Auto Travel Time in Minutes/Mile

* Merge into the main Off Peak dataframe

In [192]:
off_dataframe_new = off_dataframe_new.merge(off_auto_tt, on='pairs')
off_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,pairs2,O_MSTN_ID,D_MSTN_ID,off_peak_fare_per_mile,Unnamed: 0,new_auto_tt2,new_auto_tt_per_mile2
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,1062,MSTN_001,MSTN_062,0.304348,1,13.1,1.2657
1,Archives,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8.31,MSTN_062,MSTN_002,MSTN_0020MSTN_062,20,166.2,2062,MSTN_002,MSTN_062,0.373045,2,13.23,1.592058
2,Arlington Cemetery,Addison Road,ADDISON ROAD-SEAT PLEASANT,ARLINGTON CEMETERY,11.39,MSTN_062,MSTN_065,MSTN_0650MSTN_062,2,22.78,65062,MSTN_065,MSTN_062,0.320457,3,21.349543,1.874411
3,Ballston-MU,Addison Road,ADDISON ROAD-SEAT PLEASANT,BALLSTON-MU,14.27,MSTN_062,MSTN_068,MSTN_0680MSTN_062,25,356.75,68062,MSTN_068,MSTN_062,0.269797,4,33.349998,2.337071
4,Benning Road,Addison Road,ADDISON ROAD-SEAT PLEASANT,BENNING ROAD,2.46,MSTN_062,MSTN_003,MSTN_0030MSTN_062,109,268.14,3062,MSTN_003,MSTN_062,0.813008,5,4.91,1.995935


## Households per Half Mile

c
* Drop redundant columns

In [193]:
off_dataframe_new = off_dataframe_new.merge(households_half_mile, left_on='ID_x', right_on='MSTN')
off_dataframe_new = off_dataframe_new.merge(households_half_mile, left_on='ID_y', right_on='MSTN')
off_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,off_peak_fare_per_mile,Unnamed: 0,new_auto_tt2,new_auto_tt_per_mile2,MSTN_x,proportionhouses_x,Total Households_x,MSTN_y,proportionhouses_y,Total Households_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,0.304348,1,13.1,1.2657,MSTN_062,528.921138,1663,MSTN_001,2075.704342,3639
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,0.632911,183,9.632492,3.048257,MSTN_002,2214.692242,3007,MSTN_001,2075.704342,3639
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,3,18.72,...,0.400641,274,14.017488,2.246392,MSTN_065,0.003184,1,MSTN_001,2075.704342,3639
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,23,209.76,...,0.339912,365,26.440001,2.899123,MSTN_068,11603.906894,13574,MSTN_001,2075.704342,3639
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,19,149.91,...,0.329531,456,16.366593,2.074346,MSTN_003,3304.80553,4621,MSTN_001,2075.704342,3639


In [194]:
off_dataframe_new.drop(['O_MSTN_ID','D_MSTN_ID','MSTN_x','MSTN_y'], axis =1, inplace=True)

## Off Peak Parking Users

* Give station names in pm_parking_users proper MSTN IDs
* Create an OD pair column
* Subset only the pairs column and the parking_users
* Merge into the main PM dataframe

In [195]:
off_parking_users2 = mstns.merge(off_parking_users, left_on = 'WMATA_filename', right_on= 'START_PLACE_NAME')
off_parking_users2 = mstns.merge(off_parking_users2, left_on = 'WMATA_filename' , right_on='END_PLACE_NAME')

In [196]:
off_parking_users2['pairs'] = off_parking_users2.apply(lambda x: str(x['ID_y'])+str(0)+str(x['ID_x']), axis=1)

In [197]:
off_parking_users2 = off_parking_users2[['pairs', 'parking_user']]

In [198]:
off_dataframe_new = off_dataframe_new.merge(off_parking_users2, on='pairs', how='left')
off_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,pairs2,off_peak_fare_per_mile,Unnamed: 0,new_auto_tt2,new_auto_tt_per_mile2,proportionhouses_x,Total Households_x,proportionhouses_y,Total Households_y,parking_user
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,1062,0.304348,1,13.1,1.2657,528.921138,1663,2075.704342,3639,0.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,102,0.632911,183,9.632492,3.048257,2214.692242,3007,2075.704342,3639,6.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,3,18.72,1065,0.400641,274,14.017488,2.246392,0.003184,1,2075.704342,3639,0.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,23,209.76,1068,0.339912,365,26.440001,2.899123,11603.906894,13574,2075.704342,3639,0.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,19,149.91,103,0.329531,456,16.366593,2.074346,3304.80553,4621,2075.704342,3639,0.0


## Number of Bus Lines

* Merge into the Off Peak dataframe first based on the origins and again based on the destinations
* Drop reduncant columns

In [199]:
off_dataframe_new = off_dataframe_new.merge(num_bus_lines, left_on='ID_x', right_on='MSTN')
off_dataframe_new = off_dataframe_new.merge(num_bus_lines, left_on='ID_y', right_on='MSTN')
off_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Total Households_x,proportionhouses_y,Total Households_y,parking_user,MSTN_x,bus_line_count_x,bus_stop_count_x,MSTN_y,bus_line_count_y,bus_stop_count_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,1663,2075.704342,3639,0.0,MSTN_062,36.0,18.0,MSTN_001,64.0,36.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,3007,2075.704342,3639,6.0,MSTN_002,86.0,50.0,MSTN_001,64.0,36.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,3,18.72,...,1,2075.704342,3639,0.0,MSTN_065,9.0,0.0,MSTN_001,64.0,36.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,23,209.76,...,13574,2075.704342,3639,0.0,MSTN_068,35.0,30.0,MSTN_001,64.0,36.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,19,149.91,...,4621,2075.704342,3639,0.0,MSTN_003,30.0,34.0,MSTN_001,64.0,36.0


In [200]:
off_dataframe_new.drop(['MSTN_x','MSTN_y'], axis =1, inplace=True)

## Night and Weekend Jobs

* Merge into the Off Peak dataframe first based on the origins and again based on the destinations

In [201]:
off_dataframe_new = off_dataframe_new.merge(nite_wkend_jobs, left_on='walkshed_filename_x', right_on='Name_1')
off_dataframe_new = off_dataframe_new.merge(nite_wkend_jobs, left_on='walkshed_filename_y', right_on='Name_1')
off_dataframe_new.head()

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Total Households_y,parking_user,bus_line_count_x,bus_stop_count_x,bus_line_count_y,bus_stop_count_y,Name_1_x,Proportion night weekend jobs_x,Name_1_y,Proportion night weekend jobs_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,3639,0.0,36.0,18.0,64.0,36.0,ADDISON ROAD-SEAT PLEASANT,10.449694,ANACOSTIA,134.784916
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,3639,6.0,86.0,50.0,64.0,36.0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,134.784916
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,3,18.72,...,3639,0.0,9.0,0.0,64.0,36.0,ARLINGTON CEMETERY,63.155243,ANACOSTIA,134.784916
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,23,209.76,...,3639,0.0,35.0,30.0,64.0,36.0,BALLSTON-MU,2731.168649,ANACOSTIA,134.784916
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,19,149.91,...,3639,0.0,30.0,34.0,64.0,36.0,BENNING ROAD,309.339252,ANACOSTIA,134.784916


## Median Household Income

* Rename the station name column to make it easier for merging 
* Merge into the Off Peak dataframe first based on the origins and again based on the destinations
* Fill any NAs with 0

In [202]:
median_income.rename(columns={'Name_1':'Name_2'}, inplace=True)

In [203]:
off_dataframe_new = off_dataframe_new.merge(median_income, left_on='walkshed_filename_x', right_on='Name_2')
off_dataframe_new = off_dataframe_new.merge(median_income, left_on='walkshed_filename_y', right_on='Name_2')
off_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Name_1_x,Proportion night weekend jobs_x,Name_1_y,Proportion night weekend jobs_y,Unnamed: 0_y,Name_2_x,Median household income_x,Unnamed: 0,Name_2_y,Median household income_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,ADDISON ROAD-SEAT PLEASANT,10.449694,ANACOSTIA,134.784916,0,ADDISON ROAD-SEAT PLEASANT,77702.0,1,ANACOSTIA,36323.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0,ANACOSTIA,134.784916,2,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,135011.0,1,ANACOSTIA,36323.0
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,3,18.72,...,ARLINGTON CEMETERY,63.155243,ANACOSTIA,134.784916,3,ARLINGTON CEMETERY,,1,ANACOSTIA,36323.0
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,23,209.76,...,BALLSTON-MU,2731.168649,ANACOSTIA,134.784916,5,BALLSTON-MU,127512.0,1,ANACOSTIA,36323.0
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,19,149.91,...,BENNING ROAD,309.339252,ANACOSTIA,134.784916,6,BENNING ROAD,52158.0,1,ANACOSTIA,36323.0


In [204]:
off_dataframe_new[['Median household income_x','Median household income_y', ]] = off_dataframe_new[['Median household income_x','Median household income_y', ]].fillna(0)

## Terminal Station Dummy

* Merge into the Off Peak dataframe first based on the origins and again based on the destinations

In [205]:
off_dataframe_new = off_dataframe_new.merge(terminal_station2, left_on='ID_x', right_on='MSTN')
off_dataframe_new = off_dataframe_new.merge(terminal_station2, left_on='ID_y', right_on='MSTN')

## Bike travel time in minutes

In [206]:
bike_tt.head()

Unnamed: 0,name_O,name_D,bike_traveltime
0,ANACOSTIA,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,22.166667
1,ANACOSTIA,CAPITOL SOUTH,11.683333
2,ANACOSTIA,EASTERN MARKET,11.808333
3,ANACOSTIA,FARRAGUT NORTH,29.066667
4,ANACOSTIA,FARRAGUT WEST,29.066667


In [207]:
off_dataframe_new.head()

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Unnamed: 0_y,Name_2_x,Median household income_x,Unnamed: 0,Name_2_y,Median household income_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,0,ADDISON ROAD-SEAT PLEASANT,77702.0,1,ANACOSTIA,36323.0,MSTN_062,,MSTN_001,
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,2,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,135011.0,1,ANACOSTIA,36323.0,MSTN_002,,MSTN_001,
2,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,3,18.72,...,3,ARLINGTON CEMETERY,0.0,1,ANACOSTIA,36323.0,MSTN_065,,MSTN_001,
3,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,23,209.76,...,5,BALLSTON-MU,127512.0,1,ANACOSTIA,36323.0,MSTN_068,,MSTN_001,
4,Anacostia,Benning Road,BENNING ROAD,ANACOSTIA,7.89,MSTN_003,MSTN_001,MSTN_0010MSTN_003,19,149.91,...,6,BENNING ROAD,52158.0,1,ANACOSTIA,36323.0,MSTN_003,,MSTN_001,


In [208]:
off_dataframe_new = off_dataframe_new.merge(bike_tt, left_on=['walkshed_filename_y', 'walkshed_filename_x'], 
                       right_on=['name_O', 'name_D'], how='left').drop(['name_O', 'name_D'], axis=1)

In [209]:
off_dataframe_new.head(2)

Unnamed: 0.1,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Name_2_x,Median household income_x,Unnamed: 0,Name_2_y,Median household income_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,ADDISON ROAD-SEAT PLEASANT,77702.0,1,ANACOSTIA,36323.0,MSTN_062,,MSTN_001,,
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,135011.0,1,ANACOSTIA,36323.0,MSTN_002,,MSTN_001,,22.166667


## Bikeshare Capacity

In [210]:
bikeshare_cap.head()

Unnamed: 0,Name_1,bike_cap
0,ANACOSTIA,30
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,324
2,BALLSTON-MU,133
3,BENNING ROAD,19
4,BETHESDA,90


In [211]:
off_dataframe_new = off_dataframe_new.merge(bikeshare_cap, left_on='walkshed_filename_x', right_on='Name_1', how='left')
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(bikeshare_cap, left_on='walkshed_filename_y', right_on='Name_1', how='left')

In [212]:
off_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Name_2_y,Median household income_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime,bike_cap_x,Name_1,bike_cap_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,ANACOSTIA,36323.0,MSTN_062,,MSTN_001,,,,ANACOSTIA,30.0
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,ANACOSTIA,36323.0,MSTN_002,,MSTN_001,,22.166667,324.0,ANACOSTIA,30.0


## Bus competitveness Index

In [213]:
bus_comp_index.head()

Unnamed: 0,station_name_o,station_name_d,bus_competativeness_index
0,Anacostia,Anacostia,1.0
1,Anacostia,Archives,0.783972
2,Anacostia,Archives,0.26455
3,Anacostia,Benning Road,0.479705
4,Anacostia,Brookland-CUA,0.413793


In [214]:
off_dataframe_new = off_dataframe_new.merge(bus_comp_index, left_on=['O', 'D'], 
                       right_on=['station_name_o', 'station_name_d'], how='left').drop(['station_name_o', 'station_name_d'], axis=1)

In [215]:
off_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,Median household income_y,MSTN_x,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime,bike_cap_x,Name_1,bike_cap_y,bus_competativeness_index
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,36323.0,MSTN_062,,MSTN_001,,,,ANACOSTIA,30.0,0.865438
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,36323.0,MSTN_002,,MSTN_001,,22.166667,324.0,ANACOSTIA,30.0,0.783972


## CTPP Jobs

In [216]:
# pd.set_option('display.max_rows', None)
# pd.options.display.max_rows = None
# pd.reset_option('all')
# ctpp_jobs = ctpp_jobs[['Name_1', 'proportionjobs' ]].rename(columns={'proportionjobs': 'ctpp_jobs'}).copy()

In [217]:
ctpp_jobs.head(2)

Unnamed: 0,Name_1,ctpp_jobs
0,ADDISON ROAD-SEAT PLEASANT,116.616376
1,ANACOSTIA,1763.747494


In [218]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(ctpp_jobs, left_on='walkshed_filename_x', right_on='Name_1').drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(ctpp_jobs, left_on='walkshed_filename_y', right_on='Name_1')

In [219]:
off_dataframe_new.head(2)

Unnamed: 0,O,D,walkshed_filename_x,walkshed_filename_y,track_miles,ID_x,ID_y,pairs,passengers,riders_miles,...,terminal_dummy_2023_x,MSTN_y,terminal_dummy_2023_y,bike_traveltime,bike_cap_x,bike_cap_y,bus_competativeness_index,ctpp_jobs_x,Name_1,ctpp_jobs_y
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.2,...,,MSTN_001,,,,30.0,0.865438,116.616376,ANACOSTIA,1763.747494
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,,MSTN_001,,22.166667,324.0,30.0,0.783972,56594.206356,ANACOSTIA,1763.747494


## Household Income below 50k

In [220]:
hh_incomebelowpoverty.head(2)

Unnamed: 0,Name_1,HH_below_50k
0,ADDISON ROAD-SEAT PLEASANT,922.4
1,ANACOSTIA,2932.3


In [221]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)

off_dataframe_new = off_dataframe_new.merge(hh_incomebelowpoverty, left_on='walkshed_filename_x', right_on='Name_1', how='left').drop('Name_1', axis=1)

off_dataframe_new = off_dataframe_new.merge(hh_incomebelowpoverty, left_on='walkshed_filename_y', right_on='Name_1', how='left')


## Number of hotels within the walkshed

In [222]:
hotels.head(2)

Unnamed: 0,Name_1,hotelcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,4
1,BETHESDA,1


In [223]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(hotels, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(hotels, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of entertainment locations within the walkshed

In [224]:
entertainment.head()

Unnamed: 0,Name_1,entertainmentcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,8
1,BALLSTON-MU,1
2,BETHESDA,3
3,BROOKLAND-CUA,4
4,CAPITOL SOUTH,1


In [225]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(entertainment, left_on='walkshed_filename_x', right_on='Name_1', how='left').drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(entertainment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of restaurants located within each walksheds

In [226]:
restaurant.head(2)

Unnamed: 0,Name_1,restaurantcount
0,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,4
1,BETHESDA,1


In [227]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(restaurant, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(restaurant, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Station is an Inter-city hub (by ridership)

In [228]:
inter_city.head(2)

Unnamed: 0,Name_1,intercityhub
0,ADDISON ROAD,14657.0
1,ANACOSTIA,25523.0


In [229]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(inter_city, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(inter_city, left_on='walkshed_filename_y', right_on='Name_1', how='left' )

## Intersection density

In [230]:
intersection_density.head(2)

Unnamed: 0,Name_1,prop_str_dens
0,ADDISON ROAD-SEAT PLEASANT,87.782487
1,ANACOSTIA,71.775767


In [231]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(intersection_density, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(intersection_density, left_on='walkshed_filename_y', right_on='Name_1', how='left' )

## Job Accessibility by Transit

In [232]:
job_access_by_transit.head(2)

Unnamed: 0,Name_1,Jobs_transit
0,ADDISON ROAD-SEAT PLEASANT,15074627.0
1,ANACOSTIA,47484669.0


In [233]:
job_access_by_transit = job_access_by_transit[['Name_1', 'Jobs_transit']]

In [234]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(job_access_by_transit, left_on='walkshed_filename_x', right_on='Name_1', how='left').drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(job_access_by_transit, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of convenience stores within each walksheds

In [235]:
convenience_store.head(2)

Unnamed: 0,Name_1,convenience_storecount
0,ANACOSTIA,3
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,1


In [236]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(convenience_store, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(convenience_store, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## High School Enrollment

In [237]:
highschool_enrollment.head(2)

Unnamed: 0,Name_1,highschoolenroll
0,ANACOSTIA,1593.0
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,728.0


In [238]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(highschool_enrollment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(highschool_enrollment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## College Enrollment within the walkshed

In [239]:
college_enrollment.head()

Unnamed: 0,Name_1,college_enroll
0,BROOKLAND-CUA,5790.0
1,CLARENDON,8.0
2,COURT HOUSE,4797.0
3,CRYSTAL CITY,580.0
4,DUPONT CIRCLE,340.0


In [240]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(college_enrollment, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(college_enrollment, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Household with zero cars

In [241]:
hh_0_car.head(2)

Unnamed: 0,Name_1,HH_0_car
0,ADDISON ROAD-SEAT PLEASANT,16.0
1,ANACOSTIA,9.0


In [242]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(hh_0_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(hh_0_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Households with exactly 1 car

In [243]:
hh_1_car.head(2)

Unnamed: 0,Name_1,HH_1_car
0,ADDISON ROAD-SEAT PLEASANT,182.0
1,ANACOSTIA,62.5


In [244]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(hh_1_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(hh_1_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Households with more than 1 car 

In [245]:
hh_morethan1_car.head(2)

Unnamed: 0,Name_1,HH_more1_car
0,ADDISON ROAD-SEAT PLEASANT,214.0
1,ANACOSTIA,14.5


In [246]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(hh_morethan1_car, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(hh_morethan1_car, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Public Admin jobs

In [247]:
public_admin_jobs.head(2)

Unnamed: 0,Name_1,pub_admin_jobs
0,ADDISON ROAD-SEAT PLEASANT,0.0
1,ANACOSTIA,352.027012


In [248]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(public_admin_jobs, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(public_admin_jobs, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Number of Senior housing facilities within each walksheds

In [249]:
senior_housing_afford.head(2)

Unnamed: 0,Name_1,housing_units_afford
0,ANACOSTIA,1200
1,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,214


In [250]:
off_dataframe_new.drop('Name_1', axis=1, inplace=True)
off_dataframe_new = off_dataframe_new.merge(senior_housing_afford, left_on='walkshed_filename_x', right_on='Name_1', how='left' ).drop('Name_1', axis=1)
off_dataframe_new = off_dataframe_new.merge(senior_housing_afford, left_on='walkshed_filename_y', right_on='Name_1', how='left')

## Log of Variables and Export

* Turn column names with _x or _y to _O and _D
* Drop redundant columns
* Fill 0s and NAs with 0.01 to prevent errors with taking the natural log
* Fill terminal dummy columns with 0

In [251]:
off_dataframe_new.columns = off_dataframe_new.columns.str.replace('_x','_D')
off_dataframe_new.columns = off_dataframe_new.columns.str.replace('_y','_O')
off_dataframe_new.columns

Index(['O', 'D', 'walkshed_filename_D', 'walkshed_filename_O', 'track_miles',
       'ID_D', 'ID_O', 'pairs', 'passengers', 'riders_miles', 'pairs2',
       'off_peak_fare_per_mile', 'Unnamed: 0_D', 'new_auto_tt2',
       'new_auto_tt_per_mile2', 'proportionhouses_D', 'Total Households_D',
       'proportionhouses_O', 'Total Households_O', 'parking_user',
       'bus_line_count_D', 'bus_stop_count_D', 'bus_line_count_O',
       'bus_stop_count_O', 'Name_1_D', 'Proportion night weekend jobs_D',
       'Name_1_O', 'Proportion night weekend jobs_O', 'Unnamed: 0_O',
       'Name_2_D', 'Median household income_D', 'Unnamed: 0', 'Name_2_O',
       'Median household income_O', 'MSTN_D', 'terminal_dummy_2023_D',
       'MSTN_O', 'terminal_dummy_2023_O', 'bike_traveltime', 'bike_cap_D',
       'bike_cap_O', 'bus_competativeness_index', 'ctpp_jobs_D', 'ctpp_jobs_O',
       'HH_below_50k_D', 'HH_below_50k_O', 'hotelcount_D', 'hotelcount_O',
       'entertainmentcount_D', 'entertainmentcount_O', '

In [252]:
off_dataframe_new.drop(['Name_1', 'Name_1_O','Name_1_D','Unnamed: 0_O', 'Name_2_O',
       'Unnamed: 0_D', 'Name_2_D', 'MSTN_O', 'MSTN_D', 'Unnamed: 0'], axis =1, inplace=True)

In [253]:
off_dataframe_new.columns

Index(['O', 'D', 'walkshed_filename_D', 'walkshed_filename_O', 'track_miles',
       'ID_D', 'ID_O', 'pairs', 'passengers', 'riders_miles', 'pairs2',
       'off_peak_fare_per_mile', 'new_auto_tt2', 'new_auto_tt_per_mile2',
       'proportionhouses_D', 'Total Households_D', 'proportionhouses_O',
       'Total Households_O', 'parking_user', 'bus_line_count_D',
       'bus_stop_count_D', 'bus_line_count_O', 'bus_stop_count_O',
       'Proportion night weekend jobs_D', 'Proportion night weekend jobs_O',
       'Median household income_D', 'Median household income_O',
       'terminal_dummy_2023_D', 'terminal_dummy_2023_O', 'bike_traveltime',
       'bike_cap_D', 'bike_cap_O', 'bus_competativeness_index', 'ctpp_jobs_D',
       'ctpp_jobs_O', 'HH_below_50k_D', 'HH_below_50k_O', 'hotelcount_D',
       'hotelcount_O', 'entertainmentcount_D', 'entertainmentcount_O',
       'restaurantcount_D', 'restaurantcount_O', 'intercityhub_D',
       'intercityhub_O', 'prop_str_dens_D', 'prop_str_dens_O',

In [254]:
cols_transform = off_dataframe_new.columns[8:]

for col in cols_transform:
    off_dataframe_new[f'log_{col}'] = np.log1p(off_dataframe_new[col])

In [255]:
off_dataframe_new[[ 'terminal_dummy_2023_O', 'terminal_dummy_2023_D']] = off_dataframe_new[[ 'terminal_dummy_2023_O', 'terminal_dummy_2023_D' ]].fillna(0)

In [256]:
off_dataframe_new

Unnamed: 0,O,D,walkshed_filename_D,walkshed_filename_O,track_miles,ID_D,ID_O,pairs,passengers,riders_miles,...,log_HH_0_car_D,log_HH_0_car_O,log_HH_1_car_D,log_HH_1_car_O,log_HH_more1_car_D,log_HH_more1_car_O,log_pub_admin_jobs_D,log_pub_admin_jobs_O,log_housing_units_afford_D,log_housing_units_afford_O
0,Anacostia,Addison Road,ADDISON ROAD-SEAT PLEASANT,ANACOSTIA,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,12,124.20,...,2.833213,2.302585,5.209486,4.151040,5.370638,2.740840,0.000000,5.866545,,7.09091
1,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,0.000000,2.302585,0.000000,4.151040,0.000000,2.740840,0.000000,5.866545,5.370638,7.09091
2,Anacostia,Archives,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,ANACOSTIA,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,206,650.96,...,0.000000,2.302585,0.000000,4.151040,0.000000,2.740840,0.000000,5.866545,5.370638,7.09091
3,Anacostia,Arlington Cemetery,ARLINGTON CEMETERY,ANACOSTIA,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,3,18.72,...,0.000000,2.302585,0.000000,4.151040,0.000000,2.740840,0.000000,5.866545,,7.09091
4,Anacostia,Ballston-MU,BALLSTON-MU,ANACOSTIA,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,23,209.76,...,0.000000,2.302585,4.166665,4.151040,3.850148,2.740840,5.344562,5.866545,6.347389,7.09091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9111,Addison Road,West Hyattsville,WEST HYATTSVILLE,ADDISON ROAD-SEAT PLEASANT,15.52,MSTN_055,MSTN_062,MSTN_0620MSTN_055,5,77.60,...,2.302585,2.833213,3.401197,5.209486,4.828314,5.370638,0.000000,0.000000,6.311735,
9112,Addison Road,Wheaton,WHEATON,ADDISON ROAD-SEAT PLEASANT,20.28,MSTN_046,MSTN_062,MSTN_0620MSTN_046,6,121.68,...,2.917771,2.833213,4.343805,5.209486,5.178971,5.370638,0.000000,0.000000,5.799093,
9113,Addison Road,Wiehle,WIEHLE-RESTON EAST,ADDISON ROAD-SEAT PLEASANT,29.36,MSTN_091,MSTN_062,MSTN_0620MSTN_091,7,205.52,...,0.000000,2.833213,4.269697,5.209486,5.596568,5.370638,0.000000,0.000000,1.098612,
9114,Addison Road,Woodley Park,WOODLEY PARK-ZOO/ADAMS MORGAN,ADDISON ROAD-SEAT PLEASANT,11.41,MSTN_040,MSTN_062,MSTN_0620MSTN_040,24,273.84,...,3.725693,2.833213,4.855929,5.209486,3.749504,5.370638,1.686302,0.000000,,


In [257]:
off_dataframe_new.to_csv("Outputs/off_dataframe_new.csv")

## Aggregated Ridership 

In [266]:
am_data = pd.read_csv("Outputs/am_dataframe_new.csv").drop('Unnamed: 0', axis=1)

In [271]:
am_data.head()

Unnamed: 0,O,D,track_miles,ID_D,ID_O,pairs,passengers,riders_miles,AVG_TRAINS_D,AVG_TRAINS_O,...,log_HH_0_car_D,log_HH_0_car_O,log_HH_1_car_D,log_HH_1_car_O,log_HH_more1_car_D,log_HH_more1_car_O,log_pub_admin_jobs_D,log_pub_admin_jobs_O,log_housing_units_afford_D,log_housing_units_afford_O
0,Anacostia,Addison Road,10.35,MSTN_062,MSTN_001,MSTN_0010MSTN_062,9,93.15,53,33,...,2.833213,2.302585,5.209486,4.15104,5.370638,2.74084,0.0,5.866545,,7.09091
1,Anacostia,Archives,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,...,0.0,2.302585,0.0,4.15104,0.0,2.74084,0.0,5.866545,5.370638,7.09091
2,Anacostia,Archives,3.16,MSTN_002,MSTN_001,MSTN_0010MSTN_002,436,1377.76,62,33,...,0.0,2.302585,0.0,4.15104,0.0,2.74084,0.0,5.866545,5.370638,7.09091
3,Anacostia,Arlington Cemetery,6.24,MSTN_065,MSTN_001,MSTN_0010MSTN_065,5,31.2,27,33,...,0.0,2.302585,0.0,4.15104,0.0,2.74084,0.0,5.866545,,7.09091
4,Anacostia,Ballston-MU,9.12,MSTN_068,MSTN_001,MSTN_0010MSTN_068,95,866.4,53,33,...,0.0,2.302585,4.166665,4.15104,3.850148,2.74084,5.344562,5.866545,6.347389,7.09091


In [282]:
am_data.merge(all_riders, left_on='ID_D', right_on='ID', how='')#.drop_duplicates(by='entrance_passenger')

Unnamed: 0,O,D,track_miles,ID_D,ID_O,pairs,passengers,riders_miles,AVG_TRAINS_D,AVG_TRAINS_O,...,log_pub_admin_jobs_D,log_pub_admin_jobs_O,log_housing_units_afford_D,log_housing_units_afford_O,ID,START_PLACE_NAME,entrance_passenger,ID_1,END_PLACE_NAME,exit_passenger
0,Ballston-MU,Anacostia,9.12,MSTN_001,MSTN_068,MSTN_0680MSTN_001,18,164.16,33,53,...,5.866545,5.344562,7.090910,6.347389,MSTN_001,Anacostia,40970,MSTN_001,Anacostia,42134
1,Benning Road,Anacostia,7.89,MSTN_001,MSTN_003,MSTN_0030MSTN_001,40,315.60,33,51,...,5.866545,0.000000,7.090910,5.556828,MSTN_001,Anacostia,40970,MSTN_001,Anacostia,42134
2,Bethesda,Anacostia,11.21,MSTN_001,MSTN_052,MSTN_0520MSTN_001,13,145.73,33,56,...,5.866545,6.379306,7.090910,7.030857,MSTN_001,Anacostia,40970,MSTN_001,Anacostia,42134
3,Braddock Road,Anacostia,9.85,MSTN_001,MSTN_078,MSTN_0780MSTN_001,24,236.40,33,57,...,5.866545,1.356393,7.090910,6.059123,MSTN_001,Anacostia,40970,MSTN_001,Anacostia,42134
4,Branch Ave,Anacostia,6.70,MSTN_001,MSTN_077,MSTN_0770MSTN_001,116,777.20,33,33,...,5.866545,0.000000,7.090910,,MSTN_001,Anacostia,40970,MSTN_001,Anacostia,42134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8519,McLean,Wiehle,7.99,MSTN_091,MSTN_087,MSTN_0870MSTN_091,92,735.08,27,27,...,0.000000,0.000000,1.098612,4.394449,MSTN_091,Wiehle,62358,MSTN_091,Wiehle,64450
8520,West Falls Church,Wiehle,14.70,MSTN_091,MSTN_059,MSTN_0590MSTN_091,10,147.00,27,28,...,0.000000,0.000000,1.098612,,MSTN_091,Wiehle,62358,MSTN_091,Wiehle,64450
8521,Archives,Wiehle,21.07,MSTN_091,MSTN_002,MSTN_0020MSTN_091,3,63.21,27,62,...,0.000000,0.000000,1.098612,5.370638,MSTN_091,Wiehle,62358,MSTN_091,Wiehle,64450
8522,Federal Center SW,Wiehle,21.94,MSTN_091,MSTN_014,MSTN_0140MSTN_091,17,372.98,27,79,...,0.000000,2.764881,1.098612,6.475433,MSTN_091,Wiehle,62358,MSTN_091,Wiehle,64450


In [284]:
all_riders

Unnamed: 0,ID,START_PLACE_NAME,entrance_passenger,ID_1,END_PLACE_NAME,exit_passenger
0,MSTN_001,Anacostia,40970,MSTN_001,Anacostia,42134
1,MSTN_002,Archives-Navy Memorial,76961,MSTN_002,Archives-Navy Memorial,77261
2,MSTN_003,Benning Road,23432,MSTN_003,Benning Road,22922
3,MSTN_004,Brookland,62047,MSTN_004,Brookland,61909
4,MSTN_005,Capitol South,79204,MSTN_005,Capitol South,81996
...,...,...,...,...,...,...
86,MSTN_087,McLean,16688,MSTN_087,McLean,16714
87,MSTN_088,Tysons Corner,30256,MSTN_088,Tysons Corner,31054
88,MSTN_089,Greensboro,10246,MSTN_089,Greensboro,10562
89,MSTN_090,Spring Hill,9059,MSTN_090,Spring Hill,9301
