In [1]:
import os
os.makedirs('output', exist_ok=True)

import sys
sys.path.append('../')

from utils import load_walksheds, overlay_wks

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
import warnings
warnings.filterwarnings("ignore")

import gtfs_kit as gk
from gtfs_functions import Feed

#### Read in Datasets and Files

* walskhedbuffer is the Metro station walksheds with overlapping boundaries
* metro_walkshed_name contains the MSTN ID numbers and station names
* feed contains GTFS bus data for the Washington Area

In [2]:
wksp5, wksp75 = load_walksheds()

In [3]:
metro_walkshed_name = pd.read_csv('../../Data/metro_walkshed_name.csv')
feed = Feed('../../Data/gtfs.zip')

#### Preprocessing


* Merge the walkshed buffer with the metro station names to bring in the MSTN IDs
* Retrive the routes, trips, stops, stop times and shapes from the GTFS data

In [4]:
wksp5_MSTN, wksp75_MSTN = wksp5.merge(metro_walkshed_name,on='Name_1'), wksp75.merge(metro_walkshed_name,on='Name_1')

In [5]:
routes = feed.routes
trips = feed.trips
stops = feed.stops
stop_times = feed.stop_times
shapes = feed.shapes


INFO:root:Reading "routes.txt".
INFO:root:accessing trips
INFO:root:Start date is None. You should either specify a start date or set busiest_date to True.
INFO:root:Reading "trips.txt".
INFO:root:File "calendar.txt" not found.
INFO:root:Reading "calendar_dates.txt".
INFO:root:The busiest date/s of this feed or your selected date range is/are:  ['2020-07-17', '2020-06-19', '2020-08-07', '2020-05-08', '2020-07-31', '2020-07-03', '2020-06-05', '2020-05-15', '2020-10-02', '2020-08-14', '2020-09-25', '2020-05-29', '2020-08-21', '2020-09-11', '2020-09-18', '2020-06-12', '2020-08-28', '2020-06-26', '2020-07-24', '2020-10-09', '2020-09-04', '2020-07-10'] with 16380 trips.
INFO:root:In the case that more than one busiest date was found, the first one will be considered.
INFO:root:In this case is 2020-07-17.
INFO:root:Reading "stop_times.txt".
INFO:root:_trips is defined in stop_times
INFO:root:Reading "stops.txt".
INFO:root:computing patterns
INFO:root:Reading "shapes.txt".


* Filter the shapes for when the shape is within the trips
* Intersect the shapes of the bus routes with the walksheds, groupby the station and aggregate the number of lines by the number of unique lines
*  Intersect the shapes of the bus routes with the walksheds, groupby the station and aggregate the number of spots by the number of unique stop names
*  Merge the bus lines and the bus stops together
*  Fill any NAs with 0
*  Export only the bus lines, bus stops, station names and station IDs
*  All of these steps are repeated for the 0.5 and 0.75 walksheds

In [6]:
route_shapes = shapes[shapes.shape_id.isin(trips.shape_id.unique())]

In [7]:
stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,geometry
0,1,1002500,WISCONSIN AVE NW & RIVER RD NW,,38.949019,-77.080369,8,POINT (-77.08037 38.94902)
1,3,3002395,9TH ST & RAILROAD AVE,,39.008222,-76.780363,8,POINT (-76.78036 39.00822)
2,9,3002207,14TH AVE & LANGLEY WAY,,38.990257,-76.984353,5,POINT (-76.98435 38.99026)
3,10,3002213,14TH AVE & LANGLEY WAY,,38.990479,-76.984519,5,POINT (-76.98452 38.99048)
4,11,3002191,KANAWHA ST & 14TH AVE,,38.98832,-76.983581,5,POINT (-76.98358 38.98832)


In [8]:
walkshed_line_count_p5, walkshed_line_count_p75 = overlay_wks(route_shapes)

In [9]:
walkshed_line_count_p5_MSTN, walkshed_line_count_p75_MSTN = walkshed_line_count_p5.merge(metro_walkshed_name,on='Name_1'), walkshed_line_count_p75.merge(metro_walkshed_name,on='Name_1')

In [10]:
walkshed_line_count_p5_MSTN = walkshed_line_count_p5_MSTN.groupby('MSTN').agg({'shape_id':'nunique'})\
.reset_index().rename({'shape_id':'line_count_p5'},axis=1)

walkshed_line_count_p75_MSTN = walkshed_line_count_p75_MSTN.groupby('MSTN').agg({'shape_id':'nunique'})\
.reset_index().rename({'shape_id':'line_count_p75'},axis=1)


In [11]:
walkshed_stop_count_p5, walkshed_stop_count_p75 = overlay_wks(stops)

In [12]:
walkshed_stop_count_p5_MSTN, walkshed_stop_count_p75_MSTN = walkshed_stop_count_p5.merge(metro_walkshed_name,on='Name_1'), walkshed_stop_count_p75.merge(metro_walkshed_name,on='Name_1')

In [13]:
walkshed_stop_count_p5_MSTN = walkshed_stop_count_p5_MSTN.groupby('MSTN').agg({'stop_name':'nunique'})\
.reset_index().rename({'stop_name':'stop_count_p5'},axis=1)

walkshed_stop_count_p75_MSTN = walkshed_stop_count_p75_MSTN.groupby('MSTN').agg({'stop_name':'nunique'})\
.reset_index().rename({'stop_name':'stop_count_p75'},axis=1)

In [16]:
walkshedbuffer_line_stop_p5 = wksp5_MSTN.merge(walkshed_line_count_p5_MSTN,on='MSTN',how='left')\
.merge(walkshed_stop_count_p5_MSTN,on='MSTN',how='left')

walkshedbuffer_line_stop_p75 = wksp75_MSTN.merge(walkshed_line_count_p75_MSTN,on='MSTN',how='left')\
.merge(walkshed_stop_count_p75_MSTN,on='MSTN',how='left')


walkshedbuffer_line_stop_p5[['line_count_p5','stop_count_p5']] = walkshedbuffer_line_stop_p5[['line_count_p5','stop_count_p5']].fillna(0)

walkshedbuffer_line_stop_p75[['line_count_p75','stop_count_p75']] = walkshedbuffer_line_stop_p75[['line_count_p75','stop_count_p75']].fillna(0)

walkshedbuffer_line_stop_p75.head(2)

Unnamed: 0,Name_1,Acres,Shape_Leng,Shape_Area,StnCode,geometry,SHED_NAME,MSTN,line_count_p75,stop_count_p75
0,ADDISON ROAD-SEAT PLEASANT,246.285276,16545.122053,10728140.0,,"POLYGON ((-76.88310 38.88655, -76.88313 38.885...",Addison Road,MSTN_062,36.0,33.0
1,ANACOSTIA,333.682311,20554.03434,14535140.0,,"POLYGON ((-76.98076 38.86278, -76.98079 38.861...",Anacostia S,MSTN_001,69.0,75.0


In [17]:
walkshedbuffer_line_stop_p5[['Name_1','MSTN','line_count_p5','stop_count_p5']]\
.to_csv('output/bus_line_stop_p5.csv',index=False)

walkshedbuffer_line_stop_p75[['Name_1','MSTN','line_count_p75','stop_count_p75']]\
.to_csv('output/bus_line_stop_p75.csv',index=False)