In [1]:
import warnings
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Kept ahead of the GTFS imports, as in the original cell order, so that
# warnings raised while importing those packages stay silenced.
warnings.filterwarnings("ignore")

import gtfs_kit as gk
from gtfs_functions import Feed
from gtfs_functions import map_gdf

#### Read in Datasets and Files

* walkshedbuffer contains the Metro station walksheds, whose boundaries overlap
* metro_walkshed_name contains the MSTN ID numbers and station names
* feed contains GTFS bus data for the Washington Area

In [2]:
# Metro station walkshed polygons (boundaries may overlap between stations).
walkshedbuffer = gpd.read_file(filename='../../Data/walksheds_overlapping.zip')

# Lookup table that maps each walkshed name to its MSTN station ID.
metro_walkshed_name = pd.read_csv(filepath_or_buffer='../../Data/metro_walkshed_name.csv')

# GTFS bus feed; the individual tables are pulled off this object further down.
feed = Feed('../../Data/gtfs.zip')

#### Preprocessing


* Convert the CRS of the walkshed boundaries layer
* Remove extra characters and numbers from the station names
* Merge the walkshed buffer with the metro station names to bring in the MSTN IDs
* Retrieve the routes, trips, stops, stop times and shapes from the GTFS data

In [3]:
# Reproject the walkshed polygons to WGS84 (EPSG:4326) before overlaying them
# with the GTFS geometries.
walkshedbuffer = walkshedbuffer.to_crs("EPSG:4326")

# Trailing suffixes that are stripped from the station names so that they can
# be matched against `metro_walkshed_name`.
NAME_SUFFIXES = (' : 0 - 2640', ' : 0 - 22.4525758392805')

# regex=False makes the match literal. pandas < 2.0 treats the pattern as a
# regular expression by default, in which case the '.' in the second suffix is
# a wildcard; being explicit gives the same result on every pandas version.
for suffix in NAME_SUFFIXES:
    walkshedbuffer['Name_1'] = walkshedbuffer['Name_1'].str.replace(suffix, '', regex=False)

# Bring in the MSTN station IDs. This is an inner merge, so a walkshed whose
# cleaned name has no match in the lookup table is dropped.
# NOTE(review): confirm that no walkshed is lost here.
walkshedbuffer_MSTN = walkshedbuffer.merge(metro_walkshed_name, on='Name_1')

In [4]:
# Pull the GTFS tables off the feed in one statement. The right-hand side is
# evaluated left to right, so the tables are accessed in the same order as
# before: routes, trips, stops, stop_times, shapes.
routes, trips, stops, stop_times, shapes = (
    feed.routes,
    feed.trips,
    feed.stops,
    feed.stop_times,
    feed.shapes,
)


INFO:root:Reading "routes.txt".
INFO:root:accessing trips
INFO:root:Reading "trips.txt".
INFO:root:Reading "trips.txt".
INFO:root:computing patterns
INFO:root:_trips is defined in stop_times
INFO:root:Reading "stops.txt".
INFO:root:Reading "stop_times.txt".
INFO:root:Reading "shapes.txt".


* Keep only the shapes whose shape ID is referenced by at least one trip
* Intersect the bus route shapes with the walksheds, group by station, and count the unique shape IDs in each walkshed (the line count)
* Intersect the bus stops with the walksheds, group by station, and count the unique stop names in each walkshed (the stop count)
* Merge the line counts and the stop counts onto the walksheds
* Fill any missing counts with 0
* Export only the bus line counts, bus stop counts, station names and station IDs

In [5]:
# Shape IDs that are referenced by at least one trip.
used_shape_ids = trips['shape_id'].unique()

# Keep only the shapes that some trip actually uses.
route_shapes = shapes.loc[shapes['shape_id'].isin(used_shape_ids)]

In [6]:
# Preview the first five stops to check the columns and the point geometry.
stops.head(n=5)

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,geometry
0,1,1002500,WISCONSIN AVE NW & RIVER RD NW,,38.949019,-77.080369,8,POINT (-77.08037 38.94902)
1,3,3002395,9TH ST & RAILROAD AVE,,39.008222,-76.780363,8,POINT (-76.78036 39.00822)
2,9,3002207,14TH AVE & LANGLEY WAY,,38.990257,-76.984353,5,POINT (-76.98435 38.99026)
3,10,3002213,14TH AVE & LANGLEY WAY,,38.990479,-76.984519,5,POINT (-76.98452 38.99048)
4,11,3002191,KANAWHA ST & 14TH AVE,,38.98832,-76.983581,5,POINT (-76.98358 38.98832)


In [7]:
# Clip the route shapes to the walkshed polygons; each resulting row carries
# the attributes of both the shape and the walkshed it falls in.
shapes_in_walksheds = gpd.overlay(route_shapes, walkshedbuffer_MSTN, how='intersection')

# Number of distinct shape IDs that cross each station's walkshed.
# NOTE(review): a GTFS route can have several shape_ids, so this may be larger
# than the number of distinct routes - confirm this is the intended metric.
walkshed_line_count = (
    shapes_in_walksheds
    .groupby('MSTN')
    .agg(line_count=('shape_id', 'nunique'))
    .reset_index()
)
walkshed_line_count.head(2)

Unnamed: 0,MSTN,line_count
0,MSTN_001,54
1,MSTN_002,68


In [8]:
# Keep the stops that fall inside a walkshed polygon; each resulting row
# carries the attributes of both the stop and the walkshed.
stops_in_walksheds = gpd.overlay(stops, walkshedbuffer_MSTN, how='intersection')

# Number of distinct stop names inside each station's walkshed.
# NOTE(review): stops that share a name but have different stop_ids are
# counted once - confirm that counting by name rather than stop_id is intended.
walkshed_stop_count = (
    stops_in_walksheds
    .groupby('MSTN')
    .agg(stop_count=('stop_name', 'nunique'))
    .reset_index()
)
walkshed_stop_count.head(2)

Unnamed: 0,MSTN,stop_count
0,MSTN_001,36
1,MSTN_002,50


In [9]:
# Attach the per-station counts to the walkshed polygons. Left merges keep
# every walkshed, including those with no intersecting shapes or stops.
walkshedbuffer_line_stop = walkshedbuffer_MSTN.merge(walkshed_line_count, how='left', on='MSTN')
walkshedbuffer_line_stop = walkshedbuffer_line_stop.merge(walkshed_stop_count, how='left', on='MSTN')

# A walkshed without a match has NaN counts after the left merges; record
# those as 0.
# NOTE(review): the counts are displayed as floats (e.g. 29.0); consider
# casting them to an integer dtype if whole numbers are wanted downstream.
count_columns = ['line_count', 'stop_count']
walkshedbuffer_line_stop[count_columns] = walkshedbuffer_line_stop[count_columns].fillna(0)
walkshedbuffer_line_stop.head(2)

Unnamed: 0,Name_1,Acres,Shape_Leng,Shape_Area,StnCode,geometry,SHED_NAME,MSTN,line_count,stop_count
0,ADDISON ROAD-SEAT PLEASANT,246.285276,16545.122053,10728140.0,,"POLYGON ((-76.90003 38.89164, -76.89914 38.891...",Addison Road,MSTN_062,29.0,18.0
1,ANACOSTIA,333.682311,20554.03434,14535140.0,,"POLYGON ((-76.99650 38.86928, -76.99594 38.869...",Anacostia S,MSTN_001,54.0,36.0


In [10]:
# Export the station name, station ID and the two counts (no geometry).
output_path = Path('output/bus_line_stop.csv')

# Create the output folder if it is missing so that a fresh checkout or a
# "Restart & Run All" does not fail on the final cell.
output_path.parent.mkdir(parents=True, exist_ok=True)

walkshedbuffer_line_stop[['Name_1','MSTN','line_count','stop_count']]\
.to_csv(output_path,index=False)