# Transit Availability

Generating a measure of transit level-of-service linked to a Hex Grid in the Toronto Region

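This measure is the number of transit trips that stop in each cell of the hex grid, expressed as an average number of trips per hour over one week (trips are counted per stop and then summed over the stops that fall in each cell)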

GTFS data were sourced from https://transitfeeds.com/ (data range is for Nov 28, 2018 to Dec 22, 2018)

In [135]:
! tree ../input_data/transit_gtfs/

../input_data/transit_gtfs/
└── 2018_december
    ├── gtfs_barrie.zip
    ├── gtfs_brampton.zip
    ├── gtfs_burlington.zip
    ├── gtfs_durham.zip
    ├── gtfs_go.zip
    ├── gtfs_guelph.zip
    ├── gtfs_hamilton.zip
    ├── gtfs_kw.zip
    ├── gtfs_milton.zip
    ├── gtfs_mississauga.zip
    ├── gtfs_niagara.zip
    ├── gtfs_oakville.zip
    ├── gtfs_toronto.zip
    └── gtfs_york.zip

1 directory, 14 files


We use Python to count the number of unique trips per stop

In [163]:
import pandas as pd
import geopandas as gpd
import zipfile
from shapely.geometry import Point

# One-row placeholder frame (stop_id 'xxx' at 0,0 with zero counts) that fixes
# the column layout; the per-feed stop tables are appended to it later on.
stop_columns = ['stop_lat', 'stop_lon', 'N_trips_sat', 'N_trips_sun', 'N_trips_wed', 'N_week', 'N_per_hour']
Data = dict(
    stop_id=['xxx'],
    stop_lat=[0.0],
    stop_lon=[0.0],
    N_trips_sat=[0],
    N_trips_sun=[0],
    N_trips_wed=[0],
    N_week=[0],
    N_per_hour=[0.0],
)
# stop_id is not a column: it only supplies the index label
dfs = pd.DataFrame(Data, index=Data['stop_id'], columns=stop_columns)
dfs

Unnamed: 0,stop_lat,stop_lon,N_trips_sat,N_trips_sun,N_trips_wed,N_week,N_per_hour
xxx,0.0,0.0,0,0,0,0,0.0


In [164]:
# the GTFS zip archives that are going to be processed
gtfs_dir = '../input_data/transit_gtfs/2018_december/'
feed_names = [
    'barrie',
    'brampton',
    'burlington',
    'durham',
    'go',
    'guelph',
    'hamilton',
    'kw',
    'milton',
    'mississauga',
    'niagara',
    'oakville',
    'toronto',
    'york',
]
# every archive is named gtfs_<feed>.zip inside the same directory
gtfs_list = [gtfs_dir + 'gtfs_' + feed + '.zip' for feed in feed_names]

In [165]:
# Looping over each GTFS file, counting the number of trips per stop for a
# Saturday, a Sunday and a Wednesday (the Wednesday count is scaled to the
# five weekdays), then stacking every feed's stops onto dfs.

# Week of interest (Dec 2-8, 2018) as YYYYMMDD integers, the form the GTFS
# date columns take once pandas has parsed them.
WEEK_START = 20181202
WEEK_END = 20181208

# calendar.txt day-of-week column that is sampled for each day type
DAY_COLUMNS = {'wed': 'wednesday', 'sat': 'saturday', 'sun': 'sunday'}

# Dates sampled when a feed only ships calendar_dates.txt.
# NOTE(review): the Sunday sample (Dec 9) lies just outside the Dec 2-8 week
# used for calendar.txt feeds (Dec 2 is the Sunday inside it) -- confirm
# which one is intended before changing it.
SAMPLE_DATES = {'wed': 20181205, 'sat': 20181208, 'sun': 20181209}


def _read_gtfs_table(z, name, dtype="str"):
    """Read the CSV member `name` of the open zip archive `z` into a DataFrame.

    Raises KeyError (from ZipFile.open) when the archive has no such member.
    """
    # the member handle is closed as soon as the table is parsed
    with z.open(name) as f:
        return pd.read_csv(f, dtype=dtype)


def _service_ids_by_day(z):
    """Return {'wed': ..., 'sat': ..., 'sun': ...} Series of service_ids.

    calendar.txt is preferred: a service is kept when its date range covers
    the whole week of interest and it runs on the given day of the week.
    When calendar.txt is missing, empty, or lacks the needed columns, the
    sampled dates in calendar_dates.txt are used instead.
    """
    needed = {'service_id', 'start_date', 'end_date'} | set(DAY_COLUMNS.values())
    try:
        df_cal = _read_gtfs_table(z, 'calendar.txt', dtype={'service_id': 'str'})
    except (KeyError, pd.errors.EmptyDataError):
        # no usable calendar.txt in this feed
        df_cal = None

    if df_cal is not None and needed.issubset(df_cal.columns):
        in_week = df_cal[(df_cal['start_date'] <= WEEK_START) & (df_cal['end_date'] >= WEEK_END)]
        return {day: in_week.loc[in_week[col] > 0, 'service_id']
                for day, col in DAY_COLUMNS.items()}

    df_cal = _read_gtfs_table(z, 'calendar_dates.txt', dtype={'service_id': 'str'})
    # exception_type 1 adds service on a date, 2 removes it; only added
    # service actually runs, so removals must not be counted
    if 'exception_type' in df_cal.columns:
        df_cal = df_cal[df_cal['exception_type'] == 1]
    return {day: df_cal.loc[df_cal['date'] == date, 'service_id']
            for day, date in SAMPLE_DATES.items()}


def _trips_per_stop(df_trips, df_stop_times, service_ids, column):
    """Count stop_times rows per stop_id for trips run by `service_ids`.

    Returns a one-column DataFrame named `column`, indexed by stop_id.
    """
    # trips linked with the service IDs
    trip_ids = df_trips.loc[df_trips['service_id'].isin(service_ids), 'trip_id']
    # stop visits made by those trips
    visits = df_stop_times.loc[df_stop_times['trip_id'].isin(trip_ids), 'stop_id']
    return visits.value_counts().rename(column).to_frame()


feed_frames = []

for gtfs in gtfs_list:

    # the archive is closed once all of its tables are loaded
    with zipfile.ZipFile(gtfs) as z:
        df_stops = _read_gtfs_table(z, 'stops.txt')
        df_stop_times = _read_gtfs_table(z, 'stop_times.txt')
        df_trips = _read_gtfs_table(z, 'trips.txt')
        service_ids = _service_ids_by_day(z)

    dfsat = _trips_per_stop(df_trips, df_stop_times, service_ids['sat'], 'N_trips_sat')
    dfsun = _trips_per_stop(df_trips, df_stop_times, service_ids['sun'], 'N_trips_sun')
    dfwed = _trips_per_stop(df_trips, df_stop_times, service_ids['wed'], 'N_trips_wed')
    dfwed['N_trips_wed'] = dfwed['N_trips_wed'] * 5  # by number of weekdays

    # outer joins keep stops that are only served on some of the day types
    dfo = dfsat.join(dfsun, how='outer').join(dfwed, how='outer')

    # attach the counts to every stop of the feed; stops without service get 0
    dfo = df_stops.set_index('stop_id').join(dfo)
    dfo = dfo[['stop_lat', 'stop_lon', 'N_trips_sat', 'N_trips_sun', 'N_trips_wed']]
    dfo = dfo.fillna(0)

    dfo["N_week"] = dfo["N_trips_sat"] + dfo["N_trips_sun"] + dfo["N_trips_wed"]
    dfo["N_per_hour"] = dfo["N_week"] / (7 * 24)

    feed_frames.append(dfo)

# DataFrame.append was removed in pandas 2.0; one concat over all feeds gives
# the same stacked table without re-copying dfs on every iteration
dfs = pd.concat([dfs] + feed_frames)

# the 'xxx' seed row at (0, 0) is not a real stop, so keep it out of the output
# (assumes no feed uses 'xxx' as a genuine stop_id)
dfs = dfs.drop(index='xxx', errors='ignore')

In [167]:
# make sure the coordinate columns are numeric before building geometries
dfs['stop_lon'] = pd.to_numeric(dfs['stop_lon'])
dfs['stop_lat'] = pd.to_numeric(dfs['stop_lat'])

# one point per stop, x = longitude and y = latitude
geometry = [Point(lon, lat) for lon, lat in zip(dfs['stop_lon'], dfs['stop_lat'])]

# the stops are declared as EPSG:4326 and then reprojected to EPSG:32617
crs = {'init': 'epsg:4326'}
gdfs = gpd.GeoDataFrame(dfs, crs=crs, geometry=geometry).to_crs(epsg="32617")

In [None]:
# write the projected stops and their trip counts to a shapefile
# NOTE(review): shapefile field names are limited to 10 characters, so the
# 11-character N_trips_* columns are presumably shortened on write -- confirm
gdfs.to_file("../input_data/transit_gtfs/2018_december/stops_N_trips.shp", driver='ESRI Shapefile')

Load the data into PostGIS

```sh
# Convert the stops shapefile to SQL and pipe it into the urban_form_toronto
# database as table stops_N_trips:
#   -I            build a spatial index on the geometry column
#   -s 32617      tag the geometry with SRID 32617
#   -W "latin1"   character encoding of the shapefile's attribute table
# NOTE(review): the path has no leading ../, so this presumably runs from the
# project root rather than the notebook's directory -- confirm.
shp2pgsql -I -s 32617 -W "latin1" input_data/transit_gtfs/2018_december/stops_N_trips.shp stops_N_trips | psql -U ja -d urban_form_toronto
```

Join the data to the hex grid

```sql
-- (re)build a GiST spatial index on the stop points before joining

DROP INDEX IF EXISTS stops_N_trips_gix;
CREATE INDEX stops_N_trips_gix
    ON stops_N_trips
    USING GIST (geom);

-- for every hex cell, total n_per_hour over the stops that intersect it;
-- cells with no stops are kept and get a total of 0

DROP TABLE IF EXISTS out_data_hex_transit2018;
CREATE TABLE out_data_hex_transit2018 AS (
    SELECT
        sum(coalesce(s.n_per_hour, 0)) AS n_per_hour,
        h.id AS hexid
    FROM hex_grid_200m AS h
    LEFT OUTER JOIN stops_N_trips AS s
        ON ST_Intersects(s.geom, h.geom)
    GROUP BY hexid
);

-- write the per-hex totals to a CSV file with a header row

\COPY out_data_hex_transit2018 TO 'out_data_hex_transit2018.csv' WITH (FORMAT CSV, HEADER);


```