In [11]:
# Make Jupyter Notebook full screen 
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

## Objective:

Want to make heat maps of the city using current bus routes. Maps include:

- Average number of transfers
- Farthest distance possible with zero transfers

### Notes:  

- FarthestDistance() only works with zero transfers, need to make that more as an update to the code
- Not considering trips with times past midnight

Author: Megan Tabbutt <br/>
Last Updated: 9_2_20

In [12]:
# imports:

import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np

In [13]:
# Data paths: 

GTFS_DATA_PATH = "mmt_gtfs/"
# import os; os.listdir(GTFS_DATA_PATH) # Peak whats in the directory 

In [14]:
# Set matplotlib preferences for rest of notebook:
%matplotlib inline

font = {'family' : 'serif', 'weight' : 'normal', 'size'   : 18}
plt.rc('font', **font) #Set this one as the defult, change to 1 or 2 in the plotting code cell

font1 = {'family': 'serif', 'weight': 'bold', 'size': 20, }
font2 = {'family': 'serif', 'weight': 'normal', 'size': 16, }

In [15]:
def getPandasDFCSV(path, file, sep=','):  
    """ Keyword Arg sep: deliminator used in txt file (default = ',')"""
    pandasDF = pd.read_csv(path +  file, sep=sep)
    return pandasDF

In [16]:
trips_df = getPandasDFCSV(GTFS_DATA_PATH, 'trips.txt')
trips_df.info()
trips_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5857 entries, 0 to 5856
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   route_id               5857 non-null   int64 
 1   route_short_name       5857 non-null   int64 
 2   service_id             5857 non-null   object
 3   trip_id                5857 non-null   int64 
 4   trip_headsign          5857 non-null   object
 5   direction_id           5857 non-null   int64 
 6   direction_name         5857 non-null   object
 7   block_id               5857 non-null   int64 
 8   shape_id               5857 non-null   int64 
 9   shape_code             5857 non-null   object
 10  trip_type              5272 non-null   object
 11  trip_sort              5857 non-null   int64 
 12  wheelchair_accessible  5857 non-null   int64 
 13  bikes_allowed          5857 non-null   int64 
dtypes: int64(9), object(5)
memory usage: 640.7+ KB


Unnamed: 0,route_id,route_short_name,service_id,trip_id,trip_headsign,direction_id,direction_name,block_id,shape_id,shape_code,trip_type,trip_sort,wheelchair_accessible,bikes_allowed
0,9041,16,92_WKD,9999999,EAST TRANSFER,0,East Transfer,193133,55293,P16E,D,19620,1,1
1,9028,2,92_WKD,1007843,NORTH TRANSFER: VIA SHERMAN,0,North Transfer,194263,55156,2S,W,20100,1,1
2,9028,2,92_WKD,1007844,NORTH TRANSFER: VIA FORDEM,0,North Transfer,194056,55154,2F,W,21600,1,1
3,9028,2,92_WKD,1007499,NORTH TRANSFER: VIA SHERMAN,0,North Transfer,194052,55156,2S,D,23400,1,1
4,9028,2,92_WKD,1007497,NORTH TRANSFER: VIA FORDEM,0,North Transfer,194058,55154,2F,D,25200,1,1


In [17]:
stop_times_df = getPandasDFCSV(GTFS_DATA_PATH, 'stop_times.txt')
stop_times_df.info()
stop_times_df.head(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 217197 entries, 0 to 217196
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   trip_id              217197 non-null  int64  
 1   stop_sequence        217197 non-null  int64  
 2   stop_id              217197 non-null  int64  
 3   pickup_type          217197 non-null  int64  
 4   drop_off_type        217197 non-null  int64  
 5   arrival_time         217197 non-null  object 
 6   departure_time       217197 non-null  object 
 7   timepoint            217197 non-null  int64  
 8   stop_headsign        217197 non-null  object 
 9   shape_dist_traveled  217197 non-null  float64
dtypes: float64(1), int64(6), object(3)
memory usage: 16.6+ MB


Unnamed: 0,trip_id,stop_sequence,stop_id,pickup_type,drop_off_type,arrival_time,departure_time,timepoint,stop_headsign,shape_dist_traveled
0,9999999,1,7605,0,1,5:27:00,5:27:00,1,EAST TRANSFER,0.0249
1,9999999,2,7739,0,0,5:27:40,5:27:40,0,EAST TRANSFER,0.2003
2,9999999,3,7119,0,0,5:28:33,5:28:33,0,EAST TRANSFER,0.4352


In [18]:
arrivalTimesArray = np.array(stop_times_df['arrival_time'])

arrivalTimesArrayFixed = []
for time in arrivalTimesArray:
    timeList = time.split(":")
    timeList = [int(time) for time in timeList]
    if timeList[0] < 24:
        dateTime = datetime.datetime(2000, 1, 1, timeList[0], timeList[1], timeList[2])
    else:
        dateTime = datetime.datetime(2000, 1, 2, timeList[0]-24, timeList[1], timeList[2])
    arrivalTimesArrayFixed.append(dateTime)
    
stop_times_df['arrival_time'] = arrivalTimesArrayFixed

departureTimesArray = np.array(stop_times_df['departure_time'])

departureTimesArrayFixed = []
for time in departureTimesArray:
    timeList = time.split(":")
    timeList = [int(time) for time in timeList]
    if timeList[0] < 24:
        dateTime = datetime.datetime(2000, 1, 1, timeList[0], timeList[1], timeList[2])
    else:
        dateTime = datetime.datetime(2000, 1, 2, timeList[0]-24, timeList[1], timeList[2])
    departureTimesArrayFixed.append(dateTime)
    
stop_times_df['departure_time'] = departureTimesArrayFixed

stop_times_df.head()

Unnamed: 0,trip_id,stop_sequence,stop_id,pickup_type,drop_off_type,arrival_time,departure_time,timepoint,stop_headsign,shape_dist_traveled
0,9999999,1,7605,0,1,2000-01-01 05:27:00,2000-01-01 05:27:00,1,EAST TRANSFER,0.0249
1,9999999,2,7739,0,0,2000-01-01 05:27:40,2000-01-01 05:27:40,0,EAST TRANSFER,0.2003
2,9999999,3,7119,0,0,2000-01-01 05:28:33,2000-01-01 05:28:33,0,EAST TRANSFER,0.4352
3,9999999,4,7107,0,0,2000-01-01 05:28:53,2000-01-01 05:28:53,0,EAST TRANSFER,0.5255
4,9999999,5,7663,0,0,2000-01-01 05:29:51,2000-01-01 05:29:51,0,EAST TRANSFER,0.782


In [28]:
# given a starting bus stop, look through all the routes that 


def FarthestDistance(startBusStop, pickUpTime, timeDelta=30.0, num_transfers=0):
    """
    pickUpTime: datetime object of format: datetime(2000, 1, 1, H, M, S) or if after midnight start: datetime(2000, 1, 2, H, M, S)
    startBusStop: int - the stop_id of the bus stop that your trip is starting from
    """
    
    timeDelta = timeDelta*60 #minute to second conversion for datetime timedelta objects
    possibleTrips = stop_times_df[stop_times_df['stop_id'] == startBusStop]
    arrivalTimes = possibleTrips['arrival_time']
    timeDeltas = [(arrivalTimes - pickUpTime) for time in arrivalTimes]
    print(timeDeltas)
    
    
FarthestDistance(7605, datetime.datetime(2000, 1, 2, 5, 28, 0))

[0        -2 days +23:59:00
35025    -1 days +00:29:00
35065    -1 days +00:56:00
35105    -1 days +01:26:00
35145    -1 days +01:56:00
                ...       
194948   -1 days +12:57:00
194979   -1 days +13:57:00
195010   -1 days +14:57:00
195041   -1 days +15:57:00
195072   -1 days +16:57:00
Name: arrival_time, Length: 183, dtype: timedelta64[ns], 0        -2 days +23:59:00
35025    -1 days +00:29:00
35065    -1 days +00:56:00
35105    -1 days +01:26:00
35145    -1 days +01:56:00
                ...       
194948   -1 days +12:57:00
194979   -1 days +13:57:00
195010   -1 days +14:57:00
195041   -1 days +15:57:00
195072   -1 days +16:57:00
Name: arrival_time, Length: 183, dtype: timedelta64[ns], 0        -2 days +23:59:00
35025    -1 days +00:29:00
35065    -1 days +00:56:00
35105    -1 days +01:26:00
35145    -1 days +01:56:00
                ...       
194948   -1 days +12:57:00
194979   -1 days +13:57:00
195010   -1 days +14:57:00
195041   -1 days +15:57:00
195072   -1 days +16:

NameError: name 'arrival_time' is not defined

In [None]:


# These times show up and can cause problems: There are many more too, but be aware. 
# badTimes = ['24:04:00', '24:00:00', '24:23:00', '24:09:00', '24:39:00', '24:13:00', '24:18:00', '24:03:00', '24:21:25', '24:02:57', '24:09:37']

def getBusRouteZeroTransfersFarthestPoint(start, RideKey, startTime): # Ridekey = 87_WKD for now
    
    StartBusRoutes = []
    
    #Find the routes that have this stop as an option for them to stop at

    for i in routesStopsTryAgain:
        if start in routesStopsTryAgain[i]:
            StartBusRoutes.append(i)
    #print(StartBusRoutes)
    
    # Get the dataframe list for these routes
    possibleTrips = []
    for i in StartBusRoutes:
        if 1 > 0:   # I hate untabbing things, to yea, this is true... 
            #print(i)
            tripsDataStart = tripsData[tripsData.route_short_name == int(i)]
            tripsDataStart = tripsDataStart[tripsDataStart.service_id == RideKey]
        
            for j in tripsDataStart.iterrows():
                tripID = j[1]['trip_id']
                stopsTimeData = stopTimesData[stopTimesData.trip_id == tripID]
            
                for n in stopsTimeData.iterrows():
                    if n[1]['stop_id'] == start:
                        #if n[1]['arrival_time'] in badTimes:
                            #continue
                        # There are bad entries for times like in the list given above, just skip these instances
                        try:
                            datetime_objectRoute = datetime.strptime(n[1]['arrival_time'], '%H:%M:%S')
                        except ValueError:
                            continue
                        #print(datetime_objectRoute)
                        datetime_objectStart = datetime.strptime(startTime, '%H:%M:%S')
                        timeDelta = abs(datetime_objectStart - datetime_objectRoute) #I am timedelta object
                        if timeDelta.seconds < (30*60):
                            timeDelta
                            possibleTrips.append(tripID)
    #print(possibleTrips)
    
    #Now we have a list of possible trips, need to find the farthest distance to go in each trip and then the farthest of those possibilities
    farthestPoint = [0.0]
    for trip in possibleTrips:
        stopTimesDataSpec = stopTimesData[stopTimesData.trip_id == int(trip)]
        for stop in stopTimesDataSpec.iterrows():
            startingPoint = stopIDLookUp[str(start)]
            endingStop = stop[1]['stop_id']
            endingPoint = stopIDLookUp[str(endingStop)]
            distanceMiles = MilesDistanceFromGlobePoints(startingPoint[0], startingPoint[1], endingPoint[0], endingPoint[1])
            if distanceMiles > farthestPoint[0]:
                farthestPoint[0] = distanceMiles
    #print(farthestPoint)
    return farthestPoint[0]

# I am example formatting and an example run that will work for testing: 
getBusRouteZeroTransfersFarthestPoint(4100, '87_WKD', '8:20:00')

