# API to get Live Data

In [2]:
# mbtaonbus
# main_functions.py
# Created by M. Haynes
# Late July 2018 & January 2019 (Flask)
#

from urllib.request import Request, urlopen
import gzip, json, datetime, urllib, math, os
import pandas as pd

# MBTA API settings.
# The key is taken from the MBTA_API_KEY environment variable when it is set;
# the literal is kept only as a fallback so existing setups keep working.
# NOTE(review): a key committed to source control should be rotated and the
# fallback removed once every deployment sets the environment variable.
apikey = os.environ.get('MBTA_API_KEY', 'bacd751f713146e091341c228d6e0ba0')
# Base URL every API path is appended to.
mainurl = 'https://api-v3.mbta.com/'
# Directory holding the GTFS text files loaded at import time.
data_path = '../data/MBTA_GTFS/'

def getmapboxkey():
    """Return the Mapbox settings read from the environment.

    The result maps "mapbox_key", "mapbox_user" and "mapbox_style" to the
    values of the MAPBOX_KEY, MAPBOX_USER and MAPBOX_STYLE environment
    variables. Raises KeyError if any of them is not set.
    """
    variable_names = ("MAPBOX_KEY", "MAPBOX_USER", "MAPBOX_STYLE")
    # Each dictionary key is simply the lower-cased variable name.
    return {name.lower(): os.environ[name] for name in variable_names}

# GTFS lookup tables, read once at import time from data_path.
# All of the reads below raise if the corresponding file is missing.

# Route Data:
route_data = pd.read_csv(data_path+'routes2.txt')

# Stop Data:
stop_data = pd.read_csv(data_path+'stops2.txt')

# Shape Data:
shape_data = pd.read_csv(data_path+'shapes2.txt')
# Round the shape point coordinates to 6 decimal places.
shape_data = shape_data.round({'shape_pt_lat': 6, 'shape_pt_lon': 6})

# Trip Pattern Data:
trip_patterns = pd.read_csv(data_path+'trip_patterns.txt', dtype={'trip_id': int, 'pattern': int, 'shape_id': str})

# Unique Patterns:
unique_stopstrings = pd.read_csv(data_path+'unique_stopstrings.txt', dtype={'pattern': int, 'stops': str})
# NOTE(review): the result of this apply() is discarded, so the 'stops' column
# stays a plain string. getstops() relies on that (it strips the first and last
# character and splits on ',' itself), so assigning the result back here would
# break it. The line only has an effect if a value cannot be sliced/split.
unique_stopstrings.stops.apply(lambda x: x[1:-1].split(',') )

#Function that gets data compressed and then uncompresses it to JSON, returns JSON:
def apiget(midurl, endurl=''):
    """Fetch an API resource, asking for gzip, and return the decoded JSON.

    midurl -- path appended to mainurl (for example 'vehicles').
    endurl -- extra query string appended after the api_key parameter; it
              must start with '&' (for example '&filter[trip]=123').

    Returns the parsed JSON document, or None when the request fails, the
    body cannot be decompressed, or the body is not valid UTF-8 JSON.
    Callers test the result with "is not None".
    """
    import zlib
    from http.client import HTTPException

    fullurl = mainurl + midurl + '?api_key=' + apikey + endurl
    req = Request(fullurl)
    req.add_header('Accept-Encoding', 'gzip')
    try:
        # The context manager closes the connection even if read() fails.
        with urlopen(req) as response:
            raw = response.read()
    except (OSError, ValueError, HTTPException):
        # URLError/HTTPError are OSError subclasses; ValueError covers a
        # malformed URL.
        return None

    # The server may ignore Accept-Encoding and answer uncompressed; only
    # decompress when the body starts with the gzip magic number.
    if raw[:2] == b'\x1f\x8b':
        try:
            raw = gzip.decompress(raw)
        except (OSError, EOFError, zlib.error):
            return None

    try:
        return json.loads(raw.decode('utf-8'))
    except ValueError:
        # Covers both UnicodeDecodeError and json.JSONDecodeError.
        return None

# Sometimes it won't compress so this is for those times, returns JSON
def apiget_nocompress(midurl, endurl=''):
    """Fetch an API resource without requesting compression; return the JSON.

    midurl -- path appended to mainurl.
    endurl -- extra query string appended after the api_key parameter.

    Unlike apiget(), errors are not swallowed: network failures and invalid
    JSON propagate to the caller.
    """
    fullurl = mainurl + midurl + '?api_key=' + apikey + endurl
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(fullurl) as response:
        return json.loads(response.read().decode())

# Function to convert time to a nice string for presentation:
def converttime(time_in):
    """Convert an ISO-8601 timestamp into an 'H:MM:SS' 12-hour clock string.

    time_in -- string such as '2019-01-15T16:03:03-05:00', or None.

    Returns None when time_in is None. The hour has no leading zero and no
    AM/PM marker (16:03:03 becomes '4:03:03', 00:05:09 becomes '12:05:09').
    Raises ValueError if the string does not match the expected format.
    """
    if time_in is None:
        return None
    if time_in[-3:-2] == ":":
        time_in = time_in[:-3] + time_in[-2:]  # remove the last colon from UTC offset
    parsed = datetime.datetime.strptime(time_in, "%Y-%m-%dT%H:%M:%S%z")
    # '%-I' is a platform-specific strftime extension (unsupported on
    # Windows), so the unpadded 12-hour value is computed by hand.
    hour12 = parsed.hour % 12 or 12
    return "%d:%s" % (hour12, parsed.strftime("%M:%S"))

# Function to look up data and return a dictionary of attributes:
def getdata(data, id_col, id_in):
    """Return the first row of data whose id_col equals str(id_in), as a dict.

    data   -- pandas DataFrame to search.
    id_col -- name of the column holding the identifiers.
    id_in  -- identifier to look for; it is compared as a string.

    Returns {} when no row matches, when id_col is not a column of data, or
    when data cannot be indexed at all (for example None).
    """
    try:
        matches = data[data[id_col] == str(id_in)]
    except (KeyError, TypeError, AttributeError):
        return {}
    # "Not found" is the normal miss case, so test for it explicitly rather
    # than relying on an IndexError.
    if len(matches) == 0:
        return {}
    return matches.head(1).to_dict('records')[0]

# Function to return the ordinal version of a number (1 -> '1st', 12 -> '12th').
# Based on: https://stackoverflow.com/questions/9647202/ordinal-numbers-replacement
def ordinal(n):
    """Return n followed by its English ordinal suffix.

    Numbers whose tens digit is 1 (11, 12, 13, 111, ...) and numbers whose
    last digit is 0 or 4-9 take 'th'; otherwise the last digit selects
    'st', 'nd' or 'rd'.
    """
    tens_digit_is_one = math.floor(n / 10) % 10 == 1
    last_digit = n % 10
    if tens_digit_is_one or not last_digit < 4:
        suffix = "th"
    else:
        suffix = ("th", "st", "nd", "rd")[last_digit]
    return "%d%s" % (n, suffix)

# Function to return a string even if None:
def xstr(s):
    """Return str(s), or the empty string when s is None."""
    if s is None:
        return ''
    return str(s)

# Function to return a list of active vehicles (for the main page to select a bus)
def getvehicles():
    """Return the ids of the vehicles currently reported by the API.

    Only ids that start with 'y' and whose second character is not 's' are
    kept, and the leading 'y' is stripped from each returned id.

    Returns an empty list when the API request fails or reports no vehicles.
    """
    veh_data = apiget('vehicles')
    if not veh_data:
        # apiget() returns None on failure.
        return []
    vehicle_ids = (vehicle['id'] for vehicle in veh_data.get('data') or [])
    # Slices never raise, so a one-character id is skipped rather than
    # causing an IndexError.
    return [veh_id[1:] for veh_id in vehicle_ids
            if veh_id[:1] == 'y' and veh_id[1:2] not in ('', 's')]

# Function to get the basic bus data (test for data to ensure it is out there)
def getbasicdata(veh):
    """Fetch one bus from the API and return its processed data.

    veh -- vehicle id without the leading 'y' (as returned by getvehicles()).

    Returns the dictionary built by processveh_data(), or None when the API
    request returned nothing.
    """
    resource = 'vehicles/y' + str(veh)
    veh_data = apiget(resource)
    return None if veh_data is None else processveh_data(veh_data)


# Helper: walk nested dicts/lists, returning a default if any step is missing.
def _dig(obj, keys, default=None):
    """Return obj[keys[0]][keys[1]]..., or default if a lookup step fails.

    A value of None found at the end of the path is returned as-is; only a
    missing key/index or a non-subscriptable intermediate yields default.
    """
    try:
        for key in keys:
            obj = obj[key]
    except (KeyError, IndexError, TypeError):
        return default
    return obj


# Function to process the data into dictionary for webpage.
def processveh_data(veh_data):
    """Build the dictionary of bus details shown on the web page.

    veh_data -- JSON document returned by apiget('vehicles/y<id>').

    Returns a dict with the keys vehicle_number, lat, long, heading,
    location, updated_at, current_time, current_status, route_id, dir_id,
    stop_id, stop, trip_id, route_name, route_des, headsign and shape_id.

    The route, direction, stop and shape ids fall back to '0', 0, 0 and 0
    when absent from the API data; headsign is None when the trip lookup
    fails. The trip id, position, status and label are required: a
    KeyError/TypeError propagates if one is missing, and ValueError if the
    label is not an integer.
    """
    # Current time (local clock of the machine running this code):
    currentDT = datetime.datetime.now() # + datetime.timedelta(hours=1)  # +1 to get to EST

    # Get the Trip Data from the API (None if the request fails):
    trip_id = veh_data['data']['relationships']['trip']['data']['id']
    trip_data = apiget('trips/' + trip_id)

    # Current Bus details, each with the fallback used when the field is absent:
    route_id = _dig(veh_data, ('data', 'relationships', 'route', 'data', 'id'), '0')
    dir_id = _dig(veh_data, ('data', 'attributes', 'direction_id'), 0)
    stop_id = _dig(veh_data, ('data', 'relationships', 'stop', 'data', 'id'), 0)
    shape_id = _dig(trip_data, ('data', 'relationships', 'shape', 'data', 'id'), 0)
    headsign = _dig(trip_data, ('data', 'attributes', 'headsign'))

    attributes = veh_data['data']['attributes']
    lat = attributes['latitude']
    long = attributes['longitude']
    heading = attributes['bearing']
    location = str(lat) + ',' + str(long) + ',' + str(heading)
    current_status = attributes['current_status']

    # Look the route up once; both the short and long name come from it.
    route_info = getdata(route_data, 'route_id', route_id)

    # '%-I' is not portable, so the unpadded 12-hour value is computed by hand.
    current_time = '%d:%s' % (currentDT.hour % 12 or 12,
                              currentDT.strftime('%M:%S %p'))

    veh_data_out = {
        "vehicle_number": int(attributes['label']),
        "lat": lat, "long": long, "heading": heading, "location": location,
        "updated_at": converttime(attributes['updated_at']),
        "current_time": current_time,
        "current_status": current_status.replace("_", " ").lower(),

        "route_id": route_id, "dir_id": dir_id,
        "stop_id": stop_id,
        # xstr() turns an unknown stop (None) into '' before upper-casing.
        "stop": str.upper(xstr(getdata(stop_data, 'stop_id', stop_id).get("stop_name"))),
        "trip_id": trip_id,

        "route_name": route_info.get('route_short_name'),
        "route_des": route_info.get('route_long_name'),

        "headsign": headsign,
        "shape_id": shape_id,
    }
    return veh_data_out

# Function to get the predictions and return a dictionary (ideally of size 8)
def getpredictions(trip_id):
    """Return an evenly spaced sample of the stop predictions for a trip.

    trip_id -- trip identifier as a string.

    Returns a list of dicts with the keys m (position among the kept
    predictions), n (position among all predictions), n_txt (n as an ordinal,
    'Next' for the first), stop_seq, stop_id, stop (upper-cased stop name,
    '' if the stop is unknown) and time ('H:MM', with a trailing '+' when the
    seconds are 30 or more).

    Returns None when the API request fails, and an empty list when no
    prediction has an arrival or departure time. If nine predictions are
    kept, the eighth is dropped; the m values are not renumbered afterwards.
    """
    pred_results = apiget('predictions', '&filter[trip]=' + trip_id)
    try:
        results = pred_results['data']
        pred_length = len(results)
    except (KeyError, TypeError):
        return None

    # Keep every factor-th prediction so that roughly 8 remain;
    # with 9 or fewer predictions every one is kept.
    factor = 1 if pred_length < 10 else math.trunc(pred_length / 8) + 1

    pred_data = []
    for n, result in enumerate(results, start=1):
        attributes = result['attributes']
        arr_time = converttime(attributes['arrival_time'])
        dep_time = converttime(attributes['departure_time'])
        time = dep_time if arr_time is None else arr_time
        stop_id = result['relationships']['stop']['data']['id']

        # Always keep the first and last prediction, plus every factor-th one.
        selected = n % factor == 0 or n == pred_length or n == 1
        if time is None or not selected:
            continue

        # Drop the seconds but add a "+" if it is over 30 seconds (2nd half of minute)
        time = time[:-3] if int(time[-2:]) < 30 else time[:-3] + '+'
        pred_data.append({
            "m": len(pred_data) + 1,  # Dictionary index
            "n": n,  # Overall prediction index
            "n_txt": ordinal(n),  # Prediction as a ordinal number
            "stop_seq": attributes['stop_sequence'],
            "stop_id": stop_id,
            # xstr() guards against a stop id missing from stop_data.
            "stop": str.upper(xstr(getdata(stop_data, 'stop_id', stop_id).get("stop_name"))),
            "time": time,
        })

    # Guard against an empty list before relabelling the first entry.
    if pred_data and pred_data[0]["n_txt"] == '1st':
        pred_data[0]["n_txt"] = 'Next'

    # If pred_data length is 9 drop the 8th item:
    if len(pred_data) > 8:
        del pred_data[7]

    return pred_data

# Function to return the rows of the GTFS shape table that belong to one shape:
def getshape(shape_id):
    """Return the shape_data rows whose shape_id equals the given id.

    The rows are returned as the frame's underlying .values array, one
    entry per matching row.
    """
    belongs_to_shape = shape_data.shape_id == shape_id
    matching_rows = shape_data[belongs_to_shape]
    return matching_rows.values

# Function to return the stops of a given trip:
def getstops(trip_id_in):
    """Return the stop_data rows for the stops served by a trip.

    trip_id_in -- trip id; it must be convertible to int.

    The trip is mapped to a pattern through trip_patterns, the pattern to a
    bracketed comma-separated stop-id string through unique_stopstrings, and
    the matching stop_data rows are returned as a .values array.

    Returns an empty list when the trip id is not an integer, the trip is
    not in trip_patterns, or its pattern is not in unique_stopstrings.
    """
    try:
        trip_key = int(trip_id_in)
    except (TypeError, ValueError):
        return []

    patterns = trip_patterns[trip_patterns.trip_id == trip_key]['pattern']
    if patterns.empty:
        return []
    pattern = patterns.values[0]

    stop_strings = unique_stopstrings[unique_stopstrings.pattern == pattern]['stops']
    if stop_strings.empty:
        return []
    # Strip the surrounding brackets, then split into individual stop ids.
    stop_ids = stop_strings.values[0][1:-1].split(',')

    # Get the stop rows (including lat/long) from the stop_data dataframe:
    return stop_data[stop_data.stop_id.isin(stop_ids)].values

# Function to return the alerts for a given route:
def getalerts(route_id):
    """Return the short header of the first alert for a route.

    route_id -- route identifier as a string.

    Returns "" when the API request fails or the route has no alerts.
    """
    alert_data = apiget('alerts', '&filter[route]=' + route_id)
    try:
        return alert_data['data'][0]['attributes']['short_header']
    except (KeyError, IndexError, TypeError):
        # IndexError: empty alert list; TypeError: apiget() returned None.
        return ""

#mbtaonbus
#On-bus prediction screen prototype using MBTA API. {not affiliated with the MBTA}
#    Copyright (C) 2019  MICHAEL HAYNES
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.

In [3]:
'''
Getting Vehicles
'''
# Ask the live API for the current vehicles; getvehicles() keeps the ids that
# start with 'y' (but not 'ys') and strips that leading 'y'.
temp = getvehicles()
print(temp)
# Number of vehicles returned.
print(len(temp))


['2095', '1624', '2096', '1919', '1677', '2059', '1924', '1991', '1319', '1800', '1292', '1448', '1641', '1660', '1804', '3274', '1451', '1429', '3106', '1656', '1862', '1722', '2028', '1731', '1793', '2014', '1665', '1611', '1250', '3003', '1701', '1405', '1442', '0795', '3311', '3280', '1928', '1765', '0776', '2045', '1620', '0843', '0636', '0872', '1937', '0837', '0616', '1692', '1792', '3151', '0860', '1444', '2046', '1878', '1713', '1420', '1676', '3159', '1982', '2094', '1948', '1290', '1956', '1811', '1852', '2073', '3292', '1879', '3157', '1817', '2077', '0839', '2008', '1209', '1931', '1703', '0862', '1671', '1730', '3301', '2010', '1709', '1855', '1849', '1950', '1612', '1638', '1961', '1796', '3282', '0789', '1902', '1289', '2062', '3287', '2068', '1645', '1819', '3154', '1646', '2024', '1965', '1994', '3122', '2107', '3234', '0848', '0765', '1706', '0868', '2032', '1975', '1211', '1258', '1629', '1939', '1821', '1822', '0846', '1865', '2112', '0818', '3220', '3243', '1453',

In [4]:
'''
Live Bus Data
'''
# Look up the first vehicle from the list fetched in the previous cell.
# getbasicdata() returns None when the API request fails, in which case the
# .keys() call below raises AttributeError.
vehicle_data = getbasicdata(temp[0])
# 'lat': 42.29652907, 'long': -71.07228324,
print(vehicle_data)
print(vehicle_data.keys())


{'vehicle_number': 2095, 'lat': 42.43462654, 'long': -71.24040827, 'heading': 248, 'location': '42.43462654,-71.24040827,248', 'updated_at': '4:03:03', 'current_time': '4:03:08 PM', 'current_status': 'in transit to', 'route_id': '76', 'dir_id': 0, 'stop_id': '8604', 'stop': 'MARRETT RD @ DOWNING RD', 'trip_id': '58555911', 'route_name': '76', 'route_des': 'Lincoln Lab - Alewife Station', 'headsign': 'Lincoln Lab via Hanscom Airport', 'shape_id': '760136'}
dict_keys(['vehicle_number', 'lat', 'long', 'heading', 'location', 'updated_at', 'current_time', 'current_status', 'route_id', 'dir_id', 'stop_id', 'stop', 'trip_id', 'route_name', 'route_des', 'headsign', 'shape_id'])


In [5]:
'''
Stops
''' 
# Rows of the stop table for every stop on this bus's current trip
# (an empty list if the trip is not found in the trip pattern table).
stops = getstops(vehicle_data['trip_id'])
# Show the first three rows only.
print(stops[:3])

[[429 '12465' 'Wadsworth Rd @ Appleton St' 42.417136 -71.199535]
 [1437 '2201' 'West Service Rd @ George St' 42.411994 -71.184245]
 [1438 '2202' 'West Service Rd @ St Pauls Church' 42.412752 -71.186419]]


In [6]:
# Column labels for the raw rows returned by getstops().
# NOTE(review): in the printed sample the 'X' column holds values around 42.4
# and 'Y' values around -71.2, i.e. X looks like latitude and Y like longitude,
# the opposite of the X/Y columns in the facilities table loaded further down.
# Confirm before joining or plotting on these names.
columns = ['object_id', 'stop_id', 'stop_name', 'X', 'Y']

# Create a DataFrame from the 'stops' variable and columns
df_stops = pd.DataFrame(stops, columns=columns)

# Print the first few rows of the DataFrame
print(df_stops.head())

  object_id stop_id                          stop_name          X          Y
0       429   12465         Wadsworth Rd @ Appleton St  42.417136 -71.199535
1      1437    2201        West Service Rd @ George St  42.411994 -71.184245
2      1438    2202  West Service Rd @ St Pauls Church  42.412752 -71.186419
3      1439    2203     West Service Rd @ Florence Ave  42.414845 -71.191827
4      1557    2353        West Service Rd @ Venner Rd  42.406193 -71.163768


# Data Downloaded from MBTA's website: https://mbta-massdot.opendata.arcgis.com/search?q=MBTA%20Bus&sort=-modified

## Bus Arrival Departure Times

In [7]:
import os
import pandas as pd
dfs = []  # Create an empty list to store dataframes

# Read every CSV export in the arrival/departure directory.
arr_dep_dir = '../data/MBTA_Website/MBTA_Bus_Arrival_Departure_Times_2023/'
# os.listdir() returns entries in arbitrary order and includes any non-CSV
# file (e.g. hidden OS files), so filter on the extension and sort to make the
# concatenated row order reproducible.
csv_files = sorted(
    os.path.join(arr_dep_dir, name)
    for name in os.listdir(arr_dep_dir)
    if name.lower().endswith('.csv')
)

for f in csv_files:
    df = pd.read_csv(f)
    dfs.append(df)

# Stack all files into one frame with a fresh 0..n-1 index.
df_arr_dep = pd.concat(dfs, axis=0, ignore_index=True)
df_arr_dep.head()

Unnamed: 0,service_date,route_id,direction_id,half_trip_id,stop_id,time_point_id,time_point_order,point_type,standard_type,scheduled,actual,scheduled_headway,headway
0,2023-01-01,1,Inbound,58061899.0,110,hhgat,1,Startpoint,Schedule,1900-01-01T06:05:00Z,1900-01-01T06:05:04Z,,
1,2023-01-01,1,Inbound,58061899.0,67,maput,2,Midpoint,Schedule,1900-01-01T06:09:00Z,1900-01-01T06:06:28Z,,
2,2023-01-01,1,Inbound,58061899.0,72,cntsq,3,Midpoint,Schedule,1900-01-01T06:12:00Z,1900-01-01T06:08:57Z,,
3,2023-01-01,1,Inbound,58061899.0,75,mit,4,Midpoint,Schedule,1900-01-01T06:15:00Z,1900-01-01T06:12:41Z,,
4,2023-01-01,1,Inbound,58061899.0,79,hynes,5,Midpoint,Schedule,1900-01-01T06:19:00Z,1900-01-01T06:16:35Z,,


## Bus Ridership By Time Period

In [8]:
# Ridership by time period, season, route/line and stop.
# NOTE(review): the original run echoed this line as a warning, presumably
# pandas' mixed-dtype warning from chunked type inference - confirm.
# low_memory=False makes pandas infer each column's dtype from the whole file.
df_bus_ridership = pd.read_csv(
    '../data/MBTA_Website/MBTA_Bus_Ridership_by_Time_Period%2C_Season%2C_Route_Line%2C_and_Stop.csv',
    low_memory=False,
)
df_bus_ridership.tail()

  df_bus_ridership = pd.read_csv('../data/MBTA_Website/MBTA_Bus_Ridership_by_Time_Period%2C_Season%2C_Route_Line%2C_and_Stop.csv')


Unnamed: 0,mode,season,route_id,route_name,route_variant,stop_sequence,direction_id,day_type_id,day_type_name,time_period_id,time_period_name,stop_name,stop_id,average_ons,average_offs,average_load,num_trips,ObjectId
868682,3,Fall 2022,99,99,99-8-0,23,0,day_type_02,saturday,time_period_10,OFF_PEAK,MALDEN CENTER WEST BUSWAY,5072,1.636364,2.254545,3.472727,11,868683
868683,3,Fall 2022,99,99,99-8-0,24,0,day_type_02,saturday,time_period_10,OFF_PEAK,SUMMER ST @ LINCOLN ST,5074,0.027273,0.154545,3.354545,11,868684
868684,3,Fall 2022,99,99,99-8-0,25,0,day_type_02,saturday,time_period_10,OFF_PEAK,SUMMER ST @ CLIFTON ST,5075,0.0,0.290909,3.045455,11,868685
868685,3,Fall 2022,99,99,99-8-0,26,0,day_type_02,saturday,time_period_10,OFF_PEAK,CLIFTON ST @ KERNWOOD ST,5076,0.0,0.145455,2.918182,11,868686
868686,3,Fall 2022,99,99,99-8-0,27,0,day_type_02,saturday,time_period_10,OFF_PEAK,CLIFTON ST @ DEXTER ST,5077,0.009091,0.0,2.927273,11,868687


## Bus Reliability

In [9]:
# Reliability metrics for bus, commuter rail and rapid transit.
df_reliability = pd.read_csv('../data/MBTA_Website/MBTA_Bus%2C_Commuter_Rail%2C_%26_Rapid_Transit_Reliability.csv')
print(df_reliability.shape)
# Keep only rows whose mode_type mentions 'Bus'. na=False treats a missing
# mode_type as "no match"; without it a NaN in the mask makes the boolean
# indexing raise.
df_reliability = df_reliability[df_reliability['mode_type'].str.contains('Bus', na=False)]
print(df_reliability.shape)
df_reliability.head()

(817294, 13)
(708397, 13)


Unnamed: 0,service_date,gtfs_route_id,gtfs_route_short_name,gtfs_route_long_name,gtfs_route_desc,route_category,mode_type,peak_offpeak_ind,metric_type,otp_numerator,otp_denominator,cancelled_numerator,ObjectId
7,2023/09/30 04:00:00+00,553,553,,Express Bus,Other Bus,Bus,OFF_PEAK,Headway / Schedule Adherence,39.0,55.0,,8
8,2023/09/30 04:00:00+00,34E,34E,,Local Bus,Other Bus,Bus,OFF_PEAK,Headway / Schedule Adherence,448.0,837.0,,9
9,2023/09/30 04:00:00+00,36,36,,Local Bus,Other Bus,Bus,OFF_PEAK,Headway / Schedule Adherence,342.0,441.0,,10
10,2023/09/30 04:00:00+00,411,411,,Local Bus,Other Bus,Bus,OFF_PEAK,Headway / Schedule Adherence,58.0,153.0,,11
11,2023/09/30 04:00:00+00,429,429,,Local Bus,Other Bus,Bus,OFF_PEAK,Headway / Schedule Adherence,122.0,192.0,,12


## Monthly Ridership

In [10]:
# Monthly ridership totals by mode.
df_monthly_ridership = pd.read_csv('../data/MBTA_Website/MBTA_Monthly_Ridership_by_Mode.csv')
# Keep only rows whose mode mentions 'Bus'. na=False treats a missing mode as
# "no match"; without it a NaN in the mask makes the boolean indexing raise.
df_monthly_ridership = df_monthly_ridership[df_monthly_ridership['mode'].str.contains('Bus', na=False)]
print(df_monthly_ridership.shape)
df_monthly_ridership.head()

(182, 10)


Unnamed: 0,service_date,mode,route_or_line,total_monthly_weekday_ridership,average_monthly_weekday_ridersh,countofdates_weekday,total_monthly_ridership,average_monthly_ridership,countofdates,ObjectId
4,2016/01/01 05:00:00+00,Bus,Bus,6985521.0,367659.0,19,8225148.0,265328.0,31,5
9,2016/01/01 05:00:00+00,Bus,Silver Line,669761.0,35251.0,19,772265.0,24912.0,31,10
15,2016/02/01 05:00:00+00,Bus,Bus,7469019.0,373451.0,20,8191566.0,282468.0,29,16
20,2016/02/01 05:00:00+00,Bus,Silver Line,730265.0,36513.0,20,734150.0,25316.0,29,21
26,2016/03/01 05:00:00+00,Bus,Bus,8679363.0,377364.0,23,9456633.0,305052.0,31,27


## Facilities (Wheelchair facility)

In [11]:
# Load the system-wide GTFS facilities table, discard rows that lack a
# coordinate (X, Y) or a wheelchair_facility value, and renumber the index.
facilities_csv = '../data/MBTA_Website/MBTA_Systemwide_GTFS_Map.csv'
required_columns = ['X', 'Y', 'wheelchair_facility']
df_facilities = (
    pd.read_csv(facilities_csv)
    .dropna(subset=required_columns)
    .reset_index(drop=True)
)
df_facilities.head()

Unnamed: 0,X,Y,OBJECTID,facility_id,facility_code,facility_class,facility_type,stop_id,facility_short_name,facility_long_name,facility_desc,facility_lat,facility_lon,wheelchair_facility,lat,lon,created_user,created_date,last_edited_user,last_edited_date
0,-71.071098,42.436689,139112,102,102.0,1,escalator,place-ogmnl,Washington Street to unpaid lobby,Oak Grove Escalator 102 (Washington Street to ...,,,,2,42.43668,-71.071097,DOT_ADMIN,2023/05/30 14:08:48+00,DOT_ADMIN,2023/05/30 14:08:48+00
1,-71.071098,42.436689,139113,103,103.0,1,escalator,place-ogmnl,Orange Line platform to paid lobby,Oak Grove Escalator 103 (Orange Line platform ...,,,,2,42.43668,-71.071097,DOT_ADMIN,2023/05/30 14:08:48+00,DOT_ADMIN,2023/05/30 14:08:48+00
2,-71.071098,42.436689,139114,104,104.0,1,escalator,place-ogmnl,"Parking, busway to unpaid lobby","Oak Grove Escalator 104 (Parking, busway to un...",,,,2,42.43668,-71.071097,DOT_ADMIN,2023/05/30 14:08:48+00,DOT_ADMIN,2023/05/30 14:08:48+00
3,-71.071098,42.436689,139115,105,105.0,1,escalator,place-ogmnl,Commuter Rail platform to unpaid lobby,Oak Grove Escalator 105 (Commuter Rail platfor...,,,,2,42.43668,-71.071097,DOT_ADMIN,2023/05/30 14:08:48+00,DOT_ADMIN,2023/05/30 14:08:48+00
4,-71.060226,42.355527,139116,112,112.0,1,escalator,place-dwnxg,Ashmont/Braintree platform to Chauncy Street,Downtown Crossing Escalator 112 (Ashmont/Brain...,,,,2,42.355518,-71.060225,DOT_ADMIN,2023/05/30 14:08:48+00,DOT_ADMIN,2023/05/30 14:08:48+00


In [12]:
### Displaying Wheelchair facilities

import folium 
from folium.plugins import FastMarkerCluster

m = folium.Map(location=[42.3601, -71.0589], zoom_start=12) # centered at Boston
# NOTE(review): the sample rows show wheelchair_facility == 2 for escalators;
# if the column follows the GTFS accessibility coding, ">= 1" also keeps
# facilities marked as not accessible - confirm the intended threshold.
filtered_data = df_facilities[df_facilities['wheelchair_facility'] >= 1]
# The table was NaN-filtered on X/Y, but the markers are placed from lat/lon;
# drop rows missing either so no marker is created with a NaN coordinate.
filtered_data = filtered_data.dropna(subset=['lat', 'lon'])
marker_cluster = FastMarkerCluster(data=list(zip(filtered_data['lat'], filtered_data['lon'])))
marker_cluster.add_to(m)
# display() is provided by the notebook environment.
display(m)