<h1>Presentation Journal</h1>

In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from sqlalchemy import create_engine

database_name = 'scooters'
connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

engine = create_engine(connection_string)

# Read in the bus stops data csv
bus_stops_df = pd.read_csv('../data/regional_transportation_authority_bus_stops.csv')

# Feet in miles constant
FEET_IN_MILES = 5280

<h3>Does it appear that scooters are used as "last mile" transportation from public transit hubs to work or school?</h3>

<h4>First, we need to query all of the trips into 1 dataframe</h4>

In [41]:
trips_query = """
select *
from trips
"""

trips_df = pd.read_sql(trips_query, engine)

<h4>Next, we will define a function that calculates the shortest distance between 2 coordinates.</h4>

In [42]:
def shortest_distance(start_latitude, start_longitude, end_latitude, end_longitude):
    """
    Calculating the distance between 2 coordinates using the haversine formula
    https://en.wikipedia.org/wiki/Haversine_formula
    https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points
    """
    # We have to convert the latitudes and longitudes to radians for the formula to work
    start_latitude, start_longitude, end_latitude, end_longitude = map(
        np.radians,
        [start_latitude, start_longitude, end_latitude, end_longitude]
    )
                                                 
    # Now we build out the formula
    distance_longitude = end_longitude - start_longitude
    distance_latitude = end_latitude - start_latitude
    
    angle = np.sin(distance_latitude / 2) ** 2 + np.cos(start_latitude) * np.cos(end_latitude) * np.sin(distance_longitude / 2) ** 2
    central_angle = 2 * np.arcsin(np.sqrt(angle))
    km = 6371 * central_angle # The Earth is 6371 km in radius
    miles = km * 0.621371 # Converting to miles

    return miles



<h4>Now let's see the numbers on scooter trips under and over 1 mile. These will be used for later data discovery.</h4>

In [55]:
# Getting counts on trips that are less than 1 mile
trips_under_1_mile_df = trips_df[trips_df['tripdistance'] < FEET_IN_MILES]
trips_1_mile_or_more_df = trips_df[trips_df['tripdistance'] >= FEET_IN_MILES]
trips_under_500_feet_df = trips_df[trips_df['tripdistance'] < 500]

number_of_trips_under_1_mile = len(trips_under_1_mile_df)
number_of_trips_1_mile_or_more = len(trips_1_mile_or_more_df)
number_of_trips_under_500_feet = len(trips_under_500_feet_df)

# Getting the percentage of trips that are less than 1 mile
percentage_of_trips_under_1_mile = number_of_trips_under_1_mile / (number_of_trips_under_1_mile + number_of_trips_1_mile_or_more)
percentage_of_trips_1_mile_or_more = number_of_trips_1_mile_or_more / (number_of_trips_under_1_mile + number_of_trips_1_mile_or_more)

# Getting the percentage of trips that are less than 500 feet
percentage_of_trips_under_500_feet = number_of_trips_under_500_feet / (number_of_trips_under_1_mile + number_of_trips_1_mile_or_more)

print(f'The number of trips under 1 mile is {number_of_trips_under_1_mile} ({round(percentage_of_trips_under_1_mile * 100, 2)}%)')
print(f'The number of trips 1 mile or more is {number_of_trips_1_mile_or_more} ({round(percentage_of_trips_1_mile_or_more * 100, 2)}%)')
print(f'The number of trips under 500 feet is {number_of_trips_under_500_feet} ({round(percentage_of_trips_under_500_feet * 100, 2)}%)')

The number of trips under 1 mile is 392076 (69.33%)
The number of trips 1 mile or more is 173446 (30.67%)
The number of trips under 500 feet is 148503 (26.26%)


<h3>Base Map Template</h3>

<h4>Here, we are plotting the public transportation stops.</h4>

In [44]:
# Dropping all unnecessary columns in the bus stops dataframe
bus_stops_df = bus_stops_df.drop(columns=['Stop ID Number', 'Stop Abbreviation', 'Bench', 'Shelter'])

# Split out the Mapped Location column into latitude and longitude columns
# We need to drop the opening and closing parentheses and then split on the comma
bus_stops_df['Mapped Location'] = bus_stops_df['Mapped Location'].str.replace('(', '')
bus_stops_df['Mapped Location'] = bus_stops_df['Mapped Location'].str.replace(')', '')
bus_stops_df[['latitude', 'longitude']] = bus_stops_df['Mapped Location'].str.split(',', expand=True)

# Dropping the now unnecessary 'Mapped Location' column
bus_stops_df = bus_stops_df.drop(columns=['Mapped Location'])

<h4>Now we want to add the scooter data to the map where the trip started within 1 mile of a bus stop and the duration was less than 1 mile.</h4>

In [46]:
# Build constants for the bus stop radius so that we increase/decrease the radius based on the number of bus stops
# Then, use a round function to round the radius to constant value
BUS_STOP_BEGIN_TRIP_RADIUS = 0.25

In [49]:
# First, we need to get the bus stop coordinates into a list of tuples and cast each value to a float
bus_stop_coordinates = list(zip(bus_stops_df['latitude'].astype(float), bus_stops_df['longitude'].astype(float)))

# Now we need to get the scooter data that meets our criteria by looping over the dataframe and comparing each row to the bus stop coordinates

# Declare an empty dataframe
last_mile_df = pd.DataFrame(columns=trips_under_1_mile_df.columns)

# Loop over all of the under 1 mile trips
loop_count = 0
for index, row in trips_under_1_mile_df.iterrows():
    scooter_coordinates = (row['startlatitude'], row['startlongitude'])

    # Loop over bus stop coordinates and compare to starting trip coordinates
    for bus_stop_co in bus_stop_coordinates:
        # Distance is in miles
        start_distance_from_bus_stop = shortest_distance(scooter_coordinates[0], scooter_coordinates[1], bus_stop_co[0], bus_stop_co[1])

        if start_distance_from_bus_stop < BUS_STOP_BEGIN_TRIP_RADIUS:
            # Add the row to the dataframe
            last_mile_df = pd.concat([last_mile_df, pd.DataFrame([row])], ignore_index=True)
            break

    loop_count += 1

    # avoid an infinite loop
    if loop_count > len(trips_under_1_mile_df):
        break

    # Debug only below
    if loop_count % 1000 == 0:
        print(f'Loop count: {loop_count}')

Loop count: 1000
Loop count: 2000
Loop count: 3000
Loop count: 4000
Loop count: 5000
Loop count: 6000
Loop count: 7000
Loop count: 8000
Loop count: 9000
Loop count: 10000
Loop count: 11000
Loop count: 12000
Loop count: 13000
Loop count: 14000
Loop count: 15000
Loop count: 16000
Loop count: 17000
Loop count: 18000
Loop count: 19000
Loop count: 20000
Loop count: 21000
Loop count: 22000
Loop count: 23000
Loop count: 24000
Loop count: 25000
Loop count: 26000
Loop count: 27000
Loop count: 28000
Loop count: 29000
Loop count: 30000
Loop count: 31000
Loop count: 32000
Loop count: 33000
Loop count: 34000
Loop count: 35000
Loop count: 36000
Loop count: 37000
Loop count: 38000
Loop count: 39000
Loop count: 40000
Loop count: 41000
Loop count: 42000
Loop count: 43000
Loop count: 44000
Loop count: 45000
Loop count: 46000
Loop count: 47000
Loop count: 48000
Loop count: 49000
Loop count: 50000
Loop count: 51000
Loop count: 52000
Loop count: 53000
Loop count: 54000
Loop count: 55000
Loop count: 56000
L

<h4>The short trips that started near the bus stops have been compiled. Now, we will cluster plot them onto the base map.</h4>

In [52]:
print(last_mile_df.head())

             pubtimestamp companyname triprecordnum        sumdid   
0 2019-05-01 00:05:55.570        Bird       BRD2167  PoweredEJF1K  \
1 2019-05-01 00:10:56.013        Bird       BRD2193  PoweredSX2AW   
2 2019-05-01 00:10:56.013        Bird       BRD2191  PoweredVS299   
3 2019-05-01 00:10:56.013        Bird       BRD2197  Powered5R8S1   
4 2019-05-01 00:10:56.013        Bird       BRD2196  PoweredF8GJP   

   tripduration  tripdistance   startdate        starttime     enddate   
0           3.0       984.252  2019-05-01  00:03:56.546666  2019-05-01  \
1           6.0      2624.672  2019-05-01  00:06:34.436666  2019-05-01   
2           3.0       984.252  2019-05-01  00:09:14.073333  2019-05-01   
3           9.0      3280.840  2019-05-01  00:01:50.923333  2019-05-01   
4           9.0       328.084  2019-05-01  00:03:32.563333  2019-05-01   

           endtime  startlatitude  startlongitude  endlatitude  endlongitude   
0  00:06:45.786666        36.1544        -86.7862      36.15

In [59]:
# Initial boilerplate code for the map
base_map = folium.Map(
    location=[36.1627, -86.7816],
    zoom_start=12
)

# Create marker clusters
bus_stop_marker_cluster = MarkerCluster().add_to(base_map)
scooter_marker_cluster = MarkerCluster().add_to(base_map)

# Add bus stop markers to the map
for index, row in bus_stops_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['Stop Name'],
        icon=folium.Icon(color='blue', icon='bus', prefix='fa')
    ).add_to(bus_stop_marker_cluster)

# Add scooter markers to the map
for index, row in last_mile_df.iterrows():
    folium.Marker(
        location=[row['endlatitude'], row['endlongitude']],
        popup=str(row['enddate']) + ' : ' + row['triprecordnum'],
        icon=folium.Icon(color='green', icon='bolt', prefix='fa')
    ).add_to(scooter_marker_cluster)

# Save an interactive HTML map
base_map.save('../maps/last_mile_trip_map.html')

AttributeError: 'datetime.date' object has no attribute 'toStr'