<h1>Presentation Journal</h1>

In [64]:
import os
from urllib.parse import quote_plus

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from sqlalchemy import create_engine

# Connection settings are read from the environment so that real credentials
# never have to be saved in the notebook. Each fallback is the value the
# notebook originally hard-coded, so a local setup keeps working unchanged.
database_name = os.environ.get('SCOOTERS_DB_NAME', 'scooters')
database_user = os.environ.get('SCOOTERS_DB_USER', 'postgres')
database_password = os.environ.get('SCOOTERS_DB_PASSWORD', 'postgres')
database_host = os.environ.get('SCOOTERS_DB_HOST', 'localhost')
database_port = os.environ.get('SCOOTERS_DB_PORT', '5432')

# The user and password are URL-quoted so characters such as '@' or '/' in a
# real password cannot break the structure of the connection URL.
connection_string = (
    f"postgresql://{quote_plus(database_user)}:{quote_plus(database_password)}"
    f"@{database_host}:{database_port}/{database_name}"
)

engine = create_engine(connection_string)

# Read in the bus stops data csv
bus_stops_df = pd.read_csv('../data/regional_transportation_authority_bus_stops.csv')

<h3>Does it appear that scooters are used as "last mile" transportation from public transit hubs to work or school?</h3>

<h4>First, we need to query all of the trips into 1 dataframe</h4>

In [65]:
# Every column of every row in the trips table is requested, so the whole
# table ends up in memory as a single dataframe.
trips_query = """
select *
from trips
"""

trips_df = pd.read_sql(sql=trips_query, con=engine)

<h4>Next, we will define a function that calculates the shortest distance between 2 coordinates.</h4>

In [66]:
def shortest_distance(start_latitude, start_longitude, end_latitude, end_longitude):
    """
    Great-circle distance in miles between 2 coordinates, using the haversine formula
    https://en.wikipedia.org/wiki/Haversine_formula
    https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points

    Parameters
    ----------
    start_latitude, start_longitude : float or array-like
        Starting point in decimal degrees.
    end_latitude, end_longitude : float or array-like
        Ending point in decimal degrees.

    Returns
    -------
    float or array-like
        Distance in miles over a sphere of radius 6371 km. Only NumPy
        element-wise functions are used, so scalars, NumPy arrays and pandas
        Series are all accepted and the result has the matching shape.
        NaN inputs produce NaN outputs.
    """
    earth_radius_km = 6371
    miles_per_km = 0.621371

    # We have to convert the latitudes and longitudes to radians for the formula to work
    start_latitude, start_longitude, end_latitude, end_longitude = map(
        np.radians,
        [start_latitude, start_longitude, end_latitude, end_longitude]
    )

    # Now we build out the formula
    distance_longitude = end_longitude - start_longitude
    distance_latitude = end_latitude - start_latitude

    angle = (
        np.sin(distance_latitude / 2) ** 2
        + np.cos(start_latitude) * np.cos(end_latitude) * np.sin(distance_longitude / 2) ** 2
    )
    # Floating-point rounding can push this term a hair above 1 for
    # (near-)antipodal points, which would make arcsin return NaN. Clamp it.
    # np.minimum still propagates NaN, so missing coordinates stay missing.
    angle = np.minimum(angle, 1.0)

    central_angle = 2 * np.arcsin(np.sqrt(angle))
    km = earth_radius_km * central_angle
    miles = km * miles_per_km

    return miles



<h4>Then, we will apply that function to every trip in our dataframe.</h4>

In [67]:
# shortest_distance only uses element-wise NumPy functions, so it can take the
# coordinate columns directly. This computes every trip's distance in one
# vectorized pass instead of one Python-level call per row through
# DataFrame.apply(axis=1).
trips_df['distance'] = shortest_distance(
    trips_df['startlatitude'],
    trips_df['startlongitude'],
    trips_df['endlatitude'],
    trips_df['endlongitude'],
)
print(trips_df.head())


             pubtimestamp companyname triprecordnum         sumdid   
0 2019-05-01 00:00:55.423        Bird       BRD2134   Powered9EAJL  \
1 2019-05-01 00:03:33.147        Lyft          LFT5  Powered296631   
2 2019-05-01 00:05:55.570        Bird       BRD2168   Powered7S2UU   
3 2019-05-01 00:05:55.570        Bird       BRD2166   PoweredZIIVX   
4 2019-05-01 00:05:55.570        Bird       BRD2165   PoweredJ7MB3   

   tripduration  tripdistance   startdate        starttime     enddate   
0        3.0000     958.00528  2019-05-01  00:00:20.460000  2019-05-01  \
1        1.7156    1371.39112  2019-05-01  00:01:50.090000  2019-05-01   
2        3.0000    2296.58800  2019-05-01  00:03:47.363333  2019-05-01   
3        3.0000    1200.78744  2019-05-01  00:04:21.386666  2019-05-01   
4        2.0000     351.04988  2019-05-01  00:04:27.796666  2019-05-01   

           endtime  startlatitude  startlongitude  endlatitude  endlongitude   
0  00:02:52.346666       36.15710       -86.80360     

<h4>Now let's see the numbers on scooter trips under and over 1 mile. These will be used for later data discovery.</h4>

In [68]:
# Split the trips at the 1-mile mark. Both comparisons are written out
# explicitly (rather than negating one mask) so that a missing distance
# falls into neither group.
is_under_1_mile = trips_df['distance'] < 1
is_1_mile_or_more = trips_df['distance'] >= 1
trips_under_1_mile_df = trips_df.loc[is_under_1_mile]
trips_1_mile_or_more_df = trips_df.loc[is_1_mile_or_more]

number_of_trips_under_1_mile = trips_under_1_mile_df.shape[0]
number_of_trips_1_mile_or_more = trips_1_mile_or_more_df.shape[0]

# Each group's share is taken over the trips that landed in either group
number_of_trips_counted = number_of_trips_under_1_mile + number_of_trips_1_mile_or_more
percentage_of_trips_under_1_mile = number_of_trips_under_1_mile / number_of_trips_counted
percentage_of_trips_1_mile_or_more = number_of_trips_1_mile_or_more / number_of_trips_counted

print(f'The number of trips under 1 mile is {number_of_trips_under_1_mile} ({round(percentage_of_trips_under_1_mile * 100, 2)}%)')
print(f'The number of trips 1 mile or more is {number_of_trips_1_mile_or_more} ({round(percentage_of_trips_1_mile_or_more * 100, 2)}%)')

The number of trips under 1 mile is 477556 (84.45%)
The number of trips 1 mile or more is 87966 (15.55%)


<h3>Base Map Template</h3>

<h4>Here, we clean up the public transportation stops data and create the base map that the stops will be plotted on.</h4>

In [69]:
# Dropping all unnecessary columns in the bus stops dataframe
bus_stops_df = bus_stops_df.drop(columns=['Stop ID Number', 'Stop Abbreviation', 'Bench', 'Shelter'])

print(bus_stops_df.head())

# Split out the Mapped Location column into latitude and longitude columns
# We need to drop the opening and closing parentheses and then split on the comma
# regex=False is passed explicitly: '(' and ')' are regex metacharacters and the
# default value of `regex` differs between pandas 1.x and 2.x, so spelling it
# out keeps the replacement a plain literal match on every version.
bus_stops_df['Mapped Location'] = (
    bus_stops_df['Mapped Location']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)
# NOTE: the two new columns hold strings, not floats, and because the source
# values look like "36.166545, -86.781895" the longitude keeps its leading space.
bus_stops_df[['latitude', 'longitude']] = bus_stops_df['Mapped Location'].str.split(',', expand=True)

print(bus_stops_df.head())

# Dropping the now unnecessary 'Mapped Location' column
bus_stops_df = bus_stops_df.drop(columns=['Mapped Location'])

# Empty map to build on. The center point sits close to the Music City Central
# stop coordinates printed by this cell, and 12 is the initial zoom level.
map_center = [36.1627, -86.7816]
base_map = folium.Map(location=map_center, zoom_start=12)

# Leaving the map as the cell's last expression renders it
base_map

                            Stop Name  Line Number               Line Name   
0     MUSIC CITY CENTRAL 4TH - BAY 20           94     CLARKSVILLE EXPRESS  \
1      MUSIC CITY CENTRAL 5TH - BAY 6           94     CLARKSVILLE EXPRESS   
2       21ST AVE PAST WEST END AVE SB           87        GALLATIN EXPRESS   
3  MUSIC CITY STAR MT. JULIET STATION           90         MUSIC CITY STAR   
4      MUSIC CITY CENTRAL 5TH - BAY 8           92  HENDERSONVILLE EXPRESS   

           Mapped Location  
0  (36.166545, -86.781895)  
1  (36.166501, -86.781233)  
2  (36.149489, -86.800523)  
3  (36.199912, -86.517904)  
4  (36.166768, -86.781424)  
                            Stop Name  Line Number               Line Name   
0     MUSIC CITY CENTRAL 4TH - BAY 20           94     CLARKSVILLE EXPRESS  \
1      MUSIC CITY CENTRAL 5TH - BAY 6           94     CLARKSVILLE EXPRESS   
2       21ST AVE PAST WEST END AVE SB           87        GALLATIN EXPRESS   
3  MUSIC CITY STAR MT. JULIET STATION        