In [62]:
import math
import os

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import seaborn as sns
from folium.plugins import FastMarkerCluster
from folium.plugins import MarkerCluster
from matplotlib import pyplot as pyplot
from sqlalchemy import create_engine, text

In [53]:
# Connection settings are read from environment variables so credentials do not
# have to be stored in the notebook. When a variable is unset, the default
# reproduces the original local-development value, so existing setups keep working.
database_name = os.environ.get('SCOOTERS_DB_NAME', 'scooters')
db_user = os.environ.get('SCOOTERS_DB_USER', 'postgres')
db_password = os.environ.get('SCOOTERS_DB_PASSWORD', 'postgres')
db_host = os.environ.get('SCOOTERS_DB_HOST', 'localhost')
db_port = os.environ.get('SCOOTERS_DB_PORT', '5432')

# SQLAlchemy URL consumed by create_engine() below.
connection_string = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{database_name}"

In [54]:
# Shared SQLAlchemy engine used by every query in this notebook.
engine = create_engine(connection_string)

# Total sumdid values vs. distinct scooter ids in the `scooters` table.
# Each aggregate is aliased: PostgreSQL labels every un-aliased COUNT(...) column
# "count", which would give the DataFrame two identically named columns.
query = '''
SELECT
    COUNT(sumdid) AS total_records,
    COUNT(DISTINCT sumdid) AS distinct_scooters
FROM scooters;
'''

with engine.connect() as connection:
    scoots = pd.read_sql(text(query), con = connection)

scoots.head()

# Total sumdid values vs. distinct scooter ids in the `trips` table.
# Each aggregate is aliased: PostgreSQL labels every un-aliased COUNT(...) column
# "count", which would give the DataFrame two identically named columns.
query = '''
SELECT
    COUNT(sumdid) AS total_records,
    COUNT(DISTINCT sumdid) AS distinct_scooters
FROM trips;
'''

with engine.connect() as connection:
    trips = pd.read_sql(text(query), con = connection)

trips.head()

# Latitude/longitude extremes across every row of `scooters`.
query = """
SELECT 
    MIN(latitude) AS min_lat,
    MAX(latitude) AS max_lat,
    MIN(longitude) AS min_lon,
    MAX(longitude) AS max_lon
FROM scooters;
"""

bbox_sql = text(query)
with engine.connect() as conn:
    scoots = pd.read_sql(sql=bbox_sql, con=conn)

scoots

# Small sample (first 100 rows) of `scooters` to inspect its columns.
query = """
SELECT *
FROM scooters
LIMIT 100;
"""

sample_sql = text(query)
with engine.connect() as conn:
    scoots = pd.read_sql(sql=sample_sql, con=conn)

scoots.head()

# Small sample (first 100 rows) of `trips`; .info() lists its columns and dtypes.
query = """
SELECT *
FROM trips
LIMIT 100;
"""

sample_sql = text(query)
with engine.connect() as conn:
    trips = pd.read_sql(sql=sample_sql, con=conn)

trips.info()

# Load the complete `trips` table (no LIMIT), replacing the 100-row sample above.
query = '''
SELECT *
FROM trips
'''

# Same pattern as the other queries in this notebook: wrap the SQL in text() and
# run it on an explicitly managed connection instead of handing a raw string to
# the engine.
with engine.connect() as connection:
    trips = pd.read_sql(text(query), con = connection)

# Number of distinct scooter ids reported by each company.
query = """
SELECT 
    companyname,
    COUNT(DISTINCT sumdid)
FROM scooters
GROUP BY companyname;
"""

company_sql = text(query)
with engine.connect() as conn:
    scoots = pd.read_sql(sql=company_sql, con=conn)

scoots

# Distinct scooter ids per company, broken out by the month of pubdatetime.
query = """
SELECT 
    companyname,
    EXTRACT('MONTH' FROM pubdatetime) AS month,
    COUNT(DISTINCT sumdid)
FROM scooters
GROUP BY companyname, EXTRACT('MONTH' FROM pubdatetime);
"""

monthly_sql = text(query)
with engine.connect() as conn:
    scoots = pd.read_sql(sql=monthly_sql, con=conn)

scoots

The following code was written as group work.

In [55]:
# Load the complete `trips` table for the group-work analysis below.
query = '''
SELECT *
FROM trips
'''

# Same pattern as the earlier queries in this notebook: wrap the SQL in text() and
# run it on an explicitly managed connection instead of handing a raw string to
# the engine.
with engine.connect() as connection:
    trips = pd.read_sql(text(query), con = connection)

The goal of Metro Nashville is to have each scooter used a minimum of 3 times per day. Based on the data, what is the average number of trips per scooter per day? Make sure to consider the days that a scooter was available. How does this vary by company?

In [56]:
# Month-day label (format '%m-%d') taken from each record's pubtimestamp;
# the cells below use it as the per-day grouping key.
trips['MM-DD'] = trips.pubtimestamp.dt.strftime('%m-%d')

In [57]:
# Keep only the day, scooter id and company columns, grouped by (day, scooter).
# The result is a groupby object; the counting happens in the next cell.
daily_cols = ['MM-DD', 'sumdid', 'companyname']
number3 = trips[daily_cols].groupby(by=['MM-DD', 'sumdid'])

In [58]:
# Trips per (day, scooter, company), as a flat frame with a `count` column.
# Built from `trips` rather than from the previous value of `number3`, so the cell
# is idempotent: re-running it no longer operates on the already-counted frame.
number3 = (
    trips[['MM-DD', 'sumdid', 'companyname']]
    .groupby(['MM-DD', 'sumdid'])
    .value_counts()
    .reset_index(name='count')  # explicit name: the next cell selects 'count'
)

In [59]:
# Mean of the per-scooter trip counts for every (day, company) pair.
per_day_company = number3.groupby(['MM-DD', 'companyname'])
per_day_company['count'].mean()

MM-DD  companyname  
05-01  Bird             1.546218
       Lyft             2.707617
05-02  Bird             1.612946
       Lime             4.708013
       Lyft             2.291855
                          ...   
08-01  Bolt Mobility    1.000000
       JUMP             1.000000
       Lime             1.000000
       Lyft             1.000000
       SPIN             1.000000
Name: count, Length: 528, dtype: float64

SUMDs can provide alternative transportation and provide "last mile" access to public transit. How often are trips starting near public transit hubs? You can download a dataset of bus stop locations from https://data.nashville.gov/Transportation/Regional-Transportation-Authority-Bus-Stops/p886-fnbd.

In [60]:
# Bus stop locations exported to CSV (see the dataset link in the text above).
BUS_STOPS_CSV = "../data/Regional_Transportation_Authority_Bus_Stops_20240106.csv"
buses = pd.read_csv(BUS_STOPS_CSV)

# Confirm what kind of object read_csv returned.
type(buses)

pandas.core.frame.DataFrame

In [None]:
# Summarise the trips frame: row count, column names, non-null counts and dtypes.
trips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 565522 entries, 0 to 565521
Data columns (total 17 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   pubtimestamp    565522 non-null  datetime64[ns]
 1   companyname     565522 non-null  object        
 2   triprecordnum   565522 non-null  object        
 3   sumdid          565522 non-null  object        
 4   tripduration    565522 non-null  float64       
 5   tripdistance    565522 non-null  float64       
 6   startdate       565522 non-null  object        
 7   starttime       565522 non-null  object        
 8   enddate         565522 non-null  object        
 9   endtime         565522 non-null  object        
 10  startlatitude   565522 non-null  float64       
 11  startlongitude  565522 non-null  float64       
 12  endlatitude     565522 non-null  float64       
 13  endlongitude    565522 non-null  float64       
 14  triproute       565522 non-null  obj

In [None]:
# Preview the first rows of the bus stop data.
buses.head()

Unnamed: 0,Stop ID Number,Stop Abbreviation,Stop Name,Bench,Shelter,Line Number,Line Name,Mapped Location
0,4418,MCC4_20,MUSIC CITY CENTRAL 4TH - BAY 20,False,True,94,CLARKSVILLE EXPRESS,"(36.166545, -86.781895)"
1,4422,MCC5_6,MUSIC CITY CENTRAL 5TH - BAY 6,True,True,94,CLARKSVILLE EXPRESS,"(36.166501, -86.781233)"
2,4249,21WE,21ST AVE PAST WEST END AVE SB,False,False,87,GALLATIN EXPRESS,"(36.149489, -86.800523)"
3,4184,MCSMJ,MUSIC CITY STAR MT. JULIET STATION,True,True,90,MUSIC CITY STAR,"(36.199912, -86.517904)"
4,4425,MCC5_8,MUSIC CITY CENTRAL 5TH - BAY 8,False,True,92,HENDERSONVILLE EXPRESS,"(36.166768, -86.781424)"


In [None]:
# Zip code polygons; print the CRS so later GeoDataFrames can be built to match it.
ZIPCODES_GEOJSON = '../data/zipcodes.geojson'
zipcodes = gpd.read_file(ZIPCODES_GEOJSON)
print(zipcodes.crs)
zipcodes.head()

EPSG:4326


Unnamed: 0,zip,objectid,po_name,shape_stlength,shape_starea,geometry
0,37115,1,MADISON,178783.0248888682,596553400.5788574,"MULTIPOLYGON (((-86.68725 36.31821, -86.68722 ..."
1,37216,3,NASHVILLE,75820.99782140006,188884682.28344727,"MULTIPOLYGON (((-86.73451 36.23774, -86.73425 ..."
2,37204,9,NASHVILLE,93180.2922504256,200664795.51708984,"MULTIPOLYGON (((-86.77914 36.13424, -86.77923 ..."
3,37027,11,BRENTWOOD,159760.6942933173,174978422.04101562,"MULTIPOLYGON (((-86.81258 36.06319, -86.81263 ..."
4,37064,18,FRANKLIN,28995.828320601937,46969608.005737305,"MULTIPOLYGON (((-87.02197 36.01200, -87.02140 ..."


In [None]:
# Show the full coordinate reference system description of the zip code layer.
zipcodes.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [None]:
# Point geometry for each trip's start location, in the same CRS as the zip codes.
# points_from_xy takes x first and y second; for geographic coordinates that means
# longitude, then latitude. The original call passed them the other way round,
# which placed every point at a transposed (lat, lon) position.
scoot_geo = gpd.GeoDataFrame(trips, 
                           crs = zipcodes.crs, 
                           geometry = gpd.points_from_xy(trips.startlongitude, trips.startlatitude))

In [None]:
# Keep only the columns needed for the spatial join.
zip_keep_cols = ['zip', 'po_name', 'geometry']
zipcodes = zipcodes[zip_keep_cols]

In [None]:
# `bus_geo` was never defined in this notebook, so the join failed on a fresh
# kernel. Build it here from the "(lat, lon)" strings in 'Mapped Location'.
bus_coords = (
    buses['Mapped Location']
    .str.strip('()')
    .str.split(', ', expand=True)
    .astype(float)
)
bus_geo = gpd.GeoDataFrame(
    buses.copy(),  # copy so the plain `buses` frame is left as it was
    crs=zipcodes.crs,
    # x = longitude (second value), y = latitude (first value)
    geometry=gpd.points_from_xy(bus_coords[1], bus_coords[0]),
)

# Attach zip code attributes to every bus stop that falls within a zip polygon.
stops_by_zip = gpd.sjoin(bus_geo, zipcodes, predicate = 'within')

In [None]:
# Split the "(lat, lon)" strings into two text columns named Latitude / Longitude.
coord_text = buses['Mapped Location'].str.strip('()')
bus2 = coord_text.str.split(', ', expand=True).rename(columns={0: 'Latitude', 1: 'Longitude'})

In [None]:
# Preview the first two parsed coordinate rows.
bus2.head(2)

Unnamed: 0,Latitude,Longitude
0,36.166545,-86.781895
1,36.166501,-86.781233


In [66]:
# Parse the "(lat, lon)" strings into separate text columns.
mapped_text = buses['Mapped Location'].str.strip('()')
bus2 = mapped_text.str.split(', ', expand=True).rename(columns={0: 'Latitude', 1: 'Longitude'})

# Store both coordinates on `buses` as floats.
for coord_col in ('Latitude', 'Longitude'):
    buses[coord_col] = bus2[coord_col].astype(float)

# Group the stops by name and check what kind of object groupby returns.
busg = buses.groupby('Stop Name')
type(busg)

pandas.core.groupby.generic.DataFrameGroupBy

In [64]:
# Distance buffers in decimal degrees.
# NOTE(review): LATBUF is approximately 1/69 and LONBUF approximately 1/54.6, which
# looks like one mile expressed in degrees of latitude / longitude around Nashville
# — confirm the intended units.
LATBUF = 0.0144927536231884
LONBUF = 0.0183150183150183
# DIBUF is approximately sqrt(LATBUF**2 + LONBUF**2), i.e. the diagonal of a
# LATBUF x LONBUF box. It is the threshold the "near a bus stop" check compares against.
DIBUF = 0.0233555091

In [67]:
def _near_any_stop(trip_lat, trip_lon, stop_lat, stop_lon, radius, chunk_size=10_000):
    """Return a boolean array marking trips that start within `radius` of any stop.

    Distance is the same planar measure the original loop used:
    sqrt((|trip_lat| - |stop_lat|)**2 + (|trip_lon| - |stop_lon|)**2), in degrees.

    Parameters
    ----------
    trip_lat, trip_lon : array-like of float
        Start coordinates, one entry per trip.
    stop_lat, stop_lon : array-like of float
        Stop coordinates, one entry per stop.
    radius : float
        Maximum distance (same units as the coordinates) to count as "near".
    chunk_size : int
        Number of trips compared against all stops at once; bounds peak memory.

    Returns
    -------
    numpy.ndarray of bool, one entry per trip. All False when there are no stops.
    """
    trip_lat = np.abs(np.asarray(trip_lat, dtype=float))
    trip_lon = np.abs(np.asarray(trip_lon, dtype=float))
    stop_lat = np.abs(np.asarray(stop_lat, dtype=float))
    stop_lon = np.abs(np.asarray(stop_lon, dtype=float))

    near = np.zeros(trip_lat.shape[0], dtype=bool)
    if stop_lat.size == 0:
        return near

    # Compare one chunk of trips against every stop via broadcasting.
    for start in range(0, trip_lat.shape[0], chunk_size):
        end = start + chunk_size
        dlat = trip_lat[start:end, None] - stop_lat[None, :]
        dlon = trip_lon[start:end, None] - stop_lon[None, :]
        near[start:end] = (np.hypot(dlat, dlon) <= radius).any(axis=1)
    return near


# Flag each trip individually: 'y' if its start point is within DIBUF of at least
# one bus stop, otherwise 'n'. The original loop assigned trips['near'] = ... inside
# the loop, which overwrote the whole column on every iteration, so every row ended
# up with the value computed for the last trip only.
trips['near'] = np.where(
    _near_any_stop(
        trips.startlatitude,
        trips.startlongitude,
        buses.Latitude,
        buses.Longitude,
        DIBUF,
    ),
    'y',
    'n',
)

In [68]:
# Preview the trips frame, including the derived 'MM-DD' and 'near' columns.
trips.head()

Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt,MM-DD,near
0,2019-05-01 00:00:55.423,Bird,BRD2134,Powered9EAJL,3.0,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.1571,-86.8036,36.1566,-86.8067,"[(36.157235, -86.803612), (36.157235, -86.8036...",2019-05-02 05:30:23.780,05-01,y
1,2019-05-01 00:03:33.147,Lyft,LFT5,Powered296631,1.7156,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.15797,-86.77896,36.16054,-86.77689,"[(36.15797, -86.77896), (36.15795, -86.77873),...",2019-05-02 07:20:32.757,05-01,y
2,2019-05-01 00:05:55.570,Bird,BRD2168,Powered7S2UU,3.0,2296.588,2019-05-01,00:03:47.363333,2019-05-01,00:07:13.596666,36.1547,-86.7818,36.1565,-86.7868,"[(36.155068, -86.782124), (36.156597, -86.78675)]",2019-05-02 05:30:24.530,05-01,y
3,2019-05-01 00:05:55.570,Bird,BRD2166,PoweredZIIVX,3.0,1200.78744,2019-05-01,00:04:21.386666,2019-05-01,00:06:59.176666,36.1494,-86.7795,36.1531,-86.7796,"[(36.149741, -86.779344), (36.149741, -86.7793...",2019-05-02 05:30:24.237,05-01,y
4,2019-05-01 00:05:55.570,Bird,BRD2165,PoweredJ7MB3,2.0,351.04988,2019-05-01,00:04:27.796666,2019-05-01,00:06:23.150000,36.1778,-86.7866,36.1774,-86.7876,"[(36.177699, -86.786477), (36.177711, -86.7864...",2019-05-02 05:30:24.207,05-01,y


In [70]:
# Export copy of trips without the two columns that are not needed downstream.
unneeded_cols = ['create_dt', 'triprecordnum']
trip_csv = trips.drop(unneeded_cols, axis=1)
trip_csv.head(2)

Unnamed: 0,pubtimestamp,companyname,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,MM-DD,near
0,2019-05-01 00:00:55.423,Bird,Powered9EAJL,3.0,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.1571,-86.8036,36.1566,-86.8067,"[(36.157235, -86.803612), (36.157235, -86.8036...",05-01,y
1,2019-05-01 00:03:33.147,Lyft,Powered296631,1.7156,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.15797,-86.77896,36.16054,-86.77689,"[(36.15797, -86.77896), (36.15795, -86.77873),...",05-01,y


In [71]:
# Write the trimmed trips frame to disk as CSV. The target path has no ".csv"
# extension, and the DataFrame index is written as the first, unnamed column
# because index=False is not passed.
trip_csv.to_csv('../data/trip_csv')