In [1]:
from sqlalchemy import create_engine, text
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from shapely.geometry import Point

In [2]:
database_name = 'nashville_scooters'    # Fill this in with your database name

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

engine = create_engine(connection_string)

In [11]:
query = '''

WITH cte1 AS (SELECT pubtimestamp::date,
    		companyname,
    		sumdid, 
			startdate+starttime AS trip_start,
			enddate+endtime AS trip_end
			FROM TRIPS),
	cte2 as 
	 	(SELECT pubdatetime::date AS pubtimestamp, 
		 		sumdid, 
		 		costpermin,
		 		companyname
			FROM (SELECT pubdatetime, 
				  		 sumdid, 
				  		 costpermin,
				  	CASE WHEN companyname = 'Bolt' THEN 'Bolt Mobility'
		 				 WHEN companyname = 'Jump' THEN 'JUMP' 
				  		 WHEN companyname = 'Spin' THEN 'SPIN' ELSE companyname END AS companyname 
				  --this subbquery is so we can join on company name bellow
		  			FROM scooters) AS scooters2 
			GROUP BY sumdid, companyname, pubtimestamp, costpermin)
SELECT 
    pubtimestamp::date AS date,
    companyname,
	min(costpermin) as min_cost,
    sumdid, 
	SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) AS total_min_per_day_used,
	ROUND((SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) / 1440.0 * 100)::numeric, 3) || '%' AS trip_use_per_day_percent
FROM trips
FULL JOIN cte1 USING(pubtimestamp, companyname, sumdid)
FULL JOIN cte2 USING(pubtimestamp, sumdid, companyname)
GROUP BY sumdid, companyname, date, costpermin
HAVING SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) < 1440 
;
'''

with engine.connect() as connection:    
    trips_main = pd.read_sql(text(query), con = connection)

trips_main

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%
...,...,...,...,...,...,...
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%
203169,2019-06-25,Lime,0.15,PoweredZZQOO54WD4AJT,33.92,2.356%


In [13]:
trips_duration = trips_main.loc[trips_main['min_cost'] > 0]

In [14]:
tripscost = trips_duration

In [15]:
tripscost

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%
...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%


In [16]:
## i'm interested in the MONTHLY average cost? i think. so here's what i'd want to look into
# we have seven companies and three months. we don't see a lot of use in scooters until the last week of may.
#for each month, add up the total_min_per_day used and divide b y the days represnted in our data...or just grab the mean?

In [19]:
tripscost['date'] = pd.to_datetime(tripscost['date'])
tripscost.info()

<class 'pandas.core.frame.DataFrame'>
Index: 156994 entries, 0 to 203169
Data columns (total 6 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   date                      156994 non-null  datetime64[ns]
 1   companyname               156994 non-null  object        
 2   min_cost                  156994 non-null  float64       
 3   sumdid                    156994 non-null  object        
 4   total_min_per_day_used    156994 non-null  float64       
 5   trip_use_per_day_percent  156994 non-null  object        
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 8.4+ MB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tripscost['date'] = pd.to_datetime(tripscost['date'])


In [20]:
tripscost['month'] = tripscost['date'].dt.month
tripscost

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tripscost['month'] = tripscost['date'].dt.month


Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%,5
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%,5
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%,5
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%,5
...,...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%,6
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%,6
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%,6
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%,6


In [21]:
tripscost.loc[tripscost['date'] == '2019-05-24']

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
44,2019-05-24,Bolt Mobility,0.15,Powered-01a24436-0315-e1bb-7ce0-d081d05dff7d,15.20,1.056%,5
141,2019-05-24,Bolt Mobility,0.15,Powered-0479bb84-afbd-0426-f1c4-df628542a88c,76.59,5.319%,5
181,2019-05-24,Bolt Mobility,0.15,Powered-0540c92c-0907-dafa-b19f-011495068ec5,82.26,5.713%,5
218,2019-05-24,Bolt Mobility,0.15,Powered-0555d61d-1c59-4219-edf3-0f00e286629a,9.12,0.633%,5
...,...,...,...,...,...,...,...
202971,2019-05-24,Bird,0.15,PoweredZYM3M,3.62,0.251%,5
202999,2019-05-24,Lime,0.15,PoweredZYNSVOII2QSLD,33.15,2.302%,5
203031,2019-05-24,Lime,0.15,PoweredZZ4H3UK3IFF4O,239.44,16.628%,5
203098,2019-05-24,Lime,0.15,PoweredZZNTWU4QI7MLI,20.89,1.451%,5


In [22]:
# maybe iterate here.....and sum up the minutes ALL scooters were used for each company

In [43]:
may_minutes_bolt = 0
june_minutes_bolt = 0
july_minutes_bolt = 0

may_minutes_jump = 0
june_minutes_jump = 0
july_minutes_jump = 0

may_minutes_gotcha = 0
june_minutes_gotcha = 0
july_minutes_gotcha = 0

may_minutes_lime = 0
june_minutes_lime = 0
july_minutes_lime = 0

may_minutes_lyft = 0
june_minutes_lyft = 0
july_minutes_lyft = 0

may_minutes_bird = 0
june_minutes_bird = 0
july_minutes_bird = 0

may_minutes_spin = 0
june_minutes_spin = 0
july_minutes_spin = 0

for index, row in tripscost.iterrows():
    if row.companyname == 'Bolt Mobility':
        if row.month == 5:
            may_minutes_bolt += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_bolt += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_bolt += row.total_min_per_day_used
    elif row.companyname == 'JUMP':
        if row.month == 5:
            may_minutes_jump += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_jump += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_jump += row.total_min_per_day_used
    elif row.companyname == 'Gotcha':
        if row.month == 5:
            may_minutes_gotcha += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_gotcha += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_gotcha += row.total_min_per_day_used
    elif row.companyname == 'Lime':
        if row.month == 5:
            may_minutes_lime += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_lime += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_lime += row.total_min_per_day_used
    elif row.companyname == 'Bird':
        if row.month == 5:
            may_minutes_bird += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_bird += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_bird += row.total_min_per_day_used
    elif row.companyname == 'SPIN':
        if row.month == 5:
            may_minutes_spin += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_spin += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_lime += row.total_min_per_day_used
    if row.companyname == 'Lyft':
        if row.month == 5:
            may_minutes_lyft += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_lyft += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_lyft += row.total_min_per_day_used
print('Bolt: ', may_minutes_bolt, june_minutes_bolt, july_minutes_bolt)
print('Jump: ', may_minutes_jump, june_minutes_jump, july_minutes_jump)
print('Gotcha: ', may_minutes_gotcha, june_minutes_gotcha, july_minutes_gotcha)
print('Lime: ', may_minutes_lime, june_minutes_lime, july_minutes_lime)
print('Bird: ', may_minutes_bird, june_minutes_bird, july_minutes_bird)
print('SPIN: ', may_minutes_spin, june_minutes_spin, july_minutes_spin)
print('Lyft: ', may_minutes_lyft, june_minutes_lyft, july_minutes_lyft)

Bolt:  99348.85999999991 228247.43999999962 171087.02999999985
Jump:  0 20845.13999999999 105280.20000000011
Gotcha:  6514.54 18845.329999999994 8018.889999999999
Lime:  654677.2599999993 1264550.4100000057 812861.4800000013
Bird:  274462.0999999995 725826.2300000008 585148.8400000024
SPIN:  117078.83000000044 324137.38999999786 0
Lyft:  248812.31999999975 593536.6100000014 456031.7300000012


In [None]:
def total_min_column(company, var1, var2, var2):
    for index, row in tripscost.iterrows():
        if row.companyname == company & row.
            
            
            

In [32]:
tripscost

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent,month,total_min_per_month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5,0
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%,5,0
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%,5,0
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%,5,0
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%,5,0
...,...,...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%,6,0
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%,6,0
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%,6,0
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%,6,0
