In [1]:
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from shapely.geometry import Point

In [2]:
# Connection settings are read from the environment when present so that real
# credentials never have to be saved inside the notebook. Every fallback value
# reproduces the original local-development setup, so with no variables set the
# resulting connection string is unchanged.
database_name = os.environ.get('PGDATABASE', 'nashville_scooters')    # Fill this in with your database name
db_user = os.environ.get('PGUSER', 'postgres')
db_password = os.environ.get('PGPASSWORD', 'postgres')
db_host = os.environ.get('PGHOST', 'localhost')
db_port = os.environ.get('PGPORT', '5432')

# The password is URL-escaped because characters such as '@' or '/' would
# otherwise be parsed as part of the URL structure.
connection_string = (
    f"postgresql://{db_user}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{database_name}"
)

engine = create_engine(connection_string)

In [3]:
# Query overview:
#   * cte1 - rows from `trips` with `pubtimestamp` cast to a date and with
#     `trip_start` / `trip_end` built as startdate+starttime / enddate+endtime.
#   * cte2 - distinct (sumdid, companyname, date, costpermin) combinations from
#     `scooters`, with 'Bolt', 'Jump' and 'Spin' rewritten to 'Bolt Mobility',
#     'JUMP' and 'SPIN' so the company name can be used as a join key.
#   * final SELECT - groups by scooter, company, date, cost per minute and
#     tripduration, sums the per-trip minutes (each rounded to 2 decimals), and
#     expresses that sum as a percentage of 1440 minutes. The percentage is
#     concatenated with '%', so that column comes back as text.
#   * HAVING keeps only groups whose summed minutes are at most 1440.
# NOTE(review): `trips` is FULL JOINed to cte1 USING(pubtimestamp, ...), where
# the cte1 side is a date and the `trips` side is not cast. `tripduration` is
# empty in the displayed result, which suggests the `trips` rows are not
# matching - confirm whether joining `trips` here is intended.
query = '''

WITH cte1 AS (SELECT pubtimestamp::date,
    		companyname,
    		sumdid, 
			startdate+starttime AS trip_start,
			enddate+endtime AS trip_end
			FROM TRIPS),
	cte2 as 
	 	(SELECT pubdatetime::date AS pubtimestamp, 
		 		sumdid, 
		 		costpermin,
		 		companyname
			FROM (SELECT pubdatetime, 
				  		 sumdid, 
				  		 costpermin,
				  	CASE WHEN companyname = 'Bolt' THEN 'Bolt Mobility'
		 				 WHEN companyname = 'Jump' THEN 'JUMP' 
				  		 WHEN companyname = 'Spin' THEN 'SPIN' ELSE companyname END AS companyname 
				  --this subbquery is so we can join on company name bellow
		  			FROM scooters) AS scooters2 
			GROUP BY sumdid, companyname, pubtimestamp, costpermin)
SELECT 
    pubtimestamp::date AS date,
    companyname,
	costpermin,
    sumdid, 
	tripduration,
	SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) AS total_min_per_day_used,
	ROUND((SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) / 1440.0 * 100)::numeric, 3) || '%' AS trip_use_per_day_percent
FROM trips
FULL JOIN cte1 USING(pubtimestamp, companyname, sumdid)
FULL JOIN cte2 USING(pubtimestamp, sumdid, companyname)
GROUP BY sumdid, companyname, date, costpermin, tripduration
HAVING SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) <= 1440
;
'''

# Run the query on a connection that is closed when the `with` block exits,
# and load the full result set into a DataFrame.
with engine.connect() as connection:    
    trips_main = pd.read_sql(text(query), con = connection)

# Display the loaded frame.
trips_main

Unnamed: 0,date,companyname,costpermin,sumdid,tripduration,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,12.68,0.881%
...,...,...,...,...,...,...,...
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,,104.92,7.286%
203169,2019-06-25,Lime,0.15,PoweredZZQOO54WD4AJT,,33.92,2.356%


In [4]:
# Keep only rows with a positive price per minute. Rows where `costpermin` is
# missing compare as False and are therefore dropped as well.
has_positive_rate = trips_main['costpermin'].gt(0)
trips_duration = trips_main[has_positive_rate]

In [5]:
# Inspect the rows that remain after the positive-cost filter.
trips_duration

Unnamed: 0,date,companyname,costpermin,sumdid,tripduration,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,12.68,0.881%
...,...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,,5.45,0.378%
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,,104.92,7.286%


In [6]:
# Discard rows whose daily minute total is negative; missing totals compare as
# False and are removed too.
non_negative_minutes = trips_duration['total_min_per_day_used'].ge(0)
trips_duration2 = trips_duration[non_negative_minutes]
trips_duration2

Unnamed: 0,date,companyname,costpermin,sumdid,tripduration,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,,12.68,0.881%
...,...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,,5.45,0.378%
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,,104.92,7.286%


In [7]:
# `tripduration` is not needed for the cost analysis, so leave it out.
tripscost = trips_duration2.drop(columns='tripduration')

In [8]:
# Inspect the frame after removing `tripduration`.
tripscost

Unnamed: 0,date,companyname,costpermin,sumdid,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%
...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%


In [9]:
# Check dtypes and non-null counts; `date` is reported as an object column here.
tripscost.info()

<class 'pandas.core.frame.DataFrame'>
Index: 156990 entries, 0 to 203169
Data columns (total 6 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   date                      156990 non-null  object 
 1   companyname               156990 non-null  object 
 2   costpermin                156990 non-null  float64
 3   sumdid                    156990 non-null  object 
 4   total_min_per_day_used    156990 non-null  float64
 5   trip_use_per_day_percent  156990 non-null  object 
dtypes: float64(2), object(4)
memory usage: 8.4+ MB


In [10]:
# Preview the datetime conversion of `date`; the result is only displayed,
# not stored.
pd.to_datetime(tripscost['date'])

0        2019-05-24
1        2019-05-25
2        2019-05-26
3        2019-05-27
4        2019-05-29
            ...    
203165   2019-06-21
203166   2019-06-22
203167   2019-06-23
203168   2019-06-24
203169   2019-06-25
Name: date, Length: 156990, dtype: datetime64[ns]

In [11]:
# Store `date` as datetime64 so the `.dt` accessor and date-based grouping can
# be used on it.
tripscost = tripscost.assign(date=pd.to_datetime(tripscost['date']))

In [12]:
# Confirm the dtype of each column after the datetime conversion.
tripscost.dtypes

date                        datetime64[ns]
companyname                         object
costpermin                         float64
sumdid                              object
total_min_per_day_used             float64
trip_use_per_day_percent            object
dtype: object

In [13]:
# Add the calendar month number (taken from `date`) as a new `month` column.
tripscost = tripscost.assign(month=lambda frame: frame['date'].dt.month)
tripscost.head()

Unnamed: 0,date,companyname,costpermin,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%,5
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%,5
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%,5
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%,5


In [14]:
# Split the data into one frame per calendar month (5 = May, 6 = June, 7 = July).
tripscost_may, tripscost_june, tripscost_july = (
    tripscost.loc[tripscost['month'] == month_number]
    for month_number in (5, 6, 7)
)


In [15]:
# Total minutes used per calendar day in May, summed over every scooter.
daily_bins = pd.Grouper(key='date', freq='1d', origin='epoch')
tripscost_may.groupby(daily_bins)['total_min_per_day_used'].sum()



date
2019-05-22     75821.77
2019-05-23     86273.07
2019-05-24    135270.03
2019-05-25    250033.07
2019-05-26    265309.99
2019-05-27    176369.11
2019-05-28     94071.85
2019-05-29     64612.64
2019-05-30     78041.11
2019-05-31    175091.27
Freq: D, Name: total_min_per_day_used, dtype: float64

Next step: look at the total or the average of `total_min_per_day_used` for each company.

In [16]:
# Reduce the May data to the columns needed for per-company averages.
unused_columns = ['sumdid', 'trip_use_per_day_percent', 'month']
tripscost_may_test = tripscost_may.drop(columns=unused_columns)
tripscost_may_test

Unnamed: 0,date,companyname,costpermin,total_min_per_day_used
0,2019-05-24,Bolt Mobility,0.15,30.83
1,2019-05-25,Bolt Mobility,0.15,56.35
2,2019-05-26,Bolt Mobility,0.15,107.87
3,2019-05-27,Bolt Mobility,0.15,147.86
4,2019-05-29,Bolt Mobility,0.15,12.68
...,...,...,...,...
203100,2019-05-27,Lime,0.15,134.68
203101,2019-05-29,Lime,0.15,21.83
203147,2019-05-24,Lime,0.15,298.30
203148,2019-05-25,Lime,0.15,62.79


In [17]:
# Average minutes used per scooter for each company, day and price point.
# `as_index=False` returns the grouping keys as ordinary columns.
may_group_keys = ['companyname', 'date', 'costpermin']
tripscost_may_test = (
    tripscost_may_test
    .groupby(may_group_keys, as_index=False)['total_min_per_day_used']
    .mean()
)

In [18]:
# Inspect the per-company, per-day averages for May.
tripscost_may_test

Unnamed: 0,companyname,date,costpermin,total_min_per_day_used
0,Bird,2019-05-22,0.15,18.546317
1,Bird,2019-05-23,0.15,19.404013
2,Bird,2019-05-24,0.15,23.099946
3,Bird,2019-05-25,0.15,36.943902
4,Bird,2019-05-26,0.15,41.083891
5,Bird,2019-05-27,0.15,34.978963
6,Bird,2019-05-28,0.15,23.779754
7,Bird,2019-05-29,0.15,16.968056
8,Bird,2019-05-30,0.15,19.444918
9,Bird,2019-05-31,0.15,27.061816


In [19]:
# An earlier attempt at `avg_cost_per_day` multiplied two lists of column names
# (['costpermin'] * ['total_min_per_day_used']), which is not a valid operation.
# Check that both columns are numeric before computing the cost from them.
tripscost_may_test.dtypes

companyname                       object
date                      datetime64[ns]
costpermin                       float64
total_min_per_day_used           float64
dtype: object

In [20]:
# Average cost per day = price per minute x average minutes used per day.
# A single column-wise product replaces the row-by-row loop: it gives the same
# values, avoids writing to the frame while iterating over it, and yields a
# float64 column instead of an object column seeded with a ' ' placeholder.
tripscost_may_test = tripscost_may_test.assign(
    avg_cost_per_day=(
        tripscost_may_test['costpermin']
        * tripscost_may_test['total_min_per_day_used']
    )
)

tripscost_may_test

Unnamed: 0,companyname,date,costpermin,total_min_per_day_used,avg_cost_per_day
0,Bird,2019-05-22,0.15,18.546317,2.781947
1,Bird,2019-05-23,0.15,19.404013,2.910602
2,Bird,2019-05-24,0.15,23.099946,3.464992
3,Bird,2019-05-25,0.15,36.943902,5.541585
4,Bird,2019-05-26,0.15,41.083891,6.162584
5,Bird,2019-05-27,0.15,34.978963,5.246844
6,Bird,2019-05-28,0.15,23.779754,3.566963
7,Bird,2019-05-29,0.15,16.968056,2.545208
8,Bird,2019-05-30,0.15,19.444918,2.916738
9,Bird,2019-05-31,0.15,27.061816,4.059272


In [21]:
# The column now holds a mean rather than a total, so name it accordingly.
may_column_names = {'total_min_per_day_used': 'avg_min_per_day_used'}
tripscost_may_test = tripscost_may_test.rename(columns=may_column_names)

In [22]:
# Inspect the May frame after renaming the minutes column.
tripscost_may_test

Unnamed: 0,companyname,date,costpermin,avg_min_per_day_used,avg_cost_per_day
0,Bird,2019-05-22,0.15,18.546317,2.781947
1,Bird,2019-05-23,0.15,19.404013,2.910602
2,Bird,2019-05-24,0.15,23.099946,3.464992
3,Bird,2019-05-25,0.15,36.943902,5.541585
4,Bird,2019-05-26,0.15,41.083891,6.162584
5,Bird,2019-05-27,0.15,34.978963,5.246844
6,Bird,2019-05-28,0.15,23.779754,3.566963
7,Bird,2019-05-29,0.15,16.968056,2.545208
8,Bird,2019-05-30,0.15,19.444918,2.916738
9,Bird,2019-05-31,0.15,27.061816,4.059272


Change of plan: the goal now is to see, for each company, the average minutes used per day over an entire month and the average cost per day over that same month.
The target table looks like this:

COMPANY NAME | MONTH | AVG_MIN | AVG_COST

In [23]:
# Re-inspect the scooter-level frame before reducing it to the columns needed
# for monthly averages.
tripscost

Unnamed: 0,date,companyname,costpermin,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%,5
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%,5
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%,5
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%,5
...,...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%,6
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%,6
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%,6
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%,6


In [24]:
# Remove the columns that are not needed for monthly averages.
# This cell overwrites `tripscost`, so a plain drop raises KeyError when the
# cell is run a second time; errors='ignore' keeps it safely re-runnable.
tripscost = tripscost.drop(
    columns=['sumdid', 'trip_use_per_day_percent'],
    errors='ignore',
)

In [25]:
# `month` now carries the time information needed, so `date` can go.
# This cell overwrites `tripscost`, so a plain drop raises KeyError when the
# cell is run a second time; errors='ignore' keeps it safely re-runnable.
tripscost = tripscost.drop(columns=['date'], errors='ignore')

In [26]:
# Confirm which columns remain after the drops.
tripscost

Unnamed: 0,companyname,costpermin,total_min_per_day_used,month
0,Bolt Mobility,0.15,30.83,5
1,Bolt Mobility,0.15,56.35,5
2,Bolt Mobility,0.15,107.87,5
3,Bolt Mobility,0.15,147.86,5
4,Bolt Mobility,0.15,12.68,5
...,...,...,...,...
203165,Lime,0.15,5.45,6
203166,Lime,0.15,9.76,6
203167,Lime,0.15,39.65,6
203168,Lime,0.15,104.92,6


In [27]:
# Average minutes used per scooter-day for each company, month and price point.
# `as_index=False` returns the grouping keys as ordinary columns.
month_group_keys = ['companyname', 'month', 'costpermin']
tripscost_grouped_month = (
    tripscost
    .groupby(month_group_keys, as_index=False)['total_min_per_day_used']
    .mean()
)

In [28]:
# Inspect the monthly averages per company and price point.
tripscost_grouped_month

Unnamed: 0,companyname,month,costpermin,total_min_per_day_used
0,Bird,5,0.15,28.409285
1,Bird,6,0.15,26.299958
2,Bird,7,0.15,26.269308
3,Bolt Mobility,5,0.15,57.929364
4,Bolt Mobility,6,0.15,49.085471
5,Bolt Mobility,7,0.15,56.703584
6,Bolt Mobility,7,0.3,46.922201
7,Gotcha,5,0.15,28.572544
8,Gotcha,6,0.15,25.398019
9,Gotcha,7,0.15,22.336741


In [29]:
# Average cost per day = price per minute x average minutes used per day.
# A single column-wise product replaces the row-by-row loop: it gives the same
# values, avoids writing to the frame while iterating over it, and yields a
# float64 column instead of an object column seeded with a ' ' placeholder.
tripscost_grouped_month = tripscost_grouped_month.assign(
    avg_cost_per_day=(
        tripscost_grouped_month['costpermin']
        * tripscost_grouped_month['total_min_per_day_used']
    )
)

tripscost_grouped_month


Unnamed: 0,companyname,month,costpermin,total_min_per_day_used,avg_cost_per_day
0,Bird,5,0.15,28.409285,4.261393
1,Bird,6,0.15,26.299958,3.944994
2,Bird,7,0.15,26.269308,3.940396
3,Bolt Mobility,5,0.15,57.929364,8.689405
4,Bolt Mobility,6,0.15,49.085471,7.362821
5,Bolt Mobility,7,0.15,56.703584,8.505538
6,Bolt Mobility,7,0.3,46.922201,14.07666
7,Gotcha,5,0.15,28.572544,4.285882
8,Gotcha,6,0.15,25.398019,3.809703
9,Gotcha,7,0.15,22.336741,3.350511


In [30]:
# The column now holds a mean rather than a total, so name it accordingly.
month_column_names = {'total_min_per_day_used': 'avg_min_per_day_used'}
tripscost_grouped_month = tripscost_grouped_month.rename(columns=month_column_names)

In [31]:
# Resulting table: company, month, price per minute, average minutes per day
# and average cost per day.
tripscost_grouped_month

Unnamed: 0,companyname,month,costpermin,avg_min_per_day_used,avg_cost_per_day
0,Bird,5,0.15,28.409285,4.261393
1,Bird,6,0.15,26.299958,3.944994
2,Bird,7,0.15,26.269308,3.940396
3,Bolt Mobility,5,0.15,57.929364,8.689405
4,Bolt Mobility,6,0.15,49.085471,7.362821
5,Bolt Mobility,7,0.15,56.703584,8.505538
6,Bolt Mobility,7,0.3,46.922201,14.07666
7,Gotcha,5,0.15,28.572544,4.285882
8,Gotcha,6,0.15,25.398019,3.809703
9,Gotcha,7,0.15,22.336741,3.350511


In [None]:
# What I'm thinking about above: perhaps I can display this in Tableau as a ... (TODO: finish this thought)