In [1]:
from sqlalchemy import create_engine, text
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from shapely.geometry import Point

In [2]:
database_name = 'nashville_scooters'    # Fill this in with your database name

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

engine = create_engine(connection_string)

In [3]:
query = '''

WITH cte1 AS (SELECT pubtimestamp::date,
    		companyname,
    		sumdid, 
			startdate+starttime AS trip_start,
			enddate+endtime AS trip_end
			FROM TRIPS),
	cte2 as 
	 	(SELECT pubdatetime::date AS pubtimestamp, 
		 		sumdid, 
		 		costpermin,
		 		companyname
			FROM (SELECT pubdatetime, 
				  		 sumdid, 
				  		 costpermin,
				  	CASE WHEN companyname = 'Bolt' THEN 'Bolt Mobility'
		 				 WHEN companyname = 'Jump' THEN 'JUMP' 
				  		 WHEN companyname = 'Spin' THEN 'SPIN' ELSE companyname END AS companyname 
				  --this subbquery is so we can join on company name bellow
		  			FROM scooters) AS scooters2 
			GROUP BY sumdid, companyname, pubtimestamp, costpermin)
SELECT 
    pubtimestamp::date AS date,
    companyname,
	min(costpermin) as min_cost,
    sumdid, 
	SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) AS total_min_per_day_used,
	ROUND((SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) / 1440.0 * 100)::numeric, 3) || '%' AS trip_use_per_day_percent
FROM trips
FULL JOIN cte1 USING(pubtimestamp, companyname, sumdid)
FULL JOIN cte2 USING(pubtimestamp, sumdid, companyname)
GROUP BY sumdid, companyname, date, costpermin
HAVING SUM(ROUND(EXTRACT(EPOCH FROM (trip_end - trip_start)) / 60.0, 2)) < 1440 
;
'''

with engine.connect() as connection:    
    trips_main = pd.read_sql(text(query), con = connection)

trips_main

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%
...,...,...,...,...,...,...
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%
203169,2019-06-25,Lime,0.15,PoweredZZQOO54WD4AJT,33.92,2.356%


In [4]:
trips_duration = trips_main.loc[trips_main['min_cost'] > 0]

In [5]:
tripscost = trips_duration

In [6]:
tripscost

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%
...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%


In [7]:
## i'm interested in the MONTHLY average cost? i think. so here's what i'd want to look into
# we have seven companies and three months. we don't see a lot of use in scooters until the last week of may.
#for each month, add up the total_min_per_day used and divide b y the days represnted in our data...or just grab the mean?

In [8]:
tripscost['date'] = pd.to_datetime(tripscost['date'])
tripscost.info()

<class 'pandas.core.frame.DataFrame'>
Index: 156994 entries, 0 to 203169
Data columns (total 6 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   date                      156994 non-null  datetime64[ns]
 1   companyname               156994 non-null  object        
 2   min_cost                  156994 non-null  float64       
 3   sumdid                    156994 non-null  object        
 4   total_min_per_day_used    156994 non-null  float64       
 5   trip_use_per_day_percent  156994 non-null  object        
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 8.4+ MB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tripscost['date'] = pd.to_datetime(tripscost['date'])


In [9]:
tripscost['month'] = tripscost['date'].dt.month
tripscost

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tripscost['month'] = tripscost['date'].dt.month


Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%,5
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%,5
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%,5
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%,5
...,...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%,6
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%,6
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%,6
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%,6


In [10]:
tripscost.loc[tripscost['date'] == '2019-05-24']

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
44,2019-05-24,Bolt Mobility,0.15,Powered-01a24436-0315-e1bb-7ce0-d081d05dff7d,15.20,1.056%,5
141,2019-05-24,Bolt Mobility,0.15,Powered-0479bb84-afbd-0426-f1c4-df628542a88c,76.59,5.319%,5
181,2019-05-24,Bolt Mobility,0.15,Powered-0540c92c-0907-dafa-b19f-011495068ec5,82.26,5.713%,5
218,2019-05-24,Bolt Mobility,0.15,Powered-0555d61d-1c59-4219-edf3-0f00e286629a,9.12,0.633%,5
...,...,...,...,...,...,...,...
202971,2019-05-24,Bird,0.15,PoweredZYM3M,3.62,0.251%,5
202999,2019-05-24,Lime,0.15,PoweredZYNSVOII2QSLD,33.15,2.302%,5
203031,2019-05-24,Lime,0.15,PoweredZZ4H3UK3IFF4O,239.44,16.628%,5
203098,2019-05-24,Lime,0.15,PoweredZZNTWU4QI7MLI,20.89,1.451%,5


In [11]:
# maybe iterate here.....and sum up the minutes ALL scooters were used for each company

In [12]:
may_minutes_bolt = 0
june_minutes_bolt = 0
july_minutes_bolt = 0

may_minutes_jump = 0
june_minutes_jump = 0
july_minutes_jump = 0

may_minutes_gotcha = 0
june_minutes_gotcha = 0
july_minutes_gotcha = 0

may_minutes_lime = 0
june_minutes_lime = 0
july_minutes_lime = 0

may_minutes_lyft = 0
june_minutes_lyft = 0
july_minutes_lyft = 0

may_minutes_bird = 0
june_minutes_bird = 0
july_minutes_bird = 0

may_minutes_spin = 0
june_minutes_spin = 0
july_minutes_spin = 0

for index, row in tripscost.iterrows():
    if row.companyname == 'Bolt Mobility':
        if row.month == 5:
            may_minutes_bolt += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_bolt += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_bolt += row.total_min_per_day_used
    elif row.companyname == 'JUMP':
        if row.month == 5:
            may_minutes_jump += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_jump += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_jump += row.total_min_per_day_used
    elif row.companyname == 'Gotcha':
        if row.month == 5:
            may_minutes_gotcha += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_gotcha += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_gotcha += row.total_min_per_day_used
    elif row.companyname == 'Lime':
        if row.month == 5:
            may_minutes_lime += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_lime += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_lime += row.total_min_per_day_used
    elif row.companyname == 'Bird':
        if row.month == 5:
            may_minutes_bird += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_bird += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_bird += row.total_min_per_day_used
    elif row.companyname == 'SPIN':
        if row.month == 5:
            may_minutes_spin += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_spin += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_lime += row.total_min_per_day_used
    if row.companyname == 'Lyft':
        if row.month == 5:
            may_minutes_lyft += row.total_min_per_day_used
        elif row.month == 6:
            june_minutes_lyft += row.total_min_per_day_used
        elif row.month == 7:
            july_minutes_lyft += row.total_min_per_day_used
print('Bolt: ', may_minutes_bolt, june_minutes_bolt, july_minutes_bolt)
print('Jump: ', may_minutes_jump, june_minutes_jump, july_minutes_jump)
print('Gotcha: ', may_minutes_gotcha, june_minutes_gotcha, july_minutes_gotcha)
print('Lime: ', may_minutes_lime, june_minutes_lime, july_minutes_lime)
print('Bird: ', may_minutes_bird, june_minutes_bird, july_minutes_bird)
print('SPIN: ', may_minutes_spin, june_minutes_spin, july_minutes_spin)
print('Lyft: ', may_minutes_lyft, june_minutes_lyft, july_minutes_lyft)

Bolt:  99348.85999999991 228247.43999999962 171087.02999999985
Jump:  0 20845.13999999999 105280.20000000011
Gotcha:  6514.54 18845.329999999994 8018.889999999999
Lime:  654677.2599999993 1264550.4100000057 812861.4800000013
Bird:  274462.0999999995 725826.2300000008 585148.8400000024
SPIN:  117078.83000000044 324137.38999999786 0
Lyft:  248812.31999999975 593536.6100000014 456031.7300000012


what am i trying to do? i have the total minutes by company and month stored in variables. i could set these manually or write a function. i think writing a function would be cool. what do i want it to do?

i have seven companies and minutes for those companies segmented by month. if the company is bolt and the month is seven, then i want to set the column values for that row and index to the corresponding variable. right? 

so first i think i should do that group by i did in my other notebook so it has COMPANY NAME | MONTH | COSTPERMIN| TOTALMIN. totalmin i'd have to have blank and then add in via my function maybe? let's start with the group by and get it ready. 

In [13]:
tripscost.head()

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%,5
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%,5
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%,5
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%,5


In [14]:
tripscost_scootcount = tripscost

In [15]:
tripscost = tripscost.drop(['date', 'sumdid', 'total_min_per_day_used', 'trip_use_per_day_percent'], axis=1)

In [16]:
tripscost

Unnamed: 0,companyname,min_cost,month
0,Bolt Mobility,0.15,5
1,Bolt Mobility,0.15,5
2,Bolt Mobility,0.15,5
3,Bolt Mobility,0.15,5
4,Bolt Mobility,0.15,5
...,...,...,...
203165,Lime,0.15,6
203166,Lime,0.15,6
203167,Lime,0.15,6
203168,Lime,0.15,6


In [17]:
def total_min_column(company, may, june, july):
    for index, row in tripscost.iterrows():
        if row.companyname == company and row.month == 5:
            tripscost.loc[index, 'total_min_per_month'] = may
        elif row.companyname == company and row.month == 6:
            tripscost.loc[index, 'total_min_per_month'] = june
        elif row.companyname == company and row.month == 7:
            tripscost.loc[index, 'total_min_per_month'] = july
            


            
            
            
            

In [18]:
total_min_column('Bolt Mobility', may_minutes_bolt, june_minutes_bolt, july_minutes_bolt)
total_min_column('JUMP', may_minutes_jump, june_minutes_jump, july_minutes_jump)
total_min_column('Gotcha', may_minutes_gotcha, june_minutes_gotcha, july_minutes_gotcha)
total_min_column('Lime', may_minutes_lime, june_minutes_lime, july_minutes_lime)
total_min_column('Bird', may_minutes_bird, june_minutes_bird, july_minutes_bird)
total_min_column('SPIN', may_minutes_spin, june_minutes_spin, july_minutes_spin)
total_min_column('Lyft', may_minutes_lyft, june_minutes_lyft, july_minutes_lyft)
tripscost

Unnamed: 0,companyname,min_cost,month,total_min_per_month
0,Bolt Mobility,0.15,5,99348.86
1,Bolt Mobility,0.15,5,99348.86
2,Bolt Mobility,0.15,5,99348.86
3,Bolt Mobility,0.15,5,99348.86
4,Bolt Mobility,0.15,5,99348.86
...,...,...,...,...
203165,Lime,0.15,6,1264550.41
203166,Lime,0.15,6,1264550.41
203167,Lime,0.15,6,1264550.41
203168,Lime,0.15,6,1264550.41


In [19]:
tripscost_grouped_month = (tripscost
     .groupby(['companyname', 'month', 'total_min_per_month'])
     ['min_cost']
 .min()
 .reset_index())

In [20]:
tripscost_grouped_month

Unnamed: 0,companyname,month,total_min_per_month,min_cost
0,Bird,5,274462.1,0.15
1,Bird,6,725826.23,0.15
2,Bird,7,585148.84,0.15
3,Bolt Mobility,5,99348.86,0.15
4,Bolt Mobility,6,228247.44,0.15
5,Bolt Mobility,7,171087.03,0.15
6,Gotcha,5,6514.54,0.15
7,Gotcha,6,18845.33,0.15
8,Gotcha,7,8018.89,0.15
9,JUMP,6,20845.14,0.06


In [21]:
tripscost_grouped_month['cost_per_month']= 0
for index, row in tripscost_grouped_month.iterrows():
    tripscost_grouped_month.loc[index, 'cost_per_month'] = (row.total_min_per_month * row.min_cost)

tripscost_grouped_month
    

  tripscost_grouped_month.loc[index, 'cost_per_month'] = (row.total_min_per_month * row.min_cost)


Unnamed: 0,companyname,month,total_min_per_month,min_cost,cost_per_month
0,Bird,5,274462.1,0.15,41169.315
1,Bird,6,725826.23,0.15,108873.9345
2,Bird,7,585148.84,0.15,87772.326
3,Bolt Mobility,5,99348.86,0.15,14902.329
4,Bolt Mobility,6,228247.44,0.15,34237.116
5,Bolt Mobility,7,171087.03,0.15,25663.0545
6,Gotcha,5,6514.54,0.15,977.181
7,Gotcha,6,18845.33,0.15,2826.7995
8,Gotcha,7,8018.89,0.15,1202.8335
9,JUMP,6,20845.14,0.06,1250.7084


In [22]:
#profit:scooter maybe look at what amount of scooters are in circulation for each company, compare that to the cost per month

In [23]:
tripscost_scootcount

Unnamed: 0,date,companyname,min_cost,sumdid,total_min_per_day_used,trip_use_per_day_percent,month
0,2019-05-24,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,30.83,2.141%,5
1,2019-05-25,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,56.35,3.913%,5
2,2019-05-26,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,107.87,7.491%,5
3,2019-05-27,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,147.86,10.268%,5
4,2019-05-29,Bolt Mobility,0.15,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,12.68,0.881%,5
...,...,...,...,...,...,...,...
203165,2019-06-21,Lime,0.15,PoweredZZQOO54WD4AJT,5.45,0.378%,6
203166,2019-06-22,Lime,0.15,PoweredZZQOO54WD4AJT,9.76,0.678%,6
203167,2019-06-23,Lime,0.15,PoweredZZQOO54WD4AJT,39.65,2.753%,6
203168,2019-06-24,Lime,0.15,PoweredZZQOO54WD4AJT,104.92,7.286%,6


In [24]:
# maybe I want to do a value counts here? for each month and company, what's the value counts sumdid? let's just try on the dataframe above first.

In [25]:
tripscost_scootcount = tripscost_scootcount.drop(['date', 'min_cost', 'total_min_per_day_used', 'trip_use_per_day_percent'], axis = 1)

In [26]:
tripscost_scootcount

Unnamed: 0,companyname,sumdid,month
0,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
1,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
2,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
3,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
4,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
...,...,...,...
203165,Lime,PoweredZZQOO54WD4AJT,6
203166,Lime,PoweredZZQOO54WD4AJT,6
203167,Lime,PoweredZZQOO54WD4AJT,6
203168,Lime,PoweredZZQOO54WD4AJT,6


#you could loop and like, set a variable to empty. each iteration this is what happens : is the sumdid equal to the variable? if yes, do nothing. if no, add to the count and reset the variable.
ideas from josh: dictionary with key as month, dictionary IN dictionary for companyname?

In [27]:
sumdid_var = ' '
bolt_may_count = 0
bolt_june_count = 0
bolt_july_count = 0
for index, row in tripscost_scootcount.iterrows():
    if row.companyname == 'Bolt Mobility' and row.month == 5:
        if row.sumdid != sumdid_var:
            bolt_may_count +=1
            sumdid_var = row.sumdid
    elif row.companyname == 'Bolt Mobility' and row.month == 6:
        if row.sumdid != sumdid_var:
            bolt_june_count +=1
            sumdid_var = row.sumdid
print(bolt_may_count)
print(bolt_june_count)
            

342
9


i don't think this is working...maybe a val count again?

In [28]:
tripscost_scootcount.month.value_counts()

month
6    74061
7    54753
5    28180
Name: count, dtype: int64

In [29]:
#maybe break up by company

In [30]:
scootcount_bolt = tripscost_scootcount.loc[tripscost_scootcount.companyname == 'Bolt Mobility']
scootcount_bolt

Unnamed: 0,companyname,sumdid,month
0,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
1,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
2,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
3,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
4,Bolt Mobility,Powered-017d3133-f14a-2b83-ee4f-d777e7c5b619,5
...,...,...,...
9568,Bolt Mobility,Powered-fe3377d6-5d33-4902-dbc5-d7165616b48d,6
9569,Bolt Mobility,Powered-fe3377d6-5d33-4902-dbc5-d7165616b48d,6
9570,Bolt Mobility,Powered-fe3377d6-5d33-4902-dbc5-d7165616b48d,6
9571,Bolt Mobility,Powered-fe3377d6-5d33-4902-dbc5-d7165616b48d,6


In [31]:
scootcount_jump = tripscost_scootcount.loc[tripscost_scootcount.companyname == 'JUMP']

In [32]:
scootcount_lime = tripscost_scootcount.loc[tripscost_scootcount.companyname == 'Lime']

In [33]:
scootcount_bird = tripscost_scootcount.loc[tripscost_scootcount.companyname == 'Bird']

In [34]:
scootcount_lyft = tripscost_scootcount.loc[tripscost_scootcount.companyname == 'Lyft']

In [35]:
scootcount_gotcha = tripscost_scootcount.loc[tripscost_scootcount.companyname == 'Gotcha']

In [36]:
scootcount_spin = tripscost_scootcount.loc[tripscost_scootcount.companyname == 'SPIN']

In [37]:
test = scootcount_bolt.month.value_counts()

In [38]:
type(test)

pandas.core.series.Series

In [39]:
scootcount_bolt = (scootcount_bolt.groupby(['month', 'companyname'])
['sumdid']
 .nunique()
)

In [40]:
scootcount_bolt = scootcount_bolt.reset_index()

In [41]:
scootcount_jump = (scootcount_jump.groupby(['month', 'companyname'])
['sumdid']
 .nunique()
 .reset_index()
)

scootcount_jump

Unnamed: 0,month,companyname,sumdid
0,6,JUMP,172
1,7,JUMP,409


In [42]:
scootcount_bird = (scootcount_bird.groupby(['month', 'companyname'])
['sumdid']
 .nunique()
 .reset_index()
)

scootcount_bird

Unnamed: 0,month,companyname,sumdid
0,5,Bird,2286
1,6,Bird,2852
2,7,Bird,2461


In [43]:
scootcount_lyft = (scootcount_lyft.groupby(['month', 'companyname'])
['sumdid']
 .nunique()
 .reset_index()
)

scootcount_spin = (scootcount_spin.groupby(['month', 'companyname'])
['sumdid']
 .nunique()
 .reset_index()
)

scootcount_lime = (scootcount_lime.groupby(['month', 'companyname'])
['sumdid']
 .nunique()
 .reset_index()
)

scootcount_gotcha = (scootcount_gotcha.groupby(['month', 'companyname'])
['sumdid']
 .nunique()
 .reset_index()
)


scootcount_gotcha

Unnamed: 0,month,companyname,sumdid
0,5,Gotcha,102
1,6,Gotcha,133
2,7,Gotcha,103


In [44]:

scootcount_spin


Unnamed: 0,month,companyname,sumdid
0,5,SPIN,602
1,6,SPIN,670
2,7,SPIN,512


In [45]:
pd.concat([scootcount_bolt, scootcount_jump, scootcount_spin, scootcount_lime, scootcount_gotcha, scootcount_lyft, scootcount_bird])

Unnamed: 0,month,companyname,sumdid
0,5,Bolt Mobility,342
1,6,Bolt Mobility,329
2,7,Bolt Mobility,267
0,6,JUMP,172
1,7,JUMP,409
0,5,SPIN,602
1,6,SPIN,670
2,7,SPIN,512
0,5,Lime,1336
1,6,Lime,1407


In [47]:
scootcount_all = pd.concat([scootcount_bolt, scootcount_jump, scootcount_spin, scootcount_lime, scootcount_gotcha, scootcount_lyft, scootcount_bird])

In [48]:
scootcount_all

Unnamed: 0,month,companyname,sumdid
0,5,Bolt Mobility,342
1,6,Bolt Mobility,329
2,7,Bolt Mobility,267
0,6,JUMP,172
1,7,JUMP,409
0,5,SPIN,602
1,6,SPIN,670
2,7,SPIN,512
0,5,Lime,1336
1,6,Lime,1407


In [46]:
# def count_up_scooters(dataframe):
#         dataframe = (dataframe.groupby(['month', 'companyname'])
#                             ['sumdid']
#                              .nunique())
#         dataframe = dataframe.reset_index()
#         return dataframe

# count_up_scooters(scootcount_jump)
# scootcount_jump