In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sqlalchemy import create_engine, text

In [2]:
database_name = 'scooters'

# Credentials are read from the standard libpq environment variables when set,
# so real passwords never need to be committed with the notebook. The fallbacks
# reproduce the previous hard-coded local-development values.
db_user = os.environ.get('PGUSER', 'postgres')
db_password = os.environ.get('PGPASSWORD', 'postgres')

# quote_plus keeps characters such as '@', ':' or '/' in the credentials from
# corrupting the URL; for the default values the string is unchanged.
connection_string = (
    f'postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}'
    f'@localhost:5432/{database_name}'
)

# Single shared engine used by every query cell below.
engine = create_engine(connection_string)

In [3]:
# Total trip duration per company from the trips table.
# Rows with a NULL tripduration are filtered out before grouping.
query = '''
SELECT companyname, SUM(tripduration)
FROM trips
WHERE tripduration IS NOT NULL
GROUP BY companyname
'''

# The SUM column has no alias, so it comes back under the default name "sum".
pd.read_sql(query, con = engine)

Unnamed: 0,companyname,sum
0,Bird,2046202.0
1,Bolt Mobility,30821500.0
2,Gotcha,33802.78
3,JUMP,211001.3
4,Lime,3507335.0
5,Lyft,1936370.0
6,SPIN,900575.0


In [4]:
import pandas as pd

In [5]:
# Run whatever SQL is currently held in `query` through an explicit connection,
# wrapping the string in sqlalchemy's text(); the connection closes with the block.
with engine.connect() as connection:
    people = pd.read_sql(text(query), con = connection)

# NOTE(review): `people` holds the query result, not people data — consider renaming.
people.head()

Unnamed: 0,companyname,sum
0,Bird,2046202.0
1,Bolt Mobility,30821500.0
2,Gotcha,33802.78
3,JUMP,211001.3
4,Lime,3507335.0


In [6]:
# Row count of the scooters table.
query = '''
select count(*)
FROM scooters '''

pd.read_sql(query, con = engine)

Unnamed: 0,count
0,73414043


In [7]:
# Fetch a single row from scooters to inspect which columns the table has.
query = '''
select *
FROM scooters
LIMIT 1 '''

pd.read_sql(query, con = engine)

Unnamed: 0,pubdatetime,latitude,longitude,sumdid,sumdtype,chargelevel,sumdgroup,costpermin,companyname
0,2019-06-22 19:32:19.400,36.16031,-86.77661,Powered8611980,Powered,61.0,Scooter,0.23,Spin


In [8]:
# List the distinct values stored in scooters.sumdgroup.
query = '''
select DISTINCT sumdgroup
FROM scooters '''

pd.read_sql(query, con = engine)

Unnamed: 0,sumdgroup
0,bicycle
1,scooter
2,Scooter


#Answer: the values in sumdgroup are bicycle, scooter, and Scooter. We only need scooter and Scooter.

In [9]:
# Smallest and largest latitude/longitude recorded in scooters,
# used as a quick sanity check on the coordinate columns.
query = '''
SELECT 
    MIN(latitude) AS min_lat,
    MAX(latitude) AS max_lat,
    MIN(longitude) AS min_lon,
    MAX(longitude) AS max_lon
FROM scooters;
'''

# Explicit connection + text() form of the query call.
with engine.connect() as connection:
    scoots = pd.read_sql(text(query), con = connection)

# `scoots` is rebound by later query cells, so this value is only valid until then.
scoots

Unnamed: 0,min_lat,max_lat,min_lon,max_lon
0,0.0,3609874.0,-97.443879,0.0


In [None]:
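#The minimum longitude is the only value that looks right: a latitude of 0.0 or 3609874.0 and a longitude of 0.0 do not match the ~36.16, -86.78 positions seen in the data.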

In [10]:
# Pull the entire trips table into memory; `trips` is the DataFrame that the
# pandas-side analysis cells operate on.
query = '''
SELECT *
FROM trips
'''

trips = pd.read_sql(query, con = engine)

In [11]:
# Smallest recorded trip duration (bracket access instead of attribute access).
trips['tripduration'].min()

-19.3582666667

In [12]:
# Minimum and maximum of tripduration and tripdistance across the trips table,
# computed in the database as a range check on both columns.
query = '''
SELECT 
    MIN(tripduration) AS min_du,
    MAX(tripduration) AS max_du,
    MIN(tripdistance) AS min_di,
    MAX(tripdistance) AS max_di
FROM trips;
'''

with engine.connect() as connection:
    scoots = pd.read_sql(text(query), con = connection)

# Note: this rebinds `scoots`, replacing whatever an earlier cell stored in it.
scoots

Unnamed: 0,min_du,max_du,min_di,max_di
0,-19.358267,512619.0,-20324803.8,31884480.0


In [None]:
#All of these look wrong! How can time or distance be negative? And the maximum of each is far too large.

In [13]:
# Distinct company names as they are spelled in the scooters table.
query = '''
select DISTINCT companyname
FROM scooters '''

pd.read_sql(query, con = engine)

Unnamed: 0,companyname
0,Bird
1,Bolt
2,Gotcha
3,Jump
4,Lime
5,Lyft
6,Spin


In [14]:
# Distinct company names as they are spelled in the trips DataFrame.
trips['companyname'].unique()

array(['Bird', 'Lyft', 'Lime', 'Bolt Mobility', 'SPIN', 'Gotcha', 'JUMP'],
      dtype=object)

In [None]:
# Company names are not spelled the same in the two tables: trips has 'SPIN',
# 'JUMP' and 'Bolt Mobility' where scooters has 'Spin', 'Jump' and 'Bolt'.

# Number of distinct device ids (sumdid) per company in the scooters table.
# The query is run once; it was previously pasted twice, which executed the
# same expensive COUNT(DISTINCT ...) scan two times and discarded the first result.
query = '''
select COUNT(DISTINCT sumdid), companyname
FROM scooters
GROUP BY companyname '''

pd.read_sql(query, con = engine)

In [15]:
# Distinct device ids (sumdid) per company and calendar month of pubdatetime.
# The ilike filter matches sumdgroup case-insensitively, so both 'scooter' and
# 'Scooter' rows are kept while 'bicycle' rows are dropped.
query = '''
SELECT 
    companyname,
    EXTRACT('MONTH' FROM pubdatetime) AS month,
    COUNT(DISTINCT sumdid)
FROM scooters
WHERE sumdgroup ilike 'scooter'
GROUP BY companyname, EXTRACT('MONTH' FROM pubdatetime);
'''

# Rebinds `scoots` to this result.
with engine.connect() as connection:
    scoots = pd.read_sql(text(query), con = connection)

In [16]:
# Display the frame currently bound to `scoots` (the per-company, per-month
# distinct counts when the notebook is run top to bottom).
scoots

Unnamed: 0,companyname,month,count
0,Bird,5.0,3064
1,Bird,6.0,2910
2,Bird,7.0,2583
3,Bolt,5.0,346
4,Bolt,6.0,333
5,Bolt,7.0,276
6,Gotcha,5.0,223
7,Gotcha,6.0,223
8,Gotcha,7.0,224
9,Jump,5.0,976


In [None]:
#How many scooters each company has, and how that number has changed over time.

In [17]:
# Mean trip duration per company, as a one-column DataFrame indexed by companyname.
# The column is selected explicitly before aggregating: GroupBy.mean() takes
# `numeric_only` as its first positional argument, so the earlier
# `.mean('tripduration')` only worked because the string happened to be truthy.
number1 = trips.groupby('companyname')[['tripduration']].mean()
number1

Unnamed: 0_level_0,tripduration
companyname,Unnamed: 1_level_1
Bird,13.396196
Bolt Mobility,1408.017222
Gotcha,10.196918
JUMP,32.779447
Lime,15.540224
Lyft,16.004244
SPIN,26.141509


In [None]:
#Scooter usage varies a lot by company: mean trip duration ranges from about 10 (Gotcha) to about 1408 (Bolt Mobility).

In [18]:
# Preview the first five trips.
trips.head(n=5)

Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt
0,2019-05-01 00:00:55.423,Bird,BRD2134,Powered9EAJL,3.0,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.1571,-86.8036,36.1566,-86.8067,"[(36.157235, -86.803612), (36.157235, -86.8036...",2019-05-02 05:30:23.780
1,2019-05-01 00:03:33.147,Lyft,LFT5,Powered296631,1.7156,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.15797,-86.77896,36.16054,-86.77689,"[(36.15797, -86.77896), (36.15795, -86.77873),...",2019-05-02 07:20:32.757
2,2019-05-01 00:05:55.570,Bird,BRD2168,Powered7S2UU,3.0,2296.588,2019-05-01,00:03:47.363333,2019-05-01,00:07:13.596666,36.1547,-86.7818,36.1565,-86.7868,"[(36.155068, -86.782124), (36.156597, -86.78675)]",2019-05-02 05:30:24.530
3,2019-05-01 00:05:55.570,Bird,BRD2166,PoweredZIIVX,3.0,1200.78744,2019-05-01,00:04:21.386666,2019-05-01,00:06:59.176666,36.1494,-86.7795,36.1531,-86.7796,"[(36.149741, -86.779344), (36.149741, -86.7793...",2019-05-02 05:30:24.237
4,2019-05-01 00:05:55.570,Bird,BRD2165,PoweredJ7MB3,2.0,351.04988,2019-05-01,00:04:27.796666,2019-05-01,00:06:23.150000,36.1778,-86.7866,36.1774,-86.7876,"[(36.177699, -86.786477), (36.177711, -86.7864...",2019-05-02 05:30:24.207


In [None]:
#No: there are rides shorter than one minute and rides longer than 24 hours.

In [19]:
# Longest trips first; the truncated display shows both extremes of tripduration.
trips.sort_values(by='tripduration', ascending=False)

Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt
502575,2019-07-16 21:26:59.000,Bolt Mobility,BOL00198,Powered-2763482f-d784-4797-7cf9-59346940451d,512619.000000,95505.25000,2019-07-10,23:01:13,2019-07-16,21:24:52,36.154280,-86.787008,36.149143,-86.788575,"[('36.154280', '-86.787008'), ('36.154212', '-...",2019-07-17 06:23:59.217
496213,2019-07-14 22:05:24.000,Bolt Mobility,BOL00284,Powered-878271f6-e755-d7e6-72e2-761c4d08c487,257790.000000,54662.08000,2019-07-11,22:25:02,2019-07-14,22:01:32,36.152072,-86.803823,36.161730,-86.774043,[],2019-07-15 06:23:35.623
473525,2019-07-10 01:05:36.000,Bolt Mobility,BOL00008,Powered-27c0ee73-6b11-7013-b7b9-0664173dc436,93837.000000,83402.23000,2019-07-08,22:59:45,2019-07-10,01:03:42,36.145472,-86.811302,36.146329,-86.813257,[],2019-07-11 06:21:38.050
482992,2019-07-13 04:13:31.000,Bolt Mobility,BOL00146,Powered-f63b8989-6b47-bda7-522a-5b4feea22493,92977.000000,22398.29000,2019-07-12,02:21:03,2019-07-13,04:10:40,36.164367,-86.769286,36.176582,-86.808390,[],2019-07-14 06:23:28.003
479864,2019-07-12 16:28:06.000,Bolt Mobility,BOL00151,Powered-90545f58-0043-4bfc-8a79-cf15e49f6f72,78802.000000,59632.55000,2019-07-11,18:33:53,2019-07-12,16:27:15,36.164945,-86.780144,36.175640,-86.757861,[],2019-07-13 06:22:59.467
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
509569,2019-07-19 00:07:18.803,Lyft,LFT10,Powered767853,-4.618833,3061.02372,2019-07-18,23:53:53.926666,2019-07-18,23:49:16.796666,36.151550,-86.784070,36.153540,-86.777900,"[(36.15155, -86.78407), (36.15153, -86.78401),...",2019-07-20 10:52:39.343
378583,2019-06-21 21:44:53.863,Lyft,LFT1318,Powered220544,-8.003717,3484.25208,2019-06-21,21:32:09.170000,2019-06-21,21:24:08.946666,36.155610,-86.775050,36.155360,-86.775080,"[(36.15561, -86.77505), (36.15565, -86.77509),...",2019-06-22 08:31:51.090
509545,2019-07-19 00:01:24.063,Lyft,LFT2,Powered859498,-10.242417,52.49344,2019-07-18,23:59:35.683333,2019-07-18,23:49:21.136666,36.151630,-86.784180,36.151720,-86.784080,"[(36.15163, -86.78418), (36.15167, -86.78413),...",2019-07-20 10:52:39.020
509592,2019-07-19 00:12:05.363,Lyft,LFT18,Powered863342,-10.975100,3641.73240,2019-07-19,00:00:24.016666,2019-07-18,23:49:25.513333,36.156940,-86.781040,36.163050,-86.776050,"[(36.15694, -86.78104), (36.1571, -86.78111), ...",2019-07-20 10:52:39.657


In [20]:
# Add a month-day label ('MM-DD') derived from the publish timestamp of each trip.
trips['MM-DD'] = trips['pubtimestamp'].dt.strftime(date_format='%m-%d')

In [21]:
# Show the month-day label column.
trips.loc[:, 'MM-DD']

0         05-01
1         05-01
2         05-01
3         05-01
4         05-01
          ...  
565517    08-01
565518    08-01
565519    08-01
565520    08-01
565521    08-01
Name: MM-DD, Length: 565522, dtype: object

In [22]:
# Number of trip rows for every (day, device id) pair.
trips.groupby(by=['MM-DD', 'sumdid']).size()

MM-DD  sumdid                                     
05-01  Powered003176                                  1
       Powered005832                                  1
       Powered009634                                  1
       Powered013719                                  1
       Powered020762                                  2
                                                     ..
08-01  PoweredYTT8X                                   1
       Powereda4712099-5e55-5332-996c-d6e5c910535d    1
       Poweredb8a3a269-d1ca-571f-9b2f-89b7399b5537    1
       Poweredd913663d-659b-5afd-9a37-a1b85c6a36b2    1
       Poweredf077a919-d569-5e70-8ca7-71d179ffacf9    1
Length: 202389, dtype: int64

In [24]:
# Group the day / device / company columns by (day, device id); the result is a
# lazy GroupBy object that is aggregated in a following cell.
number3 = (
    trips.loc[:, ['MM-DD', 'sumdid', 'companyname']]
    .groupby(by=['MM-DD', 'sumdid'])
)

In [26]:
# Trips per (day, device id, company) as a flat DataFrame with a 'count' column.
# The frame is rebuilt from `trips` rather than from the previous value of
# `number3`, so this cell gives the same result however many times it is re-run
# (the earlier `number3 = number3.value_counts()...` form consumed its own input).
number3 = (
    trips[['MM-DD', 'sumdid', 'companyname']]
    .groupby(['MM-DD', 'sumdid'])
    .value_counts()
    .reset_index()
)

In [28]:
# Mean of 'count' for every (day, device id, company) combination.
number3.groupby(by=['MM-DD', 'sumdid', 'companyname'])['count'].agg('mean')

MM-DD  sumdid                                       companyname
05-01  Powered003176                                Lyft           1.0
       Powered005832                                Lyft           1.0
       Powered009634                                Lyft           1.0
       Powered013719                                Lyft           1.0
       Powered020762                                Lyft           2.0
                                                                  ... 
08-01  PoweredYTT8X                                 Bird           1.0
       Powereda4712099-5e55-5332-996c-d6e5c910535d  JUMP           1.0
       Poweredb8a3a269-d1ca-571f-9b2f-89b7399b5537  JUMP           1.0
       Poweredd913663d-659b-5afd-9a37-a1b85c6a36b2  JUMP           1.0
       Poweredf077a919-d569-5e70-8ca7-71d179ffacf9  JUMP           1.0
Name: count, Length: 202389, dtype: float64

In [29]:
# Average number of trips per device for each (day, company) pair.
number3.groupby(by=['MM-DD', 'companyname'])['count'].agg('mean')

MM-DD  companyname  
05-01  Bird             1.546218
       Lyft             2.707617
05-02  Bird             1.612946
       Lime             4.708013
       Lyft             2.291855
                          ...   
08-01  Bolt Mobility    1.000000
       JUMP             1.000000
       Lime             1.000000
       Lyft             1.000000
       SPIN             1.000000
Name: count, Length: 528, dtype: float64

In [None]:
# Number of distinct device ids (sumdid) per company in the scooters table.
# The select list previously ended in a dangling comma ("COUNT(...), FROM"),
# which is a SQL syntax error; the grouping column is selected so that each
# count is labelled with its company.
query = '''
select COUNT(DISTINCT sumdid), companyname
FROM scooters
GROUP BY companyname '''

pd.read_sql(query, con = engine)