In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sqlalchemy import create_engine, text

In [2]:
db_name = 'scooters'
con_str = 'f"postgresql://postgres:postgres@localhost:5432/{db_name}'

In [3]:
engine = create_engine('postgresql://postgres:postgres@localhost:5432/scooters')

What is each company name represented in scooters table?

In [4]:
all_companies_q = '''
SELECT DISTINCT companyname
FROM scooters
;
'''
with engine.connect() as connection:    
    company = pd.read_sql(text(all_companies_q), con = connection)

In [5]:
company

Unnamed: 0,companyname
0,Bird
1,Bolt
2,Gotcha
3,Jump
4,Lime
5,Lyft
6,Spin


What is each company name represented in trips table?

In [36]:
all_companies_q2 = '''
SELECT DISTINCT companyname
FROM trips
LIMIT 10;
'''
with engine.connect() as connection:    
    company2 = pd.read_sql(text(all_companies_q2), con = connection)

In [37]:
company2

Unnamed: 0,companyname
0,Bird
1,Bolt Mobility
2,Gotcha
3,JUMP
4,Lime
5,Lyft
6,SPIN


What are minimum and maximum values for longitude and latitude?

In [8]:
min_latlong = '''
SELECT MIN(latitude) AS min_lat,
       MAX(latitude) AS max_lat,
       MIN(longitude) AS min_long,
       MAX(longitude) AS max_long
FROM scooters;
'''
with engine.connect() as connection:    
    latlong_min = pd.read_sql(text(min_latlong), con = connection)

In [13]:
latlong_min

Unnamed: 0,min_lat,max_lat,min_long,max_long
0,0.0,3609874.0,-97.443879,0.0


In [17]:
latlong_info = '''
SELECT scooters.pubdatetime,
       trips.startdate,
       scooters.latitude
FROM trips
INNER JOIN scooters ON scooters.sumdid=trips.sumdid
WHERE latitude = (SELECT MAX(latitude)
                  FROM scooters)
LIMIT (25);
'''
with engine.connect() as connection:
    big_lat = pd.read_sql(text(latlong_info),con = connection)

In [18]:
big_lat

Unnamed: 0,pubdatetime,startdate,latitude
0,2019-07-16 13:57:19.230,2019-07-16,3609874.0
1,2019-07-16 14:02:19.390,2019-07-16,3609874.0
2,2019-07-16 13:07:17.510,2019-07-16,3609874.0
3,2019-07-16 13:12:17.600,2019-07-16,3609874.0
4,2019-07-16 13:17:18.007,2019-07-16,3609874.0
5,2019-07-16 13:22:18.187,2019-07-16,3609874.0
6,2019-07-16 13:27:18.393,2019-07-16,3609874.0
7,2019-07-16 13:32:18.437,2019-07-16,3609874.0
8,2019-07-16 13:37:18.660,2019-07-16,3609874.0
9,2019-07-16 13:42:18.747,2019-07-16,3609874.0


What are the minimum and maximum trip distance?

In [42]:
trip_dist = '''
SELECT MIN(tripdistance) AS min_trip,
       MAX(tripdistance) AS max_trip
FROM trips;
'''
with engine.connect() as connection:    
    trip_dist_range = pd.read_sql(text(trip_dist), con = connection)

In [41]:
trip_dist_range

Unnamed: 0,min_trip,max_trap
0,-20324803.8,31884480.0


How many trips per company in trips table?

In [19]:
reg_distance = '''
SELECT AVG(tripdistance) AS total_trips,
       companyname
FROM trips
GROUP BY companyname;
'''
with engine.connect() as connection:
    reg_dist = pd.read_sql(text(reg_distance), con = connection)

In [20]:
reg_dist

Unnamed: 0,total_trips,companyname
0,4134.142527,Bird
1,8094.324012,Bolt Mobility
2,11292.679898,Gotcha
3,5986.012366,JUMP
4,4198.204512,Lime
5,6474.65654,Lyft
6,2898.701852,SPIN


In [23]:
max_distance = '''
SELECT COUNT(tripdistance) AS total_trips,
       companyname
FROM trips
WHERE tripdistance = (SELECT MAX(tripdistance)
                      FROM trips)
GROUP BY companyname;
'''
with engine.connect() as connection:
    max_dist = pd.read_sql(text(max_distance), con = connection)

In [24]:
max_dist

Unnamed: 0,total_trips,companyname
0,1,Gotcha


In [27]:
min_distance = '''
SELECT COUNT(tripdistance) AS total_trips,
       companyname
FROM trips
WHERE tripdistance <0
GROUP BY companyname;
'''
with engine.connect() as connection:
    min_dist = pd.read_sql(text(min_distance), con = connection)

In [28]:
min_dist

Unnamed: 0,total_trips,companyname
0,32,Bird


How many records by group of SUMD?

In [58]:
dist_type = '''
SELECT COUNT(*) AS total,
       sumdgroup
FROM scooters
GROUP BY sumdgroup;
'''
with engine.connect() as connection:
    dist_types = pd.read_sql(text(dist_type), con = connection)

In [59]:
dist_types

Unnamed: 0,total,sumdgroup
0,26529,bicycle
1,59671463,scooter
2,13716051,Scooter


In [60]:
bike_data = '''
SELECT *
FROM scooters
WHERE sumdgroup LIKE 'bicycle'
'''
with engine.connect() as connection:
    bikes = pd.read_sql(text(bike_data), con = connection)

In [62]:
bikes.head(5)

Unnamed: 0,pubdatetime,latitude,longitude,sumdid,sumdtype,chargelevel,sumdgroup,costpermin,companyname
0,2019-05-01 00:04:56.910,36.150966,-86.852773,StandardNW5HJFO4R32LY,Standard,100.0,bicycle,0.0,Lime
1,2019-05-01 00:09:56.970,36.150966,-86.852773,StandardNW5HJFO4R32LY,Standard,100.0,bicycle,0.0,Lime
2,2019-05-01 00:14:58.037,36.150966,-86.852773,StandardNW5HJFO4R32LY,Standard,100.0,bicycle,0.0,Lime
3,2019-05-01 00:19:57.383,36.150966,-86.852773,StandardNW5HJFO4R32LY,Standard,100.0,bicycle,0.0,Lime
4,2019-05-01 00:24:57.743,36.150966,-86.852773,StandardNW5HJFO4R32LY,Standard,100.0,bicycle,0.0,Lime


In [67]:
num_bikes = '''
SELECT sumdid,
       COUNT(*) AS rides
FROM scooters
WHERE sumdgroup LIKE 'bicycle'
GROUP BY sumdid
'''
with engine.connect() as connection:
    bikes_num = pd.read_sql(text(num_bikes), con = connection)

In [68]:
bikes_num

Unnamed: 0,sumdid,rides
0,Standard2UGJKREVB53HT,26
1,Standard5JXOV277MCWID,6
2,StandardNPOOZNUSGAXZN,9
3,StandardNUTLLXP4G37OI,6
4,StandardNW5HJFO4R32LY,26476
5,StandardZPUQESHVPP74J,6
