In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text
# Connection settings for the PostgreSQL database.
# Every part can be overridden with an environment variable so that real
# credentials never have to be committed with the notebook. The fallbacks are
# the original local-development values, so with no variables set the
# connection string is exactly what it was before.
database_name = os.environ.get('SCOOTERS_DB_NAME', 'scooters')
db_user = os.environ.get('SCOOTERS_DB_USER', 'postgres')
db_password = os.environ.get('SCOOTERS_DB_PASSWORD', 'postgres')
db_host = os.environ.get('SCOOTERS_DB_HOST', 'localhost')
db_port = os.environ.get('SCOOTERS_DB_PORT', '5433')

# User and password are URL-escaped because characters such as '@', ':' or '/'
# would otherwise be parsed as part of the URL structure.
connection_string = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{database_name}"
)
engine = create_engine(connection_string)

In [2]:
# Select every scooters row in which at least one of the nine listed columns
# is NULL.
find_nulls = '''
SELECT *
FROM scooters
WHERE 
    pubdatetime IS NULL
    OR latitude IS NULL
    OR longitude IS NULL
    OR sumdid IS NULL
    OR sumdtype IS NULL
    OR chargelevel IS NULL
    OR sumdgroup IS NULL
    OR costpermin IS NULL
    OR companyname IS NULL;
'''

# Run the query on a short-lived connection and load the rows into a DataFrame.
with engine.connect() as conn:
    result = pd.read_sql(sql=text(find_nulls), con=conn)

# The per-column non-null counts reveal which columns actually hold the NULLs.
result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 770 entries, 0 to 769
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   pubdatetime  770 non-null    datetime64[ns]
 1   latitude     770 non-null    float64       
 2   longitude    770 non-null    float64       
 3   sumdid       770 non-null    object        
 4   sumdtype     770 non-null    object        
 5   chargelevel  0 non-null      object        
 6   sumdgroup    770 non-null    object        
 7   costpermin   770 non-null    float64       
 8   companyname  770 non-null    object        
dtypes: datetime64[ns](1), float64(3), object(5)
memory usage: 54.3+ KB


From scooters table:
- earliest timestamp: 2019-05-01 00:01:41.247
- latest timestamp: 2019-07-31 23:59:57

From trips table:
- earliest timestamp: 2019-05-01 00:00:55.423
- latest timestamp: 2019-08-01 07:04:00

In [3]:
# Count, per company, the trips whose tripduration exceeds 1440.
# NOTE(review): 1440 is presumably minutes (i.e. 24 hours) - confirm the
# units of trips.tripduration.
long_trips = '''
SELECT 
    companyname,
    COUNT(*)
FROM trips
WHERE tripduration > 1440
GROUP BY companyname;
'''

# Execute on a short-lived connection; the result has one row per company.
with engine.connect() as conn:
    result = pd.read_sql(sql=text(long_trips), con=conn)

result

Unnamed: 0,companyname,count
0,Bolt Mobility,6908
1,Lyft,2
2,SPIN,28


In [4]:
# Count, per company, the trips whose tripduration is below 1.
# NOTE(review): presumably "under one minute" - confirm the units of
# trips.tripduration.
short_trips = '''
SELECT
    companyname,
    COUNT(*)
FROM trips
WHERE tripduration < 1
GROUP BY companyname;
'''

# Execute on a short-lived connection; the result has one row per company.
with engine.connect() as conn:
    result = pd.read_sql(sql=text(short_trips), con=conn)

result

Unnamed: 0,companyname,count
0,Bird,3963
1,Lime,661
2,Lyft,4530


In [None]:
# Compare, per company, the number of distinct scooters seen in the scooters
# table with the number of distinct scooters that appear in the trips table.
#
# The two aggregates are stacked with UNION ALL. A UNION takes its output
# column names from the first SELECT only, so a per-branch alias on the count
# column cannot tell the rows apart. Each branch therefore emits a literal
# `metric` label, and the count column carries a neutral name.
unused = '''
SELECT
    companyname AS company,
    'total_scooters' AS metric,
    COUNT(DISTINCT sumdid) AS scooters
FROM scooters
GROUP BY companyname
UNION ALL
SELECT
    companyname AS company,
    'scooters_used' AS metric,
    COUNT(DISTINCT sumdid) AS scooters
FROM trips
GROUP BY companyname
ORDER BY company, metric;
'''

# Execute on a short-lived connection; the result has one row per
# (company, metric) pair.
with engine.connect() as connection:
    result = pd.read_sql(text(unused), con=connection)

result

In [6]:
# Number of distinct scooters reported per company per calendar day.
#
# The grouping key must be the same DATE(pubdatetime) expression that is
# selected. Grouping by the raw pubdatetime yields one group per distinct
# timestamp, which makes the result enormous and repeats each
# (company, date) pair many times.
# NOTE: this still has to scan the whole scooters table, so it may take a
# while, but the result is only one row per company per day.
availability = '''
SELECT
    companyname AS company,
    DATE(pubdatetime) AS date,
    COUNT(DISTINCT sumdid) AS available_scooters
FROM scooters
GROUP BY companyname, DATE(pubdatetime)
ORDER BY company, date;
'''

# Execute on a short-lived connection and load the daily counts.
with engine.connect() as connection:
    result = pd.read_sql(text(availability), con=connection)

result

KeyboardInterrupt: 

In [None]:
# Per company: how many distinct scooters appear in trips, and on how many
# distinct start dates the company recorded at least one trip.
daily_use = '''
SELECT
    companyname company,
    COUNT(DISTINCT sumdid) total_scooters,
    COUNT(DISTINCT startdate) days_used
FROM trips
GROUP BY companyname;
'''

# Execute on a short-lived connection; the result has one row per company.
with engine.connect() as conn:
    result = pd.read_sql(sql=text(daily_use), con=conn)

result