In [None]:
import datetime as dt
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text

In [None]:
# Connection settings are read from the environment so credentials do not have
# to live in the notebook. Each setting falls back to a local default
# (postgres/postgres on localhost:5050, database "scooters") when its
# variable is unset.
database_name = os.environ.get('SCOOTERS_DB_NAME', 'scooters')
db_user = os.environ.get('SCOOTERS_DB_USER', 'postgres')
db_password = os.environ.get('SCOOTERS_DB_PASSWORD', 'postgres')
db_host = os.environ.get('SCOOTERS_DB_HOST', 'localhost')
db_port = os.environ.get('SCOOTERS_DB_PORT', '5050')

connection_string = (
    f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{database_name}"
)

# The engine is shared by every query cell below.
engine = create_engine(connection_string)

# Pull at most 10,000 rows from the scooters table for a first look.
query = '''
SELECT *
FROM scooters
LIMIT 10000;
'''

# The context manager closes the connection as soon as the read finishes.
with engine.connect() as connection:
    scooters = pd.read_sql(text(query), con=connection)

scooters

In [None]:
# Fetch at most 10,000 scooters rows whose sumdtype is 'Standard'.
query = """
SELECT *
FROM scooters
WHERE sumdtype = 'Standard'
LIMIT 10000;
"""

with engine.connect() as conn:
    standard = pd.read_sql(sql=text(query), con=conn)

standard

In [None]:
# Load every row and column of the trips table (no LIMIT on this query).
query = """
SELECT *
FROM trips
"""

with engine.connect() as conn:
    trips = pd.read_sql(sql=text(query), con=conn)

trips

In [None]:
# Convert the pubtimestamp column to pandas datetimes, then display the frame.
trips['pubtimestamp'] = trips['pubtimestamp'].pipe(pd.to_datetime)
trips

In [None]:
# Show every trip except those published at this exact timestamp.
trips[trips['pubtimestamp'] != '2019-05-27 00:40:22.117']

In [None]:
# Order trips by end date, earliest first (ascending is the default).
trips.sort_values(by='enddate')

In [None]:
# Summarise tripduration for each sumdid: mean, median, min and max.
# Aggregations are given by name instead of as NumPy callables: pandas >= 2.1
# emits a FutureWarning for np.mean/np.median/np.min/np.max inside agg() and
# recommends the string names to keep the current behaviour.
trips.groupby('sumdid').agg(mean = ('tripduration', 'mean'),
                            median = ('tripduration', 'median'),
                            min = ('tripduration', 'min'),
                            max = ('tripduration', 'max'))

# Min and Max Lat/Long

In [None]:
# Compute the extreme start/end latitude and longitude values in trips,
# letting the database do the aggregation and return a single row.
query = """
SELECT MIN(startlatitude)  AS min_start_lat, MAX(startlatitude)  AS max_start_lat,
       MIN(startlongitude) AS min_start_lng, MAX(startlongitude) AS max_start_lng,
       MIN(endlatitude)    AS min_end_lat,   MAX(endlatitude)    AS max_end_lat,
       MIN(endlongitude)   AS min_end_lng,   MAX(endlongitude)   AS max_end_lng
FROM trips
"""

with engine.connect() as conn:
    trips_min_max = pd.read_sql(sql=text(query), con=conn)

trips_min_max

In [None]:
# Compute the extreme latitude and longitude values recorded in scooters.
query = """
SELECT MIN(latitude)  AS min_lat,  MAX(latitude)  AS max_lat,
	   MIN(longitude) AS min_long, MAX(longitude) AS max_long
FROM scooters
"""

with engine.connect() as conn:
    scooters_min_max = pd.read_sql(sql=text(query), con=conn)

scooters_min_max

### A lot of these extreme values don't make sense. In the trips table, the min and max latitudes are nowhere near the coordinates of Nashville, and the same goes for the min and max longitudes.

### The scooters table doesn't do much better. The closest any min or max comes to Nashville is a value of 0. I wonder if there was a bug with one of the scooters that returned the coordinates (0, 0), since there isn't even a decimal afterwards. The craziest one, however, is the max latitude from the scooters table at over 3 million. I have no idea how this would have occurred, but it is pretty silly to think about.




# Company Names in Both Tables

In [None]:
# List each distinct company name that appears in the scooters table.
query = """
SELECT DISTINCT companyname
FROM scooters
"""

with engine.connect() as conn:
    scooters_company = pd.read_sql(sql=text(query), con=conn)

scooters_company

In [None]:
# List each distinct company name that appears in the trips table.
# NOTE: despite its name, scooters_trips holds company names from trips.
query = """
SELECT DISTINCT companyname
FROM trips
"""

with engine.connect() as conn:
    scooters_trips = pd.read_sql(sql=text(query), con=conn)

scooters_trips

### Both tables refer to the same companies, but Bolt is called Bolt Mobility in the trips table, and Jump and Spin are both written in all caps in the trips table.