In [None]:
import datetime as dt
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text

In [None]:
database_name = 'scooters'

# Connection settings are read from the environment so real credentials never
# have to be committed with the notebook. Each default reproduces the original
# local-development value, so with no variables set the URL is unchanged.
db_user = os.environ.get('SCOOTERS_DB_USER', 'postgres')
db_password = os.environ.get('SCOOTERS_DB_PASSWORD', 'postgres')
db_host = os.environ.get('SCOOTERS_DB_HOST', 'localhost')
db_port = os.environ.get('SCOOTERS_DB_PORT', '5050')

# User and password are URL-escaped because characters such as '@' or '/'
# would otherwise be parsed as part of the URL structure.
connection_string = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{database_name}"
)

engine = create_engine(connection_string)

## Bolt Mobility reports `tripduration` in seconds instead of minutes, so we first build a DataFrame in which every trip's duration is expressed in minutes.

In [None]:
# Pull every column of `trips` and add `trip_in_min`: for 'Bolt Mobility' rows
# tripduration is divided by 60 and rounded to 2 places; every other row keeps
# tripduration unchanged.
query = '''
SELECT *,
       CASE WHEN companyname = 'Bolt Mobility' THEN ROUND(tripduration/60,2)
   		    ELSE tripduration END AS trip_in_min
FROM trips;
'''

trips_min_sql = text(query)
with engine.connect() as connection:
    trips_min = pd.read_sql(sql=trips_min_sql, con=connection)

trips_min

## Next, compute the "calculated difference" (the trip length in whole minutes derived from the start and end date/time columns) so that it can be compared with the reported duration.

In [None]:
# Rebuilds `trip_in_min` in a CTE (same CASE rule as the earlier trips query)
# and derives, per trip:
#   rounded_tripduration - trip_in_min with its fractional part removed
#   day_diff             - (enddate - startdate) converted to minutes
#   min                  - (endtime - starttime) in minutes
#   round_off            - fractional part of `min`
#   calc_diff            - whole minutes between start and end
#
# calc_diff truncates the combined day + time-of-day difference. Truncating
# only the time-of-day part first (as `min - round_off` does) is wrong for
# overnight trips, where that part is negative: truncation toward zero rounds
# it up, so a trip from 23:50:00 to 00:10:30 came out as 21 instead of 20.
# For trips that start and end on the same day the result is unchanged.
query = '''
WITH trip_min AS (
	SELECT pubtimestamp, sumdid,
           companyname, startdate, starttime, enddate, endtime, tripduration,
	       CASE WHEN companyname = 'Bolt Mobility' THEN ROUND(tripduration/60,2)
	   		    ELSE tripduration END AS trip_in_min
	FROM trips)

SELECT pubtimestamp, sumdid,
       companyname, startdate, starttime,
       enddate, endtime, trip_in_min,
	   (trip_in_min - trip_in_min%1)::INT AS rounded_tripduration,
	   (1440 * (enddate - startdate)) AS day_diff,
	   EXTRACT(epoch FROM (endtime - starttime))/60 AS min,
	   (EXTRACT(epoch FROM (endtime - starttime))/60)%1 AS round_off,
	   TRUNC((1440 * (enddate - startdate)) + EXTRACT(epoch FROM (endtime - starttime))/60)::INT AS calc_diff
FROM trip_min
'''

with engine.connect() as connection:
    calc_diff = pd.read_sql(text(query), con = connection)

calc_diff

## Then we merge the two DataFrames on the trip columns they share.

In [None]:
# Columns present in both frames; together they act as the join key.
merge_keys = ['pubtimestamp', 'sumdid', 'companyname',
              'startdate', 'starttime', 'enddate', 'endtime', 'trip_in_min']

# Both frames are read from the same table, and these columns are not
# guaranteed to identify a single row. If a key combination occurs k times, a
# plain inner merge would emit k * k rows for it. Every non-key column of
# calc_diff is computed only from the key columns, so rows sharing a key are
# identical and keeping one per key loses nothing.
calc_diff_unique = calc_diff.drop_duplicates(subset=merge_keys)

# validate='many_to_one' makes pandas raise if the right side is ever not
# unique on the keys, so the result always has one row per trips_min row match.
trips_calc = pd.merge(trips_min, calc_diff_unique, how = 'inner',
                      on = merge_keys, validate = 'many_to_one')
trips_calc

## To look for errors, we find the trips where the reported duration (in whole minutes) is not within one minute of the calculated difference.

In [None]:
# List the trips whose reported duration and calculated difference disagree by
# more than `tolerance_min` minutes.
#
# The earlier attempt, `rounded_tripduration.isin(calc_diff-1, calc_diff, calc_diff+1)`,
# failed for two reasons: `isin` accepts one collection of values rather than
# three arguments, and the bare name `calc_diff` is the DataFrame loaded from
# the query, not the column. A row-by-row "within one minute" check is simply
# an absolute difference between the two columns.
tolerance_min = 1
duration_gap = (trips_calc['rounded_tripduration'] - trips_calc['calc_diff']).abs()

# `~(gap <= tolerance)` rather than `gap > tolerance` so that rows where either
# value is missing (NaN gap) are listed as well instead of being dropped.
trips_calc.loc[~(duration_gap <= tolerance_min)]