In [8]:
import psycopg2 as pg
import os
import pandas as pd
import numpy as np
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [45]:
# SQL for the main analysis frame: one row per taxi_trip row, with derived
# features and daily weather columns attached.
# - Both joins are LEFT JOINs, so trips with no matching taxi_trip_features or
#   weather row are kept, with NULLs in the joined columns.
# - Weather is matched on the calendar date of pickup_datetime.
# - `percipitation` (sic) is the actual column name in the weather table; do
#   not "fix" the spelling here.
# NOTE(review): if weather ever holds more than one row per date (e.g. several
# stations), this join would duplicate trips — confirm against the schema.
tlc_query = """
SELECT
    pickup_datetime, dropoff_datetime, passenger_count, 
    trip_distance, payment_type_id, fare_amount, 
    tip_amount, total_amount, trip_duration, avg_speed,
    temperature_min, temperature_max, temperature_avg,
    percipitation,
    date, DATE(pickup_datetime) as pu_date
FROM taxi_trip 
    LEFT JOIN taxi_trip_features ON 
        taxi_trip.id = taxi_trip_features.taxi_trip_id
    LEFT JOIN weather ON
        DATE(taxi_trip.pickup_datetime) = weather.date;
"""

In [47]:
# SQL that fetches every column and every row of the `weather` table.
weather_query = """
SELECT
    *
FROM weather;
"""

In [48]:
# SQL that fetches every column and every row of the `locations` table.
locations_query = """
SELECT
    *
FROM locations;
"""

In [49]:
# SQL that fetches every column and every row of the
# `taxi_trip_file_downloads` table.
file_downloads_query = """
SELECT
    *
FROM taxi_trip_file_downloads;
"""

In [50]:
def get_df_from_query(query):
    """Run a SQL query against the Postgres database and return a DataFrame.

    Connection settings are read from the environment variables
    ``POSTGRES_HOST``, ``POSTGRES_PORT``, ``POSTGRES_NAME``,
    ``POSTGRES_USER`` and ``POSTGRES_PASSWORD``. A fresh connection is opened
    on every call and is always closed before the function exits, including
    when the query itself fails.

    Parameters
    ----------
    query : str
        SQL text handed unchanged to ``pd.read_sql``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_sql`` returns for ``query``.

    Raises
    ------
    Exception
        Any error raised by ``pg.connect`` or ``pd.read_sql`` propagates to
        the caller unchanged.
    """
    conn_data = {
        "host": os.getenv("POSTGRES_HOST"),
        "port": os.getenv("POSTGRES_PORT"),
        "dbname": os.getenv("POSTGRES_NAME"),
        "user": os.getenv("POSTGRES_USER"),
        "password": os.getenv("POSTGRES_PASSWORD"),
    }
    connection = pg.connect(**conn_data)
    try:
        # Explicit try/finally rather than `with connection:` — psycopg2's
        # connection context manager only ends the transaction, it does not
        # close the connection.
        return pd.read_sql(query, connection)
    finally:
        # Runs on both success and failure so a bad query cannot leak an
        # open database connection in the long-lived notebook kernel.
        connection.close()

In [57]:
# Load each query result into its own DataFrame. Every call goes through
# get_df_from_query, so the four reads are independent of each other.
tlc = get_df_from_query(tlc_query)
weather = get_df_from_query(weather_query)
locations = get_df_from_query(locations_query)
file_downloads = get_df_from_query(file_downloads_query)
# Sanity check: report (rows, columns) for each loaded frame.
print(f"tlc: {tlc.shape}, weather: {weather.shape}, locations: {locations.shape}, files: {file_downloads.shape}")

tlc: (26668, 16), weather: (5087, 12), locations: (265, 4), files: (5, 4)


In [53]:
# List the column names of the weather frame.
weather.columns

Index(['id', 'station_id', 'station_name', 'date', 'temperature_min',
       'temperature_max', 'temperature_avg', 'percipitation', 'windspeed_avg',
       'snow', 'snow_depth', 'sunshine_duration'],
      dtype='object')

In [55]:
# Preview the first rows of the weather frame.
weather.head()

Unnamed: 0,id,station_id,station_name,date,temperature_min,temperature_max,temperature_avg,percipitation,windspeed_avg,snow,snow_depth,sunshine_duration
0,1,USW00094728,"NY CITY CENTRAL PARK, NY US",2009-01-01,-9.4,-3.3,-6.35,0.0,5.0,0.0,0.0,
1,2,USW00094728,"NY CITY CENTRAL PARK, NY US",2009-01-02,-5.0,1.1,-1.95,0.0,2.8,0.0,0.0,
2,3,USW00094728,"NY CITY CENTRAL PARK, NY US",2009-01-03,-1.7,3.3,0.8,0.0,4.5,0.0,0.0,
3,4,USW00094728,"NY CITY CENTRAL PARK, NY US",2009-01-04,-3.9,5.6,0.85,0.0,3.4,0.0,0.0,
4,5,USW00094728,"NY CITY CENTRAL PARK, NY US",2009-01-05,3.3,6.1,4.7,0.0,3.1,0.0,0.0,
