In [6]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text

# --- Configuration (Ensure these match your previous setup) ---
DB_USER = "postgres"
DB_HOST = "127.0.0.1"
DB_PORT = "5432"
DB_NAME = "nyc_taxi"
TABLE_NAME = "taxi_trips"

# The password is deliberately NOT stored in the notebook: anything typed here
# ends up in version control and in every shared copy of the file.
# Set PGPASSWORD before starting Jupyter, or enter it at the prompt.
DB_PASS = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")

# SQLAlchemy Connection String.
# User and password are URL-escaped so characters such as '@', '/' or ':' in a
# credential cannot corrupt the URL. DATABASE_URL still contains the secret, so
# never print or display it; `engine` is safe to display.
DATABASE_URL = (
    f"postgresql://{quote_plus(DB_USER)}:{quote_plus(DB_PASS)}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
engine = create_engine(DATABASE_URL)

print("✅ Database engine created. Ready to execute SQL queries.")

✅ Database engine created. Ready to execute SQL queries.


In [7]:
# A. Create Dim_Weather table
#
# Builds dim_weather from the distinct (date, weather) combinations found in
# the source table and then declares the date as the primary key.
#
# * Rows with a NULL pickup_date are excluded: a primary-key column cannot hold
#   NULL, so a single such row would make the ALTER TABLE step fail.
# * DISTINCT is applied to all five columns, so if one date carries two
#   different weather readings the ALTER TABLE step fails on the duplicate key.
#   That failure is a useful data-quality signal; it is not suppressed here.
#
# This cell only defines the SQL; it is executed in a later cell.
sql_create_dim_weather = f"""
DROP TABLE IF EXISTS dim_weather;
CREATE TABLE dim_weather AS
SELECT DISTINCT
    pickup_date AS weather_key, -- Date is the unique primary key
    max_temp,
    min_temp,
    precipitation,
    rain_day
FROM
    {TABLE_NAME}
WHERE
    pickup_date IS NOT NULL;    -- a primary key cannot contain NULL

ALTER TABLE dim_weather ADD PRIMARY KEY (weather_key);
"""
print("Defining SQL for Dim_Weather...")

Defining SQL for Dim_Weather...


In [8]:
# B. Create Dim_Date table
#
# Builds dim_date with one row per distinct pickup_date plus calendar
# attributes derived from it, then declares the date as the primary key.
#
# * 'FMMonth' is used instead of 'Month': without the FM (fill mode) prefix
#   PostgreSQL blank-pads month names to 9 characters ('May      '), which
#   produces ragged labels and breaks equality filters in reporting tools.
# * EXTRACT returns a numeric / double precision value, so the year, month and
#   week parts are cast to INTEGER to store them as whole numbers.
#   CAST(...) is used rather than a colon-based cast to keep colons out of a
#   string that is passed through sqlalchemy.text().
# * Rows with a NULL pickup_date are excluded because a primary-key column
#   cannot hold NULL.
# * week_num is the ISO 8601 week, so the first days of January can belong to
#   week 52/53 while calendar_year already shows the new year.
#
# This cell only defines the SQL; it is executed in a later cell.
sql_create_dim_date = f"""
DROP TABLE IF EXISTS dim_date;
CREATE TABLE dim_date AS
SELECT DISTINCT
    pickup_date AS date_key,
    CAST(EXTRACT(YEAR FROM pickup_date) AS INTEGER) AS calendar_year,
    CAST(EXTRACT(MONTH FROM pickup_date) AS INTEGER) AS month_num,
    TO_CHAR(pickup_date, 'FMMonth') AS month_name,
    CAST(EXTRACT(WEEK FROM pickup_date) AS INTEGER) AS week_num,
    CASE WHEN EXTRACT(DOW FROM pickup_date) IN (0, 6) THEN 'Weekend' ELSE 'Weekday' END AS is_weekend,
    pickup_dayofweek
FROM
    {TABLE_NAME}
WHERE
    pickup_date IS NOT NULL;

ALTER TABLE dim_date ADD PRIMARY KEY (date_key);
"""
print("Defining SQL for Dim_Date...")

Defining SQL for Dim_Date...


In [9]:
# Build both dimension tables inside one transaction.
# engine.begin() commits when the block exits without error and rolls back if
# either script raises, so the success message below is only reached after the
# commit has gone through.
with engine.begin() as connection:
    for progress_message, ddl_script in (
        ("Executing Dim_Weather creation...", sql_create_dim_weather),
        ("Executing Dim_Date creation...", sql_create_dim_date),
    ):
        print(progress_message)
        connection.execute(text(ddl_script))

print("✅ Dimension tables (dim_weather, dim_date) created successfully.")

Executing Dim_Weather creation...
Executing Dim_Date creation...
✅ Dimension tables (dim_weather, dim_date) created successfully.


In [10]:
# C. Create the Fact Table
#
# fact_taxi_trips is rebuilt from scratch on every run: the script drops any
# existing table, copies the key and measure columns out of the source table,
# and indexes date_key. pickup_date is exposed as date_key, the same column the
# dimension tables use for their keys.
sql_create_fact_table = f"""
DROP TABLE IF EXISTS fact_taxi_trips;
CREATE TABLE fact_taxi_trips AS
SELECT
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    pickup_date AS date_key,      -- Key to link to dim_date and dim_weather
    pulocationid,                 -- Key (assumed) to link to Dim_Location
    dolocationid,                 -- Key (assumed) to link to Dim_Location
    payment_type,                 -- Key to link to Dim_Payment
    
    -- Metrics (The measurable values)
    passenger_count,
    trip_distance,
    fare_amount,
    tip_amount,
    total_amount,
    congestion_surcharge,
    airport_fee,
    trip_duration
FROM
    {TABLE_NAME};

-- Add an index on the date_key for faster joins in Power BI
CREATE INDEX idx_fact_date ON fact_taxi_trips (date_key);
"""

# Run the whole script in one transaction; engine.begin() commits on a clean
# exit and rolls back on error, so the success line is printed only after the
# commit.
print_started = "Executing Fact table creation..."
with engine.begin() as connection:
    print(print_started)
    connection.execute(text(sql_create_fact_table))

print("✅ Fact table (fact_taxi_trips) created and indexed successfully.")

Executing Fact table creation...
✅ Fact table (fact_taxi_trips) created and indexed successfully.
