# ETL Pipeline silver-to-gold - Uber Dataset

Este bloco importa todas as bibliotecas Python necessárias para o pipeline, como pandas para manipulação de dados.

In [1]:
from sqlalchemy import create_engine, text
import pandas as pd
import os

# Etapa 0 - Criando conexão com o banco

In [8]:
# Build the SQLAlchemy engines for the silver (source) and gold (target) databases.
# Credentials and the silver database name come from the environment, with
# local-development fallbacks; the host and the gold database name are fixed.
from urllib.parse import quote_plus

db_user = os.getenv('POSTGRES_USER', 'admin')
db_password = os.getenv('POSTGRES_PASSWORD', 'admin')
db_name = os.getenv('POSTGRES_DB', 'postgres')
db_host = 'localhost'

# Percent-encode the credentials before embedding them in the URL: a raw
# '@', ':', '/' or '%' in the user or password would otherwise be parsed as
# URL syntax and produce a wrong or invalid connection string.
_db_credentials = f"{quote_plus(db_user)}:{quote_plus(db_password)}"

silver_engine = create_engine(f"postgresql://{_db_credentials}@{db_host}/{db_name}")
gold_engine = create_engine(f"postgresql://{_db_credentials}@{db_host}/uber_gold")

# Leitura da camada silver

In [9]:
def _build_dimension(source, columns, key_name):
    """Return the distinct combinations of *columns* plus a 1-based surrogate key.

    Args:
        source: DataFrame holding the natural-key columns.
        columns: List of column names that identify one dimension member.
        key_name: Name of the surrogate-key column to add.

    Returns:
        A new DataFrame with one row per distinct combination of *columns*
        (in order of first appearance in *source*) and an extra *key_name*
        column numbered 1..N.
    """
    dimension = source[columns].drop_duplicates().reset_index(drop=True)
    # The fresh 0-based index doubles as the key generator; shift it to start at 1.
    dimension[key_name] = dimension.index + 1
    return dimension


# Load the whole silver table into memory.
# NOTE(review): the query has no ORDER BY, and surrogate keys are assigned in
# order of first appearance, so key values may differ between runs if the
# database returns rows in a different order - confirm whether that matters.
silver_df = pd.read_sql("SELECT * FROM uber_silver", silver_engine)

# One dimension per natural key; each gets its own surrogate-key column.
dim_cus = _build_dimension(silver_df, ['customer_id'], 'srk_cus')
dim_veh = _build_dimension(silver_df, ['vehicle_type'], 'srk_veh')
dim_pay = _build_dimension(silver_df, ['payment_method'], 'srk_pay')
dim_loc = _build_dimension(silver_df, ['pickup_location', 'drop_location'], 'srk_loc')

# Attach every surrogate key to the silver rows. Left joins keep all silver
# rows, and the dimensions are de-duplicated on their join keys above.
fat = (
    silver_df
    .merge(dim_cus, on='customer_id', how='left')
    .merge(dim_veh, on='vehicle_type', how='left')
    .merge(dim_pay, on='payment_method', how='left')
    .merge(dim_loc, on=['pickup_location', 'drop_location'], how='left')
)

# Fact table: keep only the measures/attributes and the surrogate keys,
# dropping the natural-key columns that now live in the dimensions.
fat_rid = fat[[
    'date_time', 'booking_status', 'avg_vtat', 'avg_ctat', 'cancelled_by',
    'reason_for_cancelling', 'booking_value', 'ride_distance', 'driver_rating',
    'customer_rating', 'srk_cus', 'srk_veh', 'srk_pay', 'srk_loc'
]].reset_index(drop=True)
# 1-based surrogate key for each ride row.
fat_rid['srk_rid'] = fat_rid.index + 1

# Load para o banco gold

In [10]:
# Write every gold table, replacing any existing table of the same name and
# leaving the DataFrame index out of the stored columns.
gold_load_options = {"con": gold_engine, "if_exists": "replace", "index": False}

# Dimensions first, in the same order as before.
for table_name, dimension in (
    ("dim_cus", dim_cus),
    ("dim_veh", dim_veh),
    ("dim_pay", dim_pay),
    ("dim_loc", dim_loc),
):
    dimension.to_sql(table_name, **gold_load_options)

# Fact table last, kept as a bare trailing expression so its return value is
# still the cell's result (presumably the number echoed below the cell).
fat_rid.to_sql("fat_rid", **gold_load_options)

767