## Importar librerías

In [1]:
from datetime import datetime # Tiempo actual
import pandas as pd # Manipulacion de datos
import sqlalchemy as sa # Conexon a la BD
import utils.conn_tools as ct  # Util para la configuracion de la BD
import holidays # Para obtener los días festivos

## Conexión con base de datos

In [2]:
config = ct.readConfig("../config/config-postgres.yaml")

config_src = config["source"]
config_etl = config["warehouse"]

print(config_src)
print(config_etl)

engine_src = sa.create_engine(ct.generateConnUrl(config_src))
engine_etl = sa.create_engine(ct.generateConnUrl(config_etl))

{'dialect': 'postgresql', 'host': 'localhost', 'port': 5432, 'username': 'Julian', 'password': 'J05r2003!', 'database': 'rapidos_y_furiosos'}
{'dialect': 'postgresql', 'host': 'localhost', 'port': 5432, 'username': 'Julian', 'password': 'J05r2003!', 'database': 'ryf_warehouse'}


## Extract

## Transform: Creación de la dimensión

### Fecha completa sin hora

In [3]:
# 2009-01-04 is YYYY-MM-DD
dim_fecha = pd.DataFrame({"date" : pd.date_range(start="2023-01-01", end="2024-10-30", freq="D")})
dim_fecha

Unnamed: 0,date
0,2023-01-01
1,2023-01-02
2,2023-01-03
3,2023-01-04
4,2023-01-05
...,...
664,2024-10-26
665,2024-10-27
666,2024-10-28
667,2024-10-29


### Extraer detalles de fecha (año, mes, dia, dia de la semana...)

In [4]:
dim_fecha["date_mmdd"] = dim_fecha["date"].dt.strftime("%m/%d")
dim_fecha["date_yyyymmdd"] = dim_fecha["date"].dt.strftime("%Y/%m/%d")

dim_fecha["day"] = dim_fecha["date"].dt.day
dim_fecha['day_of_week'] = dim_fecha['date'].dt.dayofweek
dim_fecha['day_of_week_name'] = dim_fecha['date'].dt.day_name().str.lower()

dim_fecha['month'] = dim_fecha['date'].dt.month
dim_fecha["month_name"] = dim_fecha['date'].dt.month_name().str.lower()

dim_fecha['year'] = dim_fecha['date'].dt.year 

dim_fecha["week_of_month"] = (dim_fecha["date"].dt.day - 1) // 7 + 1

dim_fecha["quarter"] = dim_fecha["date"].dt.quarter


# Festivos con libreria Holidays
co_holidays = holidays.CO(language="es")  # Objeto con festivos en Colombia
dim_fecha["is_holiday"] = dim_fecha["date"].apply(
    lambda x: x in co_holidays
)  
dim_fecha["holiday_name"] = dim_fecha["date"].apply(lambda x: co_holidays.get(x, None))

dim_fecha['is_leap_year'] = dim_fecha['date'].dt.is_leap_year

dim_fecha

Unnamed: 0,date,date_mmdd,date_yyyymmdd,day,day_of_week,day_of_week_name,month,month_name,year,week_of_month,quarter,is_holiday,holiday_name,is_leap_year
0,2023-01-01,01/01,2023/01/01,1,6,sunday,1,january,2023,1,1,True,Año Nuevo,False
1,2023-01-02,01/02,2023/01/02,2,0,monday,1,january,2023,1,1,False,,False
2,2023-01-03,01/03,2023/01/03,3,1,tuesday,1,january,2023,1,1,False,,False
3,2023-01-04,01/04,2023/01/04,4,2,wednesday,1,january,2023,1,1,False,,False
4,2023-01-05,01/05,2023/01/05,5,3,thursday,1,january,2023,1,1,False,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664,2024-10-26,10/26,2024/10/26,26,5,saturday,10,october,2024,4,4,False,,True
665,2024-10-27,10/27,2024/10/27,27,6,sunday,10,october,2024,4,4,False,,True
666,2024-10-28,10/28,2024/10/28,28,0,monday,10,october,2024,4,4,False,,True
667,2024-10-29,10/29,2024/10/29,29,1,tuesday,10,october,2024,5,4,False,,True


## Load

In [5]:
# Create table
metadata = sa.MetaData()

metadata.reflect(bind=engine_etl)  # Reflect helps to get existing tables

dim_fecha_table = sa.Table(
    "dim_fecha",
    metadata,
    # Primary key
    sa.Column("key_fecha", sa.Integer, primary_key=True, autoincrement=True),
    # Dimension keys
    sa.Column("date", sa.Date, nullable=False),
    sa.Column("date_mmdd", sa.String, nullable=False),
    sa.Column("date_yyyymmdd", sa.String, nullable=False),
    sa.Column("day", sa.Integer, nullable=False),
    sa.Column("day_of_week", sa.Integer, nullable=False),
    sa.Column("day_of_week_name", sa.String, nullable=False),
    sa.Column("month", sa.Integer, nullable=False),
    sa.Column("month_name", sa.String, nullable=False),
    sa.Column("year", sa.Integer, nullable=False),
    sa.Column("week_of_month", sa.Integer, nullable=False),
    sa.Column("quarter", sa.Integer, nullable=False),
    # Times in minutes
    sa.Column("is_holiday", sa.Boolean, nullable=False),
    sa.Column("holiday_name", sa.String, nullable=True),
    sa.Column("is_leap_year", sa.Boolean, nullable=False),
)

metadata.create_all(engine_etl)

InvalidRequestError: Table 'dim_fecha' is already defined for this MetaData instance.  Specify 'extend_existing=True' to redefine options and columns on an existing Table object.

In [None]:
dim_fecha.to_sql(
    name="dim_fecha",
    con=engine_etl,
    if_exists="append", # Create table if not exists, else append
    index=False, # Do not create index because we already have one PK
)