## Import Libraries

In [1]:
import pandas as pd
from sqlalchemy import create_engine

## Configuration

In [2]:
import os

# Database Connection Configuration
# Settings used to build the PostgreSQL connection for the target database.
user = "postgres"
# The password can be supplied through the RWVENTAS_DB_PASSWORD environment
# variable so that real credentials do not have to live in the notebook.
# The literal is kept only as a backward-compatible fallback.
# NOTE(review): remove the fallback once the variable is set everywhere.
password = os.environ.get("RWVENTAS_DB_PASSWORD", "Qwe.123*")
host = "localhost"
port = "5432"
database = "RWventas"
schema = "public"

# CSV Path
# Location of the sales CSV, relative to the working directory.
csv_path = "./data/clean_sales.csv"

## Create Star Schema

In [3]:
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# -------------------------
# CONFIGURATION
# -------------------------
# Connection settings are repeated here so this cell can run on its own.
user = "postgres"
# The password can be supplied through the RWVENTAS_DB_PASSWORD environment
# variable; the literal is kept only as a backward-compatible fallback.
# NOTE(review): avoid committing real credentials with the notebook.
password = os.environ.get("RWVENTAS_DB_PASSWORD", "Qwe.123*")
host = "localhost"
port = "5432"
database = "RWventas"
schema = "public"

# NOTE(review): machine-specific absolute path; it overrides the relative
# "./data/clean_sales.csv" set in the configuration cell. Confirm which one
# should be the single source of truth.
csv_path = r"C:\Users\Coder\Documents\Prueba Desempeño Analytics\data\clean_sales.csv"


def _build_dimension(frame, columns, id_column):
    """Return the distinct rows of *columns* plus a 1-based surrogate key.

    Args:
        frame: Source DataFrame containing every name listed in *columns*.
        columns: Column names whose distinct combinations form the dimension.
        id_column: Name of the surrogate-key column to add.

    Returns:
        A new DataFrame with *columns* followed by *id_column*; keys are
        numbered 1..n in order of first appearance in *frame*.
    """
    dimension = frame[columns].drop_duplicates().reset_index(drop=True)
    dimension[id_column] = dimension.index + 1
    return dimension


# -------------------------
# LOAD CSV
# -------------------------
df = pd.read_csv(csv_path)

# Fail early, with one clear message, if the CSV lacks a column used below.
required_columns = [
    'Producto', 'Tipo_Producto', 'Tipo_Cliente', 'Tipo_Venta', 'Ciudad',
    'Fecha', 'Cantidad', 'Precio_Unitario', 'Descuento', 'Costo_Envio',
    'Costo_Total',
]
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise KeyError(f"Missing columns in {csv_path}: {missing_columns}")

# -------------------------
# CREATE DIMENSIONS
# -------------------------

# DimProduct
dim_product = _build_dimension(df, ['Producto', 'Tipo_Producto'], 'product_id')

# DimClient
dim_client = _build_dimension(df, ['Tipo_Cliente'], 'client_id')

# DimSaleType
dim_sale_type = _build_dimension(df, ['Tipo_Venta'], 'sale_type_id')

# DimCity
dim_city = _build_dimension(df, ['Ciudad'], 'city_id')

# DimDate
# Parse the dates in place so the fact merge below joins on the same dtype.
df['Fecha'] = pd.to_datetime(df['Fecha'])
# Distinct dates in chronological order, so date_id increases with time.
dim_date = (
    df[['Fecha']]
    .drop_duplicates()
    .sort_values('Fecha')
    .reset_index(drop=True)
)
dim_date['date_id'] = dim_date.index + 1
dim_date['year'] = dim_date['Fecha'].dt.year
dim_date['month'] = dim_date['Fecha'].dt.month
dim_date['day'] = dim_date['Fecha'].dt.day
dim_date['quarter'] = dim_date['Fecha'].dt.quarter

# -------------------------
# CREATE FACT TABLE
# -------------------------
# Attach each surrogate key to the sales rows. validate="many_to_one" makes
# pandas raise if a dimension ever held duplicate keys, which would otherwise
# silently multiply fact rows.
fact_sales = (
    df.merge(dim_product, on=['Producto', 'Tipo_Producto'], validate="many_to_one")
      .merge(dim_client, on=['Tipo_Cliente'], validate="many_to_one")
      .merge(dim_sale_type, on=['Tipo_Venta'], validate="many_to_one")
      .merge(dim_city, on=['Ciudad'], validate="many_to_one")
      .merge(dim_date, on=['Fecha'], validate="many_to_one")
)

# Keep only the foreign keys and the measures.
fact_sales = fact_sales[[
    'date_id', 'product_id', 'client_id', 'sale_type_id', 'city_id',
    'Cantidad', 'Precio_Unitario', 'Descuento', 'Costo_Envio', 'Costo_Total'
]]

# -------------------------
# LOAD INTO POSTGRES
# -------------------------
# Percent-encode the credentials so characters such as "@", "/" or ":" in the
# user name or password cannot corrupt the connection URL.
engine = create_engine(
    f"postgresql+psycopg2://{quote(user, safe='')}:{quote(password, safe='')}"
    f"@{host}:{port}/{database}"
)

# Table name -> DataFrame, in load order (dimensions first, then the fact).
star_schema_tables = {
    "DimProduct": dim_product,
    "DimClient": dim_client,
    "DimSaleType": dim_sale_type,
    "DimCity": dim_city,
    "DimDate": dim_date,
    "FactSales": fact_sales,
}

# A single transaction: either every table is replaced or, on any error,
# the database is left as it was.
with engine.begin() as connection:
    for table_name, table_frame in star_schema_tables.items():
        table_frame.to_sql(
            table_name,
            connection,
            schema=schema,
            if_exists="replace",
            index=False,
        )

print(f"\nStar Schema successfully loaded into {database}:")
for table_name in star_schema_tables:
    print(f" - {table_name}")



Star Schema successfully loaded into RWventas:
 - DimProduct
 - DimClient
 - DimSaleType
 - DimCity
 - DimDate
 - FactSales
