In [1]:
import datetime
from datetime import date
from urllib.parse import quote

import numpy as np
import pandas as pd
import yaml
from sqlalchemy import create_engine


# database connections 

In [2]:
# Database connections.
# Read the connection settings once; the file only needs to stay open while
# it is being parsed.
with open('../config_fill.yml', 'r') as f:
    config = yaml.safe_load(f)

config_aw = config['Adventure_Works']
config_etl = config['ETL_PRO']

# Source database: pyodbc URL with no user/password part; authentication is
# governed by the configured trusted_connection value. Spaces in the ODBC
# driver name are written as '+' in the query string.
url_aw = (
    f"mssql+pyodbc://@{config_aw['host']}/{config_aw['dbname']}"
    f"?driver={config_aw['driver'].replace(' ', '+')}"
    f"&trusted_connection={config_aw['trusted_connection']}"
)

# Target database: percent-encode the credentials so characters such as
# '@', ':', '/' or '%' in the user name or password cannot be mistaken for
# URL delimiters. str() keeps purely numeric YAML values working.
# NOTE(review): this assumes the YAML stores the credentials in plain form,
# not already percent-encoded - confirm.
url_etl = (
    f"{config_etl['drivername']}://"
    f"{quote(str(config_etl['user']), safe='')}:"
    f"{quote(str(config_etl['password']), safe='')}"
    f"@{config_etl['host']}:{config_etl['port']}/{config_etl['dbname']}"
)

aw_engine = create_engine(url_aw)
etl_engine = create_engine(url_etl)

In [3]:
# Extract: read every row of Sales.Store from the source database, aliasing
# the business entity id and the name to store_id / store_name.
query_reseller = """
SELECT
    s.BusinessEntityID AS store_id,
    s.Name AS store_name,
    s.SalesPersonID,
    s.Demographics
FROM Sales.Store s;
"""

df_reseller = pd.read_sql(sql=query_reseller, con=aw_engine)

# Preview the first rows of the extracted frame.
df_reseller.head(5)


Unnamed: 0,store_id,store_name,SalesPersonID,Demographics
0,292,Next-Door Bike Store,279,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
1,294,Professional Sales and Service,276,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
2,296,Riders Company,277,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
3,298,The Bike Mechanics,275,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
4,300,Nationwide Supply,286,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."


In [4]:
# Convert the XML survey to text in a lower-case `demographics` column that
# replaces the original `Demographics` column. Missing surveys are filled with
# an empty string *before* the cast: astype(str) on its own would store the
# literal text "None"/"nan" instead of leaving the value empty.
df_reseller = (
    df_reseller
    .assign(demographics=lambda frame: frame["Demographics"].fillna("").astype(str))
    .drop(columns=["Demographics"])
    # Missing values: placeholder name and a -1 sentinel for the salesperson.
    .fillna({"store_name": "Unknown", "SalesPersonID": -1})
    # A column that contained NULLs arrives as float; once the sentinel is in
    # place there are no gaps left, so keep the id integral.
    .astype({"SalesPersonID": "int64"})
)


In [5]:
# Switch from the source-oriented column names to the names used by the
# reseller dimension.
df_reseller = df_reseller.rename(
    columns={
        "store_id": "reseller_id",
        "store_name": "reseller_name",
        "SalesPersonID": "salesperson_id",
    }
)


In [6]:
# Surrogate key: consecutive integers starting at 1, placed as the first column.
# - Any key left over from a previous run of this cell is dropped first, so
#   re-running the cell does not raise "cannot insert reseller_key, already exists".
# - Rows are ordered by reseller_id before numbering, so the key assignment does
#   not depend on the order in which the source query (which has no ORDER BY)
#   happened to return them.
df_reseller = (
    df_reseller
    .drop(columns=["reseller_key"], errors="ignore")
    .sort_values("reseller_id", kind="stable")
    .reset_index(drop=True)
)
df_reseller.insert(0, "reseller_key", range(1, len(df_reseller) + 1))
df_reseller.head()


Unnamed: 0,reseller_key,reseller_id,reseller_name,salesperson_id,demographics
0,1,292,Next-Door Bike Store,279,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
1,2,294,Professional Sales and Service,276,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
2,3,296,Riders Company,277,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
3,4,298,The Bike Mechanics,275,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."
4,5,300,Nationwide Supply,286,"<StoreSurvey xmlns=""http://schemas.microsoft.c..."


In [7]:
# Load: write the frame to the dim_reseller table through the ETL engine.
# if_exists="replace" replaces any existing table of that name; the DataFrame
# index is not written. The call is the cell's last expression, so its return
# value is what the cell displays.
df_reseller.to_sql(name="dim_reseller", con=etl_engine, if_exists="replace", index=False)


701