In [1]:
from datetime import date

import numpy as np
import pandas as pd
import yaml
from sqlalchemy import create_engine


# database connections 

In [2]:
with open('../config.yml', 'r') as f:
    config = yaml.safe_load(f)
    config_co = config['CO_SA']
    config_etl = config['ETL_PRO']

# Construct the database URL
url_co = (f"{config_co['drivername']}://{config_co['user']}:{config_co['password']}@{config_co['host']}:"
          f"{config_co['port']}/{config_co['dbname']}")
url_etl = (f"{config_etl['drivername']}://{config_etl['user']}:{config_etl['password']}@{config_etl['host']}:"
           f"{config_etl['port']}/{config_etl['dbname']}")
# Create the SQLAlchemy Engine
co_sa = create_engine(url_co)
etl_conn = create_engine(url_etl)

# Extract

In [3]:
dim_medico = pd.read_sql_table('medico', co_sa)


# Transformations

In [4]:
dim_medico.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 720 entries, 0 to 719
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   cedula                 720 non-null    object
 1   nombre                 720 non-null    object
 2   especialidad           720 non-null    object
 3   subespecialidad        720 non-null    object
 4   licencia               720 non-null    object
 5   direccion_consultorio  720 non-null    object
 6   id_ips                 720 non-null    object
dtypes: object(7)
memory usage: 39.5+ KB


In [5]:
dim_medico.describe(include='all')

Unnamed: 0,cedula,nombre,especialidad,subespecialidad,licencia,direccion_consultorio,id_ips
count,720,720,720,720.0,720,720.0,720
unique,720,720,27,12.0,720,26.0,60
top,1113543,Félix Murillo Hurtado,Medicina General,,1507802346,,IPS_14
freq,1,1,91,627.0,1,693.0,20


In [None]:
dim_medico.replace({np.nan: 'no aplica', ' ': 'no aplica','':'no_aplica'}, inplace=True)
dim_medico["saved"] = date.today()

# load

In [7]:
dim_medico.head()

Unnamed: 0,cedula,nombre,especialidad,subespecialidad,licencia,direccion_consultorio,id_ips,saved
0,1113543,Félix Murillo Hurtado,Neurología,no_aplica,1507802346,no aplica,IPS_1,2024-09-17
1,2113543,Cecilia Salas Quiñones,Medicina de emergencia,no_aplica,1507802354,no aplica,IPS_1,2024-09-17
2,3113543,Elcira Espinosa Abad,Anestesiología,no_aplica,1507802375,no aplica,IPS_1,2024-09-17
3,4113543,Gabino Niño Castillo,Radiología,no_aplica,1507802392,no aplica,IPS_1,2024-09-17
4,5113543,Alberto Benavides Hurtado,Dermatología,no_aplica,1507802419,Transversal 10 # 69-65,IPS_1,2024-09-17


In [8]:
dim_medico.to_sql('dim_medico', etl_conn, if_exists='replace',index_label='key_dim_medico')

720