In [2]:
import datetime
from datetime import date, timedelta
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import yaml
from sqlalchemy import create_engine


# Database connections

In [3]:
def build_db_url(db_config):
    """Assemble a SQLAlchemy connection URL from one section of config.yml.

    The user name and password are percent-encoded so that reserved
    characters in the credentials (for example '@', ':' or '/') cannot be
    mistaken for URL delimiters.

    Parameters
    ----------
    db_config : mapping
        Must provide the keys 'drivername', 'user', 'password', 'host',
        'port' and 'dbname'.

    Returns
    -------
    str
        A URL of the form ``drivername://user:password@host:port/dbname``.

    Raises
    ------
    KeyError
        If any of the required keys is missing from `db_config`.
    """
    # str() first: YAML may parse an all-digit credential as an int, and
    # quote_plus only accepts str/bytes.
    # NOTE(review): if config.yml already stores percent-encoded credentials,
    # store them raw instead, otherwise they will be encoded twice.
    user = quote_plus(str(db_config['user']))
    password = quote_plus(str(db_config['password']))
    return (f"{db_config['drivername']}://{user}:{password}@{db_config['host']}:"
            f"{db_config['port']}/{db_config['dbname']}")


# Read the connection settings; the file handle is only needed while parsing.
with open('../config.yml', 'r') as f:
    config = yaml.safe_load(f)

config_co = config['CO_SA']      # source database section
config_etl = config['ETL_PRO']   # ETL target database section

# Construct the database URLs
url_co = build_db_url(config_co)
url_etl = build_db_url(config_etl)

# Create the SQLAlchemy engines
co_sa = create_engine(url_co)
etl_conn = create_engine(url_etl)

# Extract

In [4]:
# Read the three source tables in full through the `co_sa` engine,
# one DataFrame per table (same order as the names on the left).
df_citas, df_urgencias, df_hosp = (
    pd.read_sql_table(table_name, co_sa)
    for table_name in ('citas_generales', 'urgencias', 'hospitalizaciones')
)

In [5]:
# Inspect the hospitalization extract before transforming it:
# row count, column names, non-null counts and dtypes.
df_hosp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 893 entries, 0 to 892
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   codigo_hospitalizacion    893 non-null    object        
 1   id_usuario                893 non-null    object        
 2   id_medico                 893 non-null    object        
 3   fecha_solicitud           893 non-null    datetime64[ns]
 4   hora_solicitud            893 non-null    object        
 5   fecha_atencion            893 non-null    datetime64[ns]
 6   hora_atencion             893 non-null    object        
 7   duracion_hospitalizacion  893 non-null    int64         
 8   diagnostico               893 non-null    object        
dtypes: datetime64[ns](2), int64(1), object(6)
memory usage: 62.9+ KB


# Transformations

In [6]:
def tag_service(frame, code_column, service_type):
    """Return a copy of `frame` with a unified key column and a service label.

    Parameters
    ----------
    frame : pandas.DataFrame
        One of the extracted service tables.
    code_column : str
        Name of the table-specific code column; it is renamed to
        'codigo_servicio'. A name that is not present is ignored by `rename`.
    service_type : str
        Constant written to the 'tipo_servicio' column.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is not modified.
    """
    return (
        frame
        .rename(columns={code_column: 'codigo_servicio'})
        .assign(tipo_servicio=service_type)
    )


def time_to_timedelta(value):
    """Convert a time-of-day value into the offset from midnight.

    Parameters
    ----------
    value : object with `.hour`, `.minute` and `.second` attributes, or a missing value
        The time of day to convert (e.g. a `datetime.time`).

    Returns
    -------
    datetime.timedelta or pandas.NaT
        The offset built from hours, minutes and seconds only (any sub-second
        part is dropped); `NaT` when `value` is missing.
    """
    if pd.isna(value):
        return pd.NaT
    return timedelta(hours=value.hour, minutes=value.minute, seconds=value.second)


# Give every source the same key column name and label its origin, so the
# three tables can be stacked into one.
df_hosp = tag_service(df_hosp, 'codigo_hospitalizacion', 'hospitalizacion')
df_urgencias = tag_service(df_urgencias, 'codigo_urgencia', 'urgencias')
df_citas = tag_service(df_citas, 'codigo_cita', 'citas')

# Columns shared by all services that are carried into the transformed table.
SERVICE_COLUMNS = ['codigo_servicio', 'id_usuario', 'id_medico', 'fecha_solicitud',
                   'fecha_atencion', 'hora_atencion', 'hora_solicitud', 'tipo_servicio']

combined = pd.concat([df_hosp, df_urgencias, df_citas], axis=0)

# Boolean column mask (rather than a list lookup) keeps the columns in the
# order produced by the concat. reset_index replaces the repeated per-table
# row labels with a fresh 0..n-1 index.
trans_servicio = (
    combined
    .loc[:, combined.columns.isin(SERVICE_COLUMNS)]
    .reset_index(drop=True)
)

for date_column in ('fecha_atencion', 'fecha_solicitud'):
    trans_servicio[date_column] = pd.to_datetime(trans_servicio[date_column])

for time_column in ('hora_atencion', 'hora_solicitud'):
    # to_timedelta pins the dtype to timedelta64 even when the column is
    # empty or contains missing values.
    trans_servicio[time_column] = pd.to_timedelta(
        trans_servicio[time_column].apply(time_to_timedelta)
    )

# Full timestamps: calendar date plus offset from midnight.
trans_servicio['fecha_hora_atencion'] = trans_servicio['fecha_atencion'] + trans_servicio['hora_atencion']
trans_servicio['fecha_hora_solicitud'] = trans_servicio['fecha_solicitud'] + trans_servicio['hora_solicitud']

trans_servicio.head()


Unnamed: 0,codigo_servicio,id_usuario,id_medico,fecha_solicitud,hora_solicitud,fecha_atencion,hora_atencion,tipo_servicio,fecha_hora_atencion,fecha_hora_solicitud
0,808809,1705225684180,272113543,2007-03-19,0 days 08:00:00,2007-03-19,0 days 08:30:00,hospitalizacion,2007-03-19 08:30:00,2007-03-19 08:00:00
1,808811,1705225699140,81113543,2006-12-25,0 days 08:00:00,2006-12-25,0 days 08:30:00,hospitalizacion,2006-12-25 08:30:00,2006-12-25 08:00:00
2,808813,1705225788780,166113543,2007-12-16,0 days 08:00:00,2007-12-16,0 days 08:28:00,hospitalizacion,2007-12-16 08:28:00,2007-12-16 08:00:00
3,808815,1705225786530,228113543,2007-01-07,0 days 08:00:00,2007-01-07,0 days 08:34:00,hospitalizacion,2007-01-07 08:34:00,2007-01-07 08:00:00
4,808817,1705225880540,172113543,2006-10-29,0 days 08:00:00,2006-10-29,0 days 08:26:00,hospitalizacion,2006-10-29 08:26:00,2006-10-29 08:00:00


In [7]:
# Stamp every row with the date on which this cell ran. The value is a plain
# `datetime.date`, so the column is stored with object dtype.
trans_servicio['saved'] = date.today()

# Load

In [8]:
# Write the transformed table to the ETL database. `if_exists='replace'`
# drops and recreates the table on every run; the DataFrame index is stored
# in the column 'key_trans_servicio'.
# NOTE(review): hora_atencion / hora_solicitud are timedelta64[ns]; pandas'
# to_sql has no timedelta SQL type and writes such columns as integer
# nanoseconds (presumably what the warning under this cell reports) --
# confirm that downstream readers expect that representation.
trans_servicio.to_sql(
    name='trans_servicio',
    con=etl_conn,
    if_exists='replace',
    index_label='key_trans_servicio',
)

  trans_servicio.to_sql('trans_servicio',etl_conn,if_exists='replace',index_label='key_trans_servicio')


465

In [9]:
# Summarize the loaded frame: row count, columns, non-null counts and dtypes.
trans_servicio.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4465 entries, 0 to 4464
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype          
---  ------                --------------  -----          
 0   codigo_servicio       4465 non-null   object         
 1   id_usuario            4465 non-null   object         
 2   id_medico             4465 non-null   object         
 3   fecha_solicitud       4465 non-null   datetime64[ns] 
 4   hora_solicitud        4465 non-null   timedelta64[ns]
 5   fecha_atencion        4465 non-null   datetime64[ns] 
 6   hora_atencion         4465 non-null   timedelta64[ns]
 7   tipo_servicio         4465 non-null   object         
 8   fecha_hora_atencion   4465 non-null   datetime64[ns] 
 9   fecha_hora_solicitud  4465 non-null   datetime64[ns] 
 10  saved                 4465 non-null   object         
dtypes: datetime64[ns](4), object(5), timedelta64[ns](2)
memory usage: 383.8+ KB


In [10]:
# Display the final transformed table (pandas truncates the rendering to the
# first and last rows).
trans_servicio

Unnamed: 0,codigo_servicio,id_usuario,id_medico,fecha_solicitud,hora_solicitud,fecha_atencion,hora_atencion,tipo_servicio,fecha_hora_atencion,fecha_hora_solicitud,saved
0,808809,1705225684180,272113543,2007-03-19,0 days 08:00:00,2007-03-19,0 days 08:30:00,hospitalizacion,2007-03-19 08:30:00,2007-03-19 08:00:00,2024-09-17
1,808811,1705225699140,81113543,2006-12-25,0 days 08:00:00,2006-12-25,0 days 08:30:00,hospitalizacion,2006-12-25 08:30:00,2006-12-25 08:00:00,2024-09-17
2,808813,1705225788780,166113543,2007-12-16,0 days 08:00:00,2007-12-16,0 days 08:28:00,hospitalizacion,2007-12-16 08:28:00,2007-12-16 08:00:00,2024-09-17
3,808815,1705225786530,228113543,2007-01-07,0 days 08:00:00,2007-01-07,0 days 08:34:00,hospitalizacion,2007-01-07 08:34:00,2007-01-07 08:00:00,2024-09-17
4,808817,1705225880540,172113543,2006-10-29,0 days 08:00:00,2006-10-29,0 days 08:26:00,hospitalizacion,2006-10-29 08:26:00,2006-10-29 08:00:00,2024-09-17
...,...,...,...,...,...,...,...,...,...,...,...
4460,333041,1135438558010,563113543,2007-07-26,0 days 11:32:00,2007-07-27,0 days 08:00:00,citas,2007-07-27 08:00:00,2007-07-26 11:32:00,2024-09-17
4461,333042,1135438558010,472113543,2007-11-07,0 days 08:29:00,2007-11-10,0 days 08:00:00,citas,2007-11-10 08:00:00,2007-11-07 08:29:00,2024-09-17
4462,333043,1135438558010,236113543,2007-11-14,0 days 18:26:00,2007-11-16,0 days 08:20:00,citas,2007-11-16 08:20:00,2007-11-14 18:26:00,2024-09-17
4463,333044,1135438558010,457113543,2008-01-14,0 days 08:16:00,2008-01-17,0 days 08:00:00,citas,2008-01-17 08:00:00,2008-01-14 08:16:00,2024-09-17
