In [1]:
from datetime import date
from urllib.parse import quote

import numpy as np
import pandas as pd
import yaml

In [47]:
from sqlalchemy import create_engine

def _build_db_url(cfg):
    """Return a database URL string built from one section of config.yml.

    The user and password are percent-encoded so that reserved characters
    (for example '@', ':', '/' or '#') cannot be mistaken for URL delimiters
    when the string is parsed by `create_engine`.
    """
    user = quote(str(cfg['user']), safe='')
    password = quote(str(cfg['password']), safe='')
    return (f"{cfg['drivername']}://{user}:{password}@{cfg['host']}:"
            f"{cfg['port']}/{cfg['dbname']}")


# Connection settings live outside the notebook so credentials are not hardcoded here.
with open('./config.yml', 'r') as f:
    config = yaml.safe_load(f)

config_ryf = config['RAPIDO_Y_FURIOSO']
config_etl = config['ETL']

url_ryf = _build_db_url(config_ryf)
url_etl = _build_db_url(config_etl)

# `ryf` is the engine the tables are read from; `etl_conn` is the engine the result is written to.
ryf = create_engine(url_ryf)
etl_conn = create_engine(url_etl)

In [48]:
# Load both source tables in full through the `ryf` engine.
dim_mensajero = pd.read_sql_table(table_name='clientes_mensajeroaquitoy', con=ryf)
usuario = pd.read_sql_table(table_name='auth_user', con=ryf)

In [49]:
# Left join: every row of `dim_mensajero` is kept and the `usuario` row whose
# `id` equals its `user_id` is attached to it.
dim_mensajero = pd.merge(
    dim_mensajero,
    usuario,
    how='left',
    left_on='user_id',
    right_on='id',
)

In [50]:
# Inspect column names, dtypes and non-null counts of the merged frame.
dim_mensajero.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 21 columns):
 #   Column               Non-Null Count  Dtype              
---  ------               --------------  -----              
 0   id_x                 50 non-null     int64              
 1   user_id              50 non-null     int64              
 2   activo               50 non-null     bool               
 3   fecha_entrada        33 non-null     datetime64[ns]     
 4   fecha_salida         0 non-null      datetime64[ns]     
 5   salario              35 non-null     float64            
 6   telefono             50 non-null     object             
 7   ciudad_operacion_id  45 non-null     float64            
 8   token_Firebase       2 non-null      object             
 9   url_foto             50 non-null     object             
 10  id_y                 50 non-null     int64              
 11  password             50 non-null     object             
 12  last_login           46 

In [51]:
# Summary statistics for every column (numeric, datetime and object alike).
# NOTE(review): include='all' also summarizes the `password` and `token_Firebase`
# columns, so the rendered output shows credential material; clear it before sharing.
dim_mensajero.describe(include='all')

Unnamed: 0,id_x,user_id,activo,fecha_entrada,fecha_salida,salario,telefono,ciudad_operacion_id,token_Firebase,url_foto,...,password,last_login,is_superuser,username,first_name,last_name,email,is_staff,is_active,date_joined
count,50.0,50.0,50,33,0,35.0,50,45.0,2,50,...,50,46,50,50,50,50,50,50,50,50
unique,,,2,,,,1,,1,1,...,50,,2,50,1,1,1,2,2,
top,,,True,,,,310-300000,,eotLgNBIRxhIxb3Z8rCLU2:APA91bHMzsS_jZJy493-pmB...,http:,...,pbkdf2_sha256$600000$0yLDDD7GGhUyqdNKHG9ZNs$bG...,,False,mensajero1,pepito_el_rapido,pepito_el_furioso,rapidos-furiosos@gmail.com,False,True,
freq,,,49,,,,50,,2,50,...,1,,49,1,50,50,50,49,48,
mean,28.16,310.7,,2021-03-21 05:05:27.272727296,NaT,1061860.0,,1.533333,,,...,,2024-08-01 09:25:35.750920448+00:00,,,,,,,,2023-11-26 05:00:48.200000256+00:00
min,1.0,1.0,,2012-05-08 00:00:00,NaT,11111.0,,1.0,,,...,,2023-12-28 22:40:25+00:00,,,,,,,,2023-06-15 16:06:05+00:00
25%,13.5,328.25,,2020-04-01 00:00:00,NaT,1160000.0,,1.0,,,...,,2024-08-04 10:15:47.630036736+00:00,,,,,,,,2023-10-05 15:40:44+00:00
50%,27.5,344.5,,2021-11-08 00:00:00,NaT,1160000.0,,1.0,,,...,,2024-08-22 18:42:40.130569984+00:00,,,,,,,,2023-10-05 16:48:06+00:00
75%,39.75,372.75,,2023-02-01 00:00:00,NaT,1160000.0,,1.0,,,...,,2024-08-30 19:46:31.582338816+00:00,,,,,,,,2024-01-23 00:23:47.500000+00:00
max,84.0,492.0,,2024-08-12 00:00:00,NaT,3000000.0,,13.0,,,...,,2024-08-31 19:41:33.430951+00:00,,,,,,,,2024-08-09 19:45:27+00:00


In [52]:
# Use the 'no aplica' placeholder only for missing values in text (object) columns.
# Writing the placeholder into numeric or datetime columns would turn them into
# object columns that hold a mix of values and strings.
text_columns = dim_mensajero.select_dtypes(include='object').columns
dim_mensajero[text_columns] = dim_mensajero[text_columns].fillna('no aplica')

# Stamp each row with the date this load was run.
# NOTE(review): `saved` is not in the column list selected before writing the table,
# so it is not persisted — confirm whether that is intended.
dim_mensajero["saved"] = date.today()

In [53]:
# Cast the city key to the nullable integer dtype. `to_numeric(errors='coerce')`
# turns anything non-numeric into NaN, and `astype('Int64')` stores NaN as <NA>,
# so no separate fill step is needed in between.
dim_mensajero['ciudad_operacion_id'] = pd.to_numeric(
    dim_mensajero['ciudad_operacion_id'], errors='coerce'
).astype('Int64')

In [62]:
# Rename first, then select: `first_name` / `last_name` must already be called
# `nombres` / `apellidos` when the final column list is applied. Doing the
# selection first drops both name columns on a fresh top-to-bottom run.
dim_mensajero = dim_mensajero.rename(columns={
    'first_name': 'nombres',
    'last_name': 'apellidos',
})
# Plain label selection (rather than `.filter`) raises KeyError when an expected
# column is missing instead of silently leaving it out.
dim_mensajero = dim_mensajero[['nombres', 'apellidos', 'telefono', 'ciudad_operacion_id']]

In [64]:
# Preview the first rows of the frame that is about to be written.
dim_mensajero.head()

Unnamed: 0,nombres,apellidos,telefono,ciudad_operacion_id
0,pepito_el_rapido,pepito_el_furioso,310-300000,13
1,pepito_el_rapido,pepito_el_furioso,310-300000,1
2,pepito_el_rapido,pepito_el_furioso,310-300000,1
3,pepito_el_rapido,pepito_el_furioso,310-300000,1
4,pepito_el_rapido,pepito_el_furioso,310-300000,4


In [65]:
# Write the frame to the `dim_mensajero` table through `etl_conn`, replacing any
# existing table; the DataFrame index is stored as the `key_dim_mensajero` column.
dim_mensajero.to_sql(
    name='dim_mensajero',
    con=etl_conn,
    if_exists='replace',
    index_label='key_dim_mensajero',
)

50