In [None]:
from urllib.parse import quote

import pandas as pd
import yaml
from sqlalchemy import create_engine

In [None]:
def _build_db_url(settings):
    """Build a SQLAlchemy database URL string from one section of config.yaml.

    Parameters
    ----------
    settings : mapping
        Must provide the keys 'driver', 'user', 'password', 'host', 'port'
        and 'db'.

    Returns
    -------
    str
        A URL of the form ``driver://user:password@host:port/db``.

    Notes
    -----
    The user name and password are percent-encoded so that characters such as
    '@', ':', '/' or '#' inside a credential cannot be mistaken for URL
    delimiters. This assumes config.yaml stores the raw (not already
    percent-encoded) credentials -- TODO confirm.
    """
    # safe='' encodes every reserved character, including '/'.
    user = quote(str(settings['user']), safe='')
    password = quote(str(settings['password']), safe='')
    return (
        f"{settings['driver']}://{user}:{password}"
        f"@{settings['host']}:{settings['port']}/{settings['db']}"
    )


# Only the parsing needs the file handle; everything else works on the dict.
with open('../config.yaml') as f:
    config = yaml.safe_load(f)

configSource = config['source']
configDestination = config['destination']

urlSource = _build_db_url(configSource)
urlDestination = _build_db_url(configDestination)

# One engine per database: the source is read from, the destination holds
# dim_date and receives the fact table.
engineSource = create_engine(urlSource)
engineDestination = create_engine(urlDestination)

In [None]:

# Columns of each source table that are discarded before building the fact.
service_unused_columns = [
    "id", "descripcion", "nombre_solicitante", "hora_solicitud",
    "fecha_deseada", "hora_deseada", "nombre_recibe", "telefono_recibe",
    "descripcion_pago", "ida_y_regreso", "activo", "novedades", "cliente_id",
    "destino_id", "mensajero_id", "origen_id", "tipo_pago_id",
    "tipo_servicio_id", "tipo_vehiculo_id", "prioridad", "ciudad_destino_id",
    "ciudad_origen_id", "hora_visto_por_mensajero", "visto_por_mensajero",
    "descripcion_multiples_origenes", "mensajero2_id", "mensajero3_id",
    "multiples_origenes", "asignar_mensajero", "es_prueba",
    "descripcion_cancelado",
]
user_unused_columns = [
    "ciudad_id", "user_id", "telefono", "area_id", "token_Firebase", "lider",
]

# Grain of the fact table: one row per combination of these columns.
daily_grain = ['fecha_solicitud', 'cliente_id', 'sede_id']

# --- Extract: read both source tables and trim them -------------------------
mensajeria_servicio = (
    pd.read_sql_table('mensajeria_servicio', engineSource)
    .drop(columns=service_unused_columns)
)
clientes_usuarioaquitoy = (
    pd.read_sql_table('clientes_usuarioaquitoy', engineSource)
    .drop(columns=user_unused_columns)
)

# --- Transform: attach each service to the user that requested it -----------
# The service table's own 'cliente_id' was dropped above, so the 'cliente_id'
# used below presumably comes from clientes_usuarioaquitoy -- confirm schema.
services_with_user = (
    mensajeria_servicio
    .merge(clientes_usuarioaquitoy, left_on='usuario_id', right_on='id', how='inner')
    .drop(columns=["usuario_id", "id"])
)

# Number of services per (request date, client, headquarter).
total_day = (
    services_with_user
    .groupby(daily_grain)
    .size()
    .reset_index(name='total_day')
)

# Attach the count to every service row, then keep a single row per grain.
fact_messaging_daily = (
    services_with_user
    .merge(total_day, on=daily_grain)
    .drop_duplicates(subset=daily_grain)
    .rename(columns={'fecha_solicitud': 'date'})
)

# --- Swap the calendar date for the surrogate key of dim_date ---------------
dim_date = pd.read_sql_table('dim_date', engineDestination)

# Left join: dates with no match in dim_date keep their row and get a null key.
fact_messaging_daily = (
    fact_messaging_daily
    .merge(dim_date[['date', 'key_dim_date']], on='date', how='left')
    .drop(columns=['date'])
    .rename(columns={
        'key_dim_date': 'key_date',
        'cliente_id': 'key_client',
        'sede_id': 'key_headquarter',
    })
)

# Sequential surrogate key starting at 1, assigned in current row order.
row_count = len(fact_messaging_daily)
fact_messaging_daily["key_fact_messaging_daily"] = range(1, row_count + 1)

fact_messaging_daily

In [None]:
# Count missing values per column of the fact table before it is loaded.
null_counts = fact_messaging_daily.isna().sum()
print(null_counts)

In [None]:
# Write the fact table to the destination database. if_exists="replace" drops
# any existing table of that name and recreates it; the DataFrame index is
# not written.
fact_messaging_daily.to_sql(
    name="fact_messaging_daily",
    con=engineDestination,
    if_exists="replace",
    index=False,
)