# Updates (transactional)

In [92]:
import pandas as pd

## Database connection

In [None]:
import yaml
from sqlalchemy import create_engine


with open("../config.yml", "r") as file:
	config = yaml.safe_load(file)
	config_OLTP = config["OLTP"]
	config_OLAP = config["OLAP"]


url_OLTP = (f"{config_OLTP['drivername']}://{config_OLTP['user']}:{config_OLTP['password']}"
			f"@{config_OLTP['host']}:{config_OLTP['port']}/{config_OLTP['database_name']}")

url_OLAP = (f"{config_OLAP['drivername']}://{config_OLAP['user']}:{config_OLAP['password']}"
			f"@{config_OLAP['host']}:{config_OLAP['port']}/{config_OLAP['database_name']}")

OLTP_connection = create_engine(url_OLTP)
OLAP_connection = create_engine(url_OLAP)

## Extraction

In [94]:
time_dimension = pd.read_sql_table("TIME_DIMENSION", OLAP_connection)
updates = pd.read_sql_table("mensajeria_novedadesservicio", OLTP_connection)

In [104]:
print(time_dimension.head())
print(time_dimension.shape)

   time_id       date  year  month  day  day_of_year  day_of_month  month_str  \
0        0 2023-09-18  2023      9   18          261            30  September   
1        1 2023-09-19  2023      9   19          262            30  September   
2        2 2023-09-20  2023      9   20          263            30  September   
3        3 2023-09-21  2023      9   21          264            30  September   
4        4 2023-09-22  2023      9   22          265            30  September   

     day_str  
0     Monday  
1    Tuesday  
2  Wednesday  
3   Thursday  
4     Friday  
(114, 9)


In [103]:
print(updates.head())
print(updates.shape)

   id fecha_novedad  tipo_novedad_id descripcion  servicio_id  es_prueba  \
0   4    2023-11-30                1           A           51       True   
1   5    2023-11-30                1        Halo           51       True   
2   6    2023-11-30                1           A           51       True   
3   7    2023-11-30                1           B           51       True   
4   8    2023-11-30                1           A           51       True   

   mensajero_id  
0             7  
1             7  
2             7  
3             7  
4             7  
(5208, 7)


## Transformation

Ignore the hours in the updates times.

In [97]:
updates["fecha_novedad"] = pd.to_datetime(updates["fecha_novedad"].dt.date)

In [98]:
updates.head()

Unnamed: 0,id,fecha_novedad,tipo_novedad_id,descripcion,servicio_id,es_prueba,mensajero_id
0,4,2023-11-30,1,A,51,True,7
1,5,2023-11-30,1,Halo,51,True,7
2,6,2023-11-30,1,A,51,True,7
3,7,2023-11-30,1,B,51,True,7
4,8,2023-11-30,1,A,51,True,7
...,...,...,...,...,...,...,...
95,101,2024-02-07,1,Se entrega factura frep 54172 para que sea leg...,637,True,22
96,102,2024-02-07,2,Recojido por Cristian roa,711,True,16
97,103,2024-02-07,1,Por favor arrimar la mx al laboratorio están h...,739,True,3
98,104,2024-02-07,1,Servicio duplicado,732,True,16


In [99]:
updates_fact_table = pd.merge(updates, time_dimension[["date", "time_id"]],
								left_on="fecha_novedad", right_on="date", how="left")

In [None]:
updates_fact_table.drop(columns=["fecha_novedad","date", "es_prueba", "mensajero_id"], inplace=True)

updates_fact_table.rename(
	columns={
		"id": "updates_fact_table_id",
		"tipo_novedad_id": "update_id",
		"servicio_id": "service_id",
		"descripcion": "update_description",
	}, inplace=True
)

In [106]:
print(updates_fact_table.head())
print(updates_fact_table.shape)

   updates_fact_table_id  update_id update_description  service_id  time_id
0                      4          1                  A          51     73.0
1                      5          1               Halo          51     73.0
2                      6          1                  A          51     73.0
3                      7          1                  B          51     73.0
4                      8          1                  A          51     73.0
(5208, 5)


## Load

In [105]:
updates_fact_table.to_sql(
	"UPDATES_FACT_TABLE", OLAP_connection, if_exists="replace", index=False
)

208