# Updates (transactional)

In [23]:
import pandas as pd

## Database connection

In [24]:
from urllib.parse import quote_plus

import yaml
from sqlalchemy import create_engine


def _build_db_url(db_config):
	"""Assemble a SQLAlchemy database URL from one section of config.yml.

	The user and password are percent-encoded so that characters such as
	"@", ":" or "/" inside them are not mistaken for URL delimiters.
	The values in config.yml are therefore expected to be stored raw
	(not already percent-encoded).

	Parameters
	----------
	db_config : mapping
		Must provide the keys "drivername", "user", "password", "host",
		"port" and "database_name".

	Returns
	-------
	str
		URL of the form drivername://user:password@host:port/database_name.
	"""
	# str() keeps non-string YAML scalars (e.g. an all-digit password) working.
	user = quote_plus(str(db_config["user"]))
	password = quote_plus(str(db_config["password"]))
	return (f"{db_config['drivername']}://{user}:{password}"
			f"@{db_config['host']}:{db_config['port']}/{db_config['database_name']}")


# Read the settings of both databases from the shared configuration file.
with open("../config.yml", "r") as file:
	config = yaml.safe_load(file)
	config_OLTP = config["OLTP"]
	config_OLAP = config["OLAP"]


url_OLTP = _build_db_url(config_OLTP)
url_OLAP = _build_db_url(config_OLAP)

# Note: create_engine returns SQLAlchemy Engine objects; the "*_connection"
# names are kept because the cells below refer to them.
OLTP_connection = create_engine(url_OLTP)
OLAP_connection = create_engine(url_OLAP)

## Extraction

In [25]:
# Time dimension: read in full from the OLAP database.
time_dimension = pd.read_sql_table(table_name="TIME_DIMENSION", con=OLAP_connection)

# Service updates ("novedades"): read in full from the OLTP database.
updates = pd.read_sql_table(
	table_name="mensajeria_novedadesservicio",
	con=OLTP_connection,
)

In [26]:
# Show the first rows of the time dimension, then its (rows, columns) shape.
print(time_dimension.head(), time_dimension.shape, sep="\n")

   time_id                date  year  month  day  day_of_year  day_of_month  \
0        0 2023-09-18 00:00:00  2023      9   18          261            30   
1        1 2023-09-18 00:01:00  2023      9   18          261            30   
2        2 2023-09-18 00:02:00  2023      9   18          261            30   
3        3 2023-09-18 00:03:00  2023      9   18          261            30   
4        4 2023-09-18 00:04:00  2023      9   18          261            30   

   month_str day_str  hour  minute  
0  September  Monday     0       0  
1  September  Monday     0       1  
2  September  Monday     0       2  
3  September  Monday     0       3  
4  September  Monday     0       4  
(502561, 11)


In [27]:
# Show the first rows of the extracted updates, then their (rows, columns) shape.
print(updates.head(), updates.shape, sep="\n")

   id             fecha_novedad  tipo_novedad_id descripcion  servicio_id  \
0   4 2023-11-30 05:00:00+00:00                1           A           51   
1   5 2023-11-30 05:00:00+00:00                1        Halo           51   
2   6 2023-11-30 05:00:00+00:00                1           A           51   
3   7 2023-11-30 05:00:00+00:00                1           B           51   
4   8 2023-11-30 05:00:00+00:00                1           A           51   

   es_prueba  mensajero_id  
0       True             7  
1       True             7  
2       True             7  
3       True             7  
4       True             7  
(5208, 7)


## Transformation

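Truncate the update timestamps to the minute (discarding seconds) and drop their timezone, so they can be matched against the `date` column of the time dimension.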

In [28]:
# Step 1: round every timestamp down to the start of its minute.
fecha_floored = updates["fecha_novedad"].dt.floor("min")

# Step 2: strip the timezone while keeping the same clock time.
# NOTE(review): this assumes TIME_DIMENSION.date uses the same timezone as
# fecha_novedad - confirm.
updates["fecha_novedad"] = fecha_floored.dt.tz_localize(None)

In [29]:
# Display the first five rows of the updates frame.
updates.head(n=5)

Unnamed: 0,id,fecha_novedad,tipo_novedad_id,descripcion,servicio_id,es_prueba,mensajero_id
0,4,2023-11-30 05:00:00,1,A,51,True,7
1,5,2023-11-30 05:00:00,1,Halo,51,True,7
2,6,2023-11-30 05:00:00,1,A,51,True,7
3,7,2023-11-30 05:00:00,1,B,51,True,7
4,8,2023-11-30 05:00:00,1,A,51,True,7


In [30]:
# Attach a time_id to every update by matching its timestamp against the
# time dimension's date column.
#   - how="left" keeps every update row; an update whose timestamp has no
#     match in the time dimension gets a missing time_id.
#   - validate="many_to_one" makes pandas raise MergeError if a date occurs
#     more than once in the time dimension, which would otherwise silently
#     duplicate fact rows.
updates_fact_table = pd.merge(
	updates,
	time_dimension[["date", "time_id"]],
	left_on="fecha_novedad",
	right_on="date",
	how="left",
	validate="many_to_one",
)

In [31]:
# Remove the columns that are not part of the fact table, then give the
# remaining source columns their fact-table names.
updates_fact_table = (
	updates_fact_table
	.drop(columns=["id", "fecha_novedad", "date", "es_prueba", "mensajero_id"])
	.rename(columns={
		"tipo_novedad_id": "update_id",
		"servicio_id": "service_id",
		"descripcion": "update_description",
	})
)

In [32]:
# Turn the current row index into a column, name it updates_fact_table_id,
# and make that column the index again so it serves as the table's key.
updates_fact_table = (
	updates_fact_table
	.reset_index()
	.rename(columns={"index": "updates_fact_table_id"})
	.set_index("updates_fact_table_id")
)

In [33]:
# Show the first rows of the finished fact table, then its (rows, columns) shape.
print(updates_fact_table.head(), updates_fact_table.shape, sep="\n")

                       update_id update_description  service_id  time_id
updates_fact_table_id                                                   
0                              1                  A          51   105420
1                              1               Halo          51   105420
2                              1                  A          51   105420
3                              1                  B          51   105420
4                              1                  A          51   105420
(5208, 4)


## Load

In [34]:
# Write the fact table to the OLAP database.
#   - if_exists="replace": an existing UPDATES_FACT_TABLE is dropped and recreated.
#   - index=True: the DataFrame index is written as a column.
updates_fact_table.to_sql(
	name="UPDATES_FACT_TABLE",
	con=OLAP_connection,
	if_exists="replace",
	index=True,
)

208