In [1]:
import os
from datetime import datetime, timedelta
from urllib.parse import quote

import joblib
import mlflow.sklearn
import pandas as pd
from dotenv import load_dotenv
from mlflow.tracking import MlflowClient
from sqlalchemy import create_engine

# Load environment variables (such as the database password read in the next cell)
# from the .env file one directory above the notebook's working directory.
load_dotenv(dotenv_path='../.env')

True

In [2]:
# Database password comes from the environment (populated by load_dotenv above).
PG_PREDICTION_PWD = os.getenv('PG_PREDICTION_PWD')
if PG_PREDICTION_PWD is None:
    # Fail fast: otherwise the literal text "None" would be sent as the password
    # and the problem would only surface later as an authentication error.
    raise RuntimeError("Environment variable PG_PREDICTION_PWD is not set; check ../.env")

# Percent-encode the password so characters such as '@', ':', '/' or '%'
# cannot be mistaken for URL delimiters when SQLAlchemy parses the string.
url_conexion = (
    'postgresql+psycopg2://prediction_user:'
    f"{quote(PG_PREDICTION_PWD, safe='')}"
    '@localhost:5432/reservations_db'
)

engine = create_engine(url_conexion)

# Pull the whole reservations table into memory as the raw input frame.
nombre_tabla = 'reservations'
df = pd.read_sql_query(f'SELECT * FROM {nombre_tabla}', engine)

In [3]:
# Point MLflow at the local tracking server, then open a registry client against it.
mlflow.set_tracking_uri(uri="http://localhost:5000")
client = MlflowClient()

# List the registered models so the name used in the following cells can be checked by eye.
models = client.search_registered_models()
print("Models registered in MLflow:")
for model in models:
    print(f"Model name: {model.name}")

Models registered in MLflow:
Model name: sk-learn-logistic-regression-reg-model


In [4]:
# Registry coordinates of the model to load: its registered name and the alias to resolve.
model_name = "sk-learn-logistic-regression-reg-model"
model_alias = "champion"

# Resolve the alias to a concrete model version; later cells read its run_id.
model_version_details = client.get_model_version_by_alias(
    name=model_name,
    alias=model_alias,
)

In [5]:
# Download the preprocessor that was logged with the run behind the resolved model version.
artifact_path = 'preprocessing/preprocessor_model.pkl'
local_path = mlflow.artifacts.download_artifacts(
    run_id=model_version_details.run_id,
    artifact_path=artifact_path,
    dst_path='./assets',
)

# Load from the path MLflow returned rather than a hard-coded location, so the
# file that is read is always the one that was just downloaded.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only use this with artifacts from a tracking server you trust.
preprocessor = joblib.load(local_path)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 55.56it/s]


In [6]:
# Load the model through its registry alias (models:/<name>@<alias>).
model_uri = f"models:/{model_name}@{model_alias}"
model = mlflow.sklearn.load_model(model_uri=model_uri)

Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 82.32it/s]


In [7]:
# Show the loaded estimator as the cell output.
model

In [8]:
# Load the MLflow model metadata stored under the "model" folder of the run
# that produced the resolved model version.
model_info = mlflow.models.Model.load(f"runs:/{model_version_details.run_id}/model")
# Keep the signature recorded in that metadata.
signature = model_info.signature

Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 117.29it/s]


In [9]:
# Feature groups used to select the model inputs, in the order they are concatenated later.

# Categorical (string-valued) features.
cat_cols = [
    'hotel',
    'meal',
    'market_segment',
    'distribution_channel',
    'reserved_room_type',
    'deposit_type',
    'customer_type',
]

# Numerical features; total_stay and total_people are derived in the dataset cell.
num_cols = [
    'lead_time',
    'days_in_waiting_list',
    'adr',
    'total_stay',
    'total_people',
]

# Third feature group.
# NOTE(review): despite the "bin" name, the displayed data shows counts and IDs here
# (e.g. agent=240, company=390, previous_bookings_not_canceled=13), not only 0/1 flags.
bin_cols = [
    'is_repeated_guest',
    'previous_cancellations',
    'previous_bookings_not_canceled',
    'booking_changes',
    'agent',
    'company',
    'required_car_parking_spaces',
    'total_of_special_requests',
]

In [10]:
# Build the model input frame from the raw reservations table.
dataset = (
    df
    .drop_duplicates()
    # Replace every missing value with 0 before deriving features.
    .fillna(0)
    # Derived totals: nights stayed (weekend + week) and people (adults + children + babies).
    .assign(
        total_stay=lambda frame: frame['stays_in_weekend_nights'] + frame['stays_in_week_nights'],
        total_people=lambda frame: frame['adults'] + frame['children'] + frame['babies'],
    )
    # Keep only the three feature groups, in that column order.
    .loc[:, cat_cols + num_cols + bin_cols]
    # Force integer dtype on these columns.
    # NOTE(review): presumably needed because columns that contained NULLs arrive as floats — confirm.
    .astype({'total_people': 'int64', 'agent': 'int64', 'company': 'int64'})
)

In [11]:
# Show the prepared feature frame as the cell output for a visual check.
dataset

Unnamed: 0,hotel,meal,market_segment,distribution_channel,reserved_room_type,deposit_type,customer_type,lead_time,days_in_waiting_list,adr,total_stay,total_people,is_repeated_guest,previous_cancellations,previous_bookings_not_canceled,booking_changes,agent,company,required_car_parking_spaces,total_of_special_requests
0,Resort Hotel,BB,Groups,TA/TO,D,Non Refund,Transient,238,0,89.00,3,1,0,0,0,0,1,0,0,0
1,Resort Hotel,BB,Online TA,TA/TO,A,No Deposit,Transient,33,0,154.00,2,2,0,0,0,0,240,0,1,1
2,City Hotel,BB,Groups,TA/TO,A,Non Refund,Transient,239,0,60.00,1,2,0,1,0,0,1,0,0,0
3,Resort Hotel,BB,Online TA,TA/TO,A,No Deposit,Transient,64,0,80.10,3,2,0,0,0,0,240,0,0,2
4,City Hotel,BB,Online TA,TA/TO,D,No Deposit,Transient,34,0,95.47,5,2,0,0,0,0,7,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,City Hotel,BB,Online TA,TA/TO,A,No Deposit,Transient,36,0,140.00,4,2,0,0,0,0,9,0,0,1
96,City Hotel,BB,Corporate,Corporate,A,No Deposit,Transient,1,0,65.00,1,1,0,0,0,0,40,0,0,0
97,Resort Hotel,BB,Corporate,TA/TO,A,No Deposit,Transient,1,0,35.00,3,1,1,0,13,1,0,390,0,1
98,Resort Hotel,BB,Online TA,TA/TO,A,No Deposit,Transient,53,0,52.00,4,2,0,0,0,0,240,0,0,0


In [12]:
# Apply the preprocessor downloaded from the MLflow run to the prepared features.
new_dataset = preprocessor.transform(dataset)

In [13]:
# Print the transformed features as plain text to inspect the encoded columns
# (e.g. cat__hotel_City Hotel, cat__meal_BB).
print(new_dataset)

    cat__hotel_City Hotel  cat__hotel_Resort Hotel  cat__meal_BB  \
0                     0.0                      1.0           1.0   
1                     0.0                      1.0           1.0   
2                     1.0                      0.0           1.0   
3                     0.0                      1.0           1.0   
4                     1.0                      0.0           1.0   
..                    ...                      ...           ...   
95                    1.0                      0.0           1.0   
96                    1.0                      0.0           1.0   
97                    0.0                      1.0           1.0   
98                    0.0                      1.0           1.0   
99                    1.0                      0.0           1.0   

    cat__meal_FB  cat__meal_HB  cat__meal_SC  cat__meal_Undefined  \
0            0.0           0.0           0.0                  0.0   
1            0.0           0.0           0.0 

In [14]:
# Predicted class label for every transformed row.
pred = model.predict(new_dataset)
# Predicted class probabilities for the same rows.
pred_prob = model.predict_proba(new_dataset)



In [15]:
# Show the label array and the probability array together as the cell output.
pred, pred_prob

(array([1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
        0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64),
 array([[1.48950041e-02, 9.85104996e-01],
        [9.98838677e-01, 1.16132262e-03],
        [3.19227452e-04, 9.99680773e-01],
        [7.48198893e-01, 2.51801107e-01],
        [7.71439894e-01, 2.28560106e-01],
        [4.82477988e-01, 5.17522012e-01],
        [7.30005178e-01, 2.69994822e-01],
        [7.93527644e-01, 2.06472356e-01],
        [9.27596310e-01, 7.24036904e-02],
        [8.93102636e-01, 1.06897364e-01],
        [7.40595671e-01, 2.59404329e-01],
        [7.43669657e-01, 2.56330343e-01],
        [8.45936868e-02, 9.15406313e-01],
        [7.02395761e-01, 2.97604239e-01],
        [6.33192763e-01, 3.66807237e-01],
        [9.0011