In [1]:
import mlflow
import boto3
import os
import joblib
import json
import pandas as pd
import psycopg2

# --- Data source -----------------------------------------------------------
# Table that the load cell reads in full.
TABLE_NAME = 'clean_data'

# --- MLflow server ---------------------------------------------------------
# Host and port are combined into the single URI used below for both the
# tracking endpoint and the model-registry endpoint.
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000
MLFLOW_TRACKING_URI = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"

# --- Experiment / run naming -----------------------------------------------
# Indexing os.environ (rather than os.getenv) raises KeyError right here
# when EXPERIMENT_NAME is not set.
EXPERIMENT_NAME = os.environ['EXPERIMENT_NAME']
RUN_NAME = 'baseline_model'

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_registry_uri(MLFLOW_TRACKING_URI)

* 'schema_extra' has been renamed to 'json_schema_extra'


In [2]:
# Read the whole TABLE_NAME table from Postgres into a DataFrame.
#
# Credentials come from DB_DESTINATION_* environment variables so that no
# secret is stored in the notebook.
postgres_credentials = {
    "host": os.getenv("DB_DESTINATION_HOST"),
    "port": os.getenv("DB_DESTINATION_PORT"),
    "dbname": os.getenv("DB_DESTINATION_NAME"),
    "user": os.getenv("DB_DESTINATION_USER"),
    "password": os.getenv("DB_DESTINATION_PASSWORD"),
}

# Built in a single expression (no in-place update) so re-running the cell
# always starts from the same settings.
connection = {
    "sslmode": "require",
    "target_session_attrs": "read-write",
    **postgres_credentials,
}

# psycopg2's `with connection:` only scopes a transaction (commit on success,
# rollback on error); it does NOT close the connection. Close it explicitly
# so repeated runs of this cell do not leak open sessions on the server.
conn = psycopg2.connect(**connection)
try:
    with conn:
        with conn.cursor() as cur:
            # TABLE_NAME is a notebook constant, not user input.
            cur.execute(f"SELECT * FROM {TABLE_NAME}")
            data = cur.fetchall()
            # First item of each cursor.description entry is the column name.
            columns = [col[0] for col in cur.description]
finally:
    conn.close()

df = pd.DataFrame(data, columns=columns)

df.head(2)

Unnamed: 0,flat_id,building_id,build_year,building_type_int,latitude,longitude,ceiling_height,flats_count,floors_total,has_elevator,floor,kitchen_area,living_area,rooms,is_apartment,studio,total_area,price
0,22008,15220,1989,4,55.642132,37.599354,2.7,260,17,True,2,10.1,44.799999,3,False,False,73.800003,14000000.0
1,22009,10411,1973,4,55.675995,37.721516,2.64,215,9,True,3,6.0,28.0,2,False,False,44.0,9450000.0


In [3]:
# Load the serialized baseline estimator. joblib artifacts are pickle-based,
# so only load files that come from a trusted source.
model = joblib.load('../models/baseline_model.pkl')

In [4]:
# Parse the stored cross-validation results; they are passed to
# mlflow.log_metrics in the logging cell.
with open('../cv_results/cv_res.json', 'r') as metrics_file:
    metrics = json.loads(metrics_file.read())

In [5]:
# Separate the feature matrix from the target column ('price').
X = df.drop(columns=['price'])
y = df['price']

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; the fixed random_state keeps the split the same
# on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Predict on the hold-out rows only, so that the output sample passed to
# mlflow.models.infer_signature corresponds row-for-row to its X_test input
# (previously predictions for the full X were paired with X_test).
prediction = model.predict(X_test)

In [8]:
# Requirements file passed to log_model as pip_requirements.
pip_requirements = '../requirements.txt'

# First ten hold-out rows, stored alongside the model as an input example.
input_example = X_test.iloc[:10]

# Input/output schema inferred from the hold-out features and the predictions.
signature = mlflow.models.infer_signature(
    model_input=X_test,
    model_output=prediction,
)

  inputs = _infer_schema(model_input) if model_input is not None else None


In [11]:
# Select the experiment by name and keep its id for the run below.
experiment = mlflow.set_experiment(EXPERIMENT_NAME)
experiment_id = experiment.experiment_id

# One run: log the CV metrics, then log the model and register it.
with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    run_id = run.info.run_id

    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path='ep',
        registered_model_name="estate_prices_model",
        signature=signature,
        input_example=input_example,
        pip_requirements=pip_requirements,
    )

Successfully registered model 'estate_prices_model'.
2026/01/26 14:24:19 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: estate_prices_model, version 1
Created version '1' of model 'estate_prices_model'.
