In [5]:
import joblib
# Restore the previously fitted estimator from disk.
# joblib deserialises through pickle, which can execute arbitrary code on
# load, so this file must come from a trusted source.
with open('fitted_model.pkl', 'rb') as model_file:
    model = joblib.load(model_file)

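See the scikit-learn documentation on the security and maintainability limitations of pickle-based model persistence: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations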


In [18]:
# MLflow naming: the experiment that owns the run, the registry entry the
# model is published under, and the display name of the run itself.
EXPERIMENT_NAME = "yandex_realty_aleksandra"
REGISTRY_MODEL_NAME = "base_model_aleksandra"
RUN_NAME = "save_base_model"

# Source table queried for the evaluation data.
TABLE_NAME = "flats_clean_table"

In [8]:
import mlflow
# Location of the MLflow server this notebook talks to.
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000

# Tracking and the model registry are both pointed at the same URI.
tracking_uri = "http://{}:{}".format(TRACKING_SERVER_HOST, TRACKING_SERVER_PORT)
for _set_uri in (mlflow.set_tracking_uri, mlflow.set_registry_uri):
    _set_uri(tracking_uri)


In [9]:
import os
import psycopg
import pandas as pd

def _require_env(*names):
    """Return the values of the named environment variables.

    Args:
        *names: Environment variable names that must be set and non-empty.

    Returns:
        A tuple with the value of each variable, in the order requested.

    Raises:
        RuntimeError: If any variable is unset or empty; the message lists
            every offending name so they can all be fixed in one go.
    """
    missing = [name for name in names if not os.environ.get(name)]
    if missing:
        raise RuntimeError(
            "Missing required environment variables: " + ", ".join(missing)
        )
    return tuple(os.environ[name] for name in names)


# Credentials must already be present in the process environment (exported in
# the shell, or loaded from a .env file before this cell runs); nothing in this
# cell reads a .env file. Re-assigning os.environ[X] = os.getenv(X) is a no-op
# when X is set and an opaque TypeError when it is not, so validate instead.
db_host, db_port, db_name, db_user, db_password = _require_env(
    "DB_DESTINATION_HOST",
    "DB_DESTINATION_PORT",
    "DB_DESTINATION_NAME",
    "DB_DESTINATION_USER",
    "DB_DESTINATION_PASSWORD",
)

# Endpoint of the Yandex Cloud bucket.
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "https://storage.yandexcloud.net"
# Key id / secret of the bucket MLflow is connected to. They are only checked
# here — presumably MLflow's S3 client picks them up from the environment.
_require_env("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")

connection = {"sslmode": "require", "target_session_attrs": "read-write"}
postgres_credentials = {
    "host": db_host,
    "port": db_port,
    "dbname": db_name,
    "user": db_user,
    "password": db_password,
}
connection.update(postgres_credentials)

# Pull the whole table into memory; column names come from the cursor metadata.
# TABLE_NAME is a notebook constant, not user input, so it is interpolated
# directly into the statement.
with psycopg.connect(**connection) as conn:
    with conn.cursor() as cur:
        cur.execute(f"SELECT * FROM {TABLE_NAME}")
        data = cur.fetchall()
        columns = [col[0] for col in cur.description]

df = pd.DataFrame(data, columns=columns)

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the feature frame is the full `df`, so it still contains the
# 'price' target column — confirm the fitted model ignores that column,
# otherwise the evaluation leaks the target.
target = df['price']
X_train, X_test, y_train, y_test = train_test_split(
    df,
    target,
    test_size=0.2,
    random_state=42,
)


In [17]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

# Score the held-out split with the loaded model.
prediction = model.predict(X_test)

# Regression quality measures on the test targets.
mae = mean_absolute_error(y_test, prediction)
mse = mean_squared_error(y_test, prediction)
r2 = r2_score(y_test, prediction)
mape = mean_absolute_percentage_error(y_test, prediction)

# Collected in one mapping so they can be logged together later.
metrics = {
    "mae": mae,
    "mse": mse,
    "r2": r2,
    "mape": mape,
}


In [22]:

# Artefacts that accompany the logged model: dependency list, free-form
# metadata, a sample of the inputs and the input/output signature.
pip_requirements = "../requirements.txt"
metadata = {'model_type': 'monthly'}
input_example = X_test[:10]
signature = mlflow.models.infer_signature(X_test, prediction)

# Reuse the experiment when it already exists, otherwise create it.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment:
    experiment_id = experiment.experiment_id
else:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    run_id = run.info.run_id
    # Log the model, register it under REGISTRY_MODEL_NAME and wait up to
    # 60 seconds for the registry to finish creating the version.
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path='models',
        registered_model_name=REGISTRY_MODEL_NAME,
        signature=signature,
        input_example=input_example,
        pip_requirements=pip_requirements,
        metadata=metadata,
        await_registration_for=60,
    )
    # Attach the evaluation metrics to the same run.
    mlflow.log_metrics(metrics=metrics)


  inputs = _infer_schema(model_input) if model_input is not None else None
Successfully registered model 'base_model_aleksandra'.
2025/01/15 07:18:05 INFO mlflow.tracking._model_registry.client: Waiting up to 60 seconds for model version to finish creation. Model name: base_model_aleksandra, version 1
Created version '1' of model 'base_model_aleksandra'.
