In [9]:
import os
import shutil
from datetime import datetime, timedelta
from typing import NoReturn, Literal, Dict, Any

import numpy as np
import pandas as pd

import mlflow
from mlflow.models import infer_signature

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

from sqlalchemy import create_engine

640

In [None]:
# Display the S3 endpoint configured for MLflow artifacts (None when the variable is unset).
os.environ.get("MLFLOW_S3_ENDPOINT_URL")

In [None]:
# Display the MLflow tracking server URI taken from the environment (None when the variable is unset).
os.environ.get("MLFLOW_TRACKING_URI")

In [None]:
# Input columns fed to the model, in the order they are selected from the table.
FEATURES = [
    "MedInc",
    "HouseAge",
    "AveRooms",
    "AveBedrms",
    "Population",
    "AveOccup",
    "Latitude",
    "Longitude",
]

# Column the model is trained to predict.
TARGET = "MedHouseVal"

# Unused draft, kept for reference:
#models = dict(zip(['rf', 'lr', 'hgb'], [RandomForestRegressor(), LinearRegression(), HistGradientBoostingRegressor()]))

In [None]:
# Build the database engine. The connection URL (which embeds credentials) is
# taken from the DATABASE_URL environment variable so the real password never
# has to be written into the notebook; the original placeholder URL is kept as
# the fallback when the variable is not set.
engine = create_engine(
    os.getenv("DATABASE_URL", "postgresql://postgres:____@localhost:5432/postgres")
)

In [None]:
# Load the full california_housing table into a DataFrame.
data = pd.read_sql_query(
    sql="SELECT * FROM california_housing",
    con=engine,
)

In [None]:
# Preview the first five rows (five is the default for head()).
data.head()

In [None]:
# Preprocessing.
# One seed shared by the split and the model so that re-running the notebook
# reproduces the same metrics (the forest was previously unseeded).
RANDOM_STATE = 42

# Split the frame into features and target.
X, y = data[FEATURES], data[TARGET]

# Hold out 20% of the rows as the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler()
X_train_fitted = scaler.fit_transform(X_train)
X_test_fitted = scaler.transform(X_test)

# Initialise the model.
model = RandomForestRegressor(random_state=RANDOM_STATE)

In [None]:
def train_model(model, X_train, X_test, y_train, y_test, artifact_name="model_linreg"):
    """Fit ``model``, then log its test metrics and the model itself to MLflow.

    Intended to be called inside a ``with mlflow.start_run(...)`` block so the
    metrics and the model artifact are attached to that run.

    Parameters
    ----------
    model
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train
        Training features and target passed to ``model.fit``.
    X_test, y_test
        Held-out features and target used to compute the metrics.
    artifact_name : str, optional
        Name used both as the artifact path for ``mlflow.sklearn.log_model``
        and as the local directory for ``mlflow.sklearn.save_model``
        (relative to the current working directory). Defaults to
        ``"model_linreg"``, the name that was previously hard-coded.

    Returns
    -------
    dict
        The logged metrics, keyed ``"r2_score"``, ``"rmse"`` and ``"mae"``.
    """
    # Fit the model on the training split.
    model.fit(X_train, y_train)

    # Predict on the held-out split.
    prediction = model.predict(X_test)

    # Compute the evaluation metrics (RMSE is the square root of the MSE).
    metrics = {
        "r2_score": r2_score(y_test, prediction),
        "rmse": mean_squared_error(y_test, prediction) ** 0.5,
        "mae": mean_absolute_error(y_test, prediction),
    }

    # Log all metrics to the active run.
    mlflow.log_metrics(metrics)

    # Log the model to the tracking server together with its input/output signature.
    signature = infer_signature(X_test, prediction)
    mlflow.sklearn.log_model(model, artifact_name, signature=signature)

    # save_model refuses to write into a directory that already holds a model,
    # which made every call after the first one fail. Remove the stale copy
    # first, but only when the directory really is a previously saved MLflow
    # model (it contains an "MLmodel" file), so unrelated directories are
    # never deleted.
    if os.path.isfile(os.path.join(artifact_name, "MLmodel")):
        shutil.rmtree(artifact_name)
    mlflow.sklearn.save_model(model, artifact_name)

    return metrics

In [None]:
# Define the experiment and make it the active one.
# create_experiment raises if the name is already taken, so only create the
# experiment when it does not exist yet; this keeps the cell re-runnable.
exp_name = "random_forest_test"
if mlflow.get_experiment_by_name(exp_name) is None:
    mlflow.create_experiment(exp_name, artifact_location=f"s3://kda-mlflow-artifacts/{exp_name}")
mlflow.set_experiment(exp_name)

In [None]:
# End the currently active MLflow run, if there is one.
# NOTE(review): presumably here to clear a run left open by an earlier,
# interrupted cell before a new run is started below — confirm it is still needed.
mlflow.end_run()

In [None]:
# Train on the scaled splits inside a run named "run_1"; the run is closed
# automatically when the with-block exits.
with mlflow.start_run(run_name="run_1"):
    train_model(
        model=model,
        X_train=X_train_fitted,
        X_test=X_test_fitted,
        y_train=y_train,
        y_test=y_test,
    )

In [None]:
# Look up an experiment by name and display it.
# NOTE(review): "new_test_ml" is not the experiment created earlier in this
# notebook ("random_forest_test") — confirm which experiment is meant here.
mlflow.get_experiment_by_name("new_test_ml")

In [None]:
# Create an MLflow client with default arguments.
client = mlflow.client.MlflowClient()

In [None]:
# Display the tracking URI the client is bound to.
client.tracking_uri