In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import r2_score
from matplotlib import pyplot as plt
import seaborn as sns
sns.set()

import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import pandas as pd

In [2]:
# データセットの読み込み
dataset = fetch_california_housing(as_frame=True)
df = dataset['frame']

# データセットを説明変数と目的変数に分ける
target_col = 'MedHouseVal'
X, y = df.drop(columns=[target_col]), df[target_col]

# データセットを訓練と検証データに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
signature = infer_signature(X_train, y_train)

In [3]:
def train(n_estimators, max_depth):
    # モデルの定義
    model = RandomForestRegressor(
                n_estimators = n_estimators,
                max_depth = max_depth,
                criterion = 'squared_error',  # 'mse'
                random_state = 0,
            )

    # 訓練
    model.fit(X_train, y_train)

    # 検証データの予測
    y_pred = model.predict(X_test)

    # MSE誤差, R2値の算出
    mse = MSE(y_pred, y_test)
    R2 = r2_score(y_pred, y_test)

    return model, mse, R2

In [4]:
# 管理IDを発行（コードの実行に対して）
with mlflow.start_run():
    # ハイパーパラメータの試行水準
    cand_n_estimators = [10, 100, 1000]
    cand_max_depth = [1, 5, 10]

    trial = 0
    for n_estimators in cand_n_estimators:
        for max_depth in cand_max_depth:
            # 管理IDを発行（各パラメータ水準に対して）
            with mlflow.start_run(nested=True):
                trial += 1
                model, mse, R2 = train(n_estimators, max_depth)
                # 小数点以下３桁まで出力
                print(f"trial {trial}: n_estimators={n_estimators}, max_depth={max_depth}, MSE = {mse:.3}, R2 = {R2:.3}")
                # 追加箇所
                # ハイパーパラメータ, 評価指標, 学習済みモデルをMLflowへ保存
                mlflow.log_param("n_estimators", n_estimators)
                mlflow.log_param("max_depth", max_depth)
                mlflow.log_metric("mse", mse)
                mlflow.log_metric("R2", R2)

                mlflow.sklearn.log_model(model,
                                         name="testmodel",
                                         signature=signature,
                                         input_example=X_train[:5])

trial 1: n_estimators=10, max_depth=1, MSE = 0.91, R2 = -1.33
🏃 View run crawling-bear-311 at: http://mlflow:5000/#/experiments/0/runs/29320c771634467586290261fedea124
🧪 View experiment at: http://mlflow:5000/#/experiments/0
trial 2: n_estimators=10, max_depth=5, MSE = 0.487, R2 = 0.38
🏃 View run clean-ape-428 at: http://mlflow:5000/#/experiments/0/runs/7dd1c1d3e6ba4359a7d57c79b71322ac
🧪 View experiment at: http://mlflow:5000/#/experiments/0
trial 3: n_estimators=10, max_depth=10, MSE = 0.312, R2 = 0.676
🏃 View run upbeat-midge-23 at: http://mlflow:5000/#/experiments/0/runs/6eec7e1b770f4b259d696cb4179d7f60
🧪 View experiment at: http://mlflow:5000/#/experiments/0
trial 4: n_estimators=100, max_depth=1, MSE = 0.909, R2 = -1.32
🏃 View run bouncy-snipe-828 at: http://mlflow:5000/#/experiments/0/runs/e471b591c026429e841321f863baa534
🧪 View experiment at: http://mlflow:5000/#/experiments/0
trial 5: n_estimators=100, max_depth=5, MSE = 0.481, R2 = 0.382
🏃 View run enchanting-snipe-687 at: htt