In [1]:
import os
import pandas as pd
import mlflow
import mlflow.pyfunc
from mlflow.tracking import MlflowClient

# 設定 MLflow Tracking
mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("anime-recsys-serve")

# 資料路徑
DATA_DIR = "/usr/mlflow/data"
anime = pd.read_csv(os.path.join(DATA_DIR, "anime_clean.csv"))
ratings_train = pd.read_csv(os.path.join(DATA_DIR, "ratings_train.csv"))

print("Anime:", anime.shape)
print("Train:", ratings_train.shape)

# 訓練 Popular Top-10 模型
top10 = (
    ratings_train.groupby("anime_id")["rating"]
    .mean()
    .reset_index()
    .merge(anime[["anime_id", "name"]], on="anime_id")
    .sort_values("rating", ascending=False)
    .head(10)
)

top10_ids = top10["anime_id"].tolist()
top10_names = top10["name"].tolist()
print("Top 10 Anime:", top10_names)

# 定義 PopularTop10 模型
class PopularTop10(mlflow.pyfunc.PythonModel):
    def __init__(self, anime_df, top10_ids):
        self.anime = anime_df
        self.top10_ids = top10_ids

    def predict(self, context, model_input):
        # 輸入是動畫名稱，但 PopularTop10 不看輸入，永遠回傳 Top10
        return [self.anime[self.anime["anime_id"].isin(self.top10_ids)]["name"].tolist()]

2025/09/24 16:20:15 INFO mlflow.tracking.fluent: Experiment with name 'anime-recsys-serve' does not exist. Creating a new experiment.


Anime: (12294, 7)
Train: (633755, 3)
Top 10 Anime: ['Dead Girl Trailer', 'Kirin Monoshiri Yakata', 'Marginal Prince: Gekkeiju no Ouji-tachi - Tokyo Merry-Go-Round', 'Hontou ni Atta', 'Zeonic Toyota Special Movie', 'Ketsuinu', 'Shusaku Liberty', 'Uchuu no Kishi Tekkaman Blade OVA: Twin Blood', 'Mobile Suit Gundam Unicorn: One of Seventy Two', 'Crayon Shin-chan Movie 17: Otakebe! Kasukabe Yasei Oukoku']


In [2]:
client = MlflowClient()

with mlflow.start_run(run_name="popular-top10-registry") as run:
    mlflow.log_param("model_type", "PopularTop10")

    # 註冊模型到 Registry
    result = mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=PopularTop10(anime, top10_ids),
        registered_model_name="AnimeRecsysModel"
    )

    run_id = run.info.run_id
    print("Run ID:", run_id)

# 找到剛剛註冊的最新版本
latest_versions = client.get_latest_versions("AnimeRecsysModel")
for v in latest_versions:
    print("Version:", v.version, "Stage:", v.current_stage)
    new_version = v.version  # 取最新的 version

Registered model 'AnimeRecsysModel' already exists. Creating a new version of this model...
2025/09/24 16:20:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: AnimeRecsysModel, version 5


Run ID: ad990ec59fba4f33980ef79c73713241
Version: 4 Stage: Staging
Version: 5 Stage: None


Created version '5' of model 'AnimeRecsysModel'.
  latest_versions = client.get_latest_versions("AnimeRecsysModel")


In [3]:
# 把最新版本移到 Staging
client.transition_model_version_stage(
    name="AnimeRecsysModel",
    version=new_version,
    stage="Staging",
    archive_existing_versions=True
)

print(f"✅ Model AnimeRecsysModel v{new_version} 已移到 Staging")

  client.transition_model_version_stage(


✅ Model AnimeRecsysModel v5 已移到 Staging
