In [2]:
import os
import pandas as pd

DATA_DIR = "/usr/mlflow/data"

anime = pd.read_csv(os.path.join(DATA_DIR, "anime_clean.csv"))
ratings_train = pd.read_csv(os.path.join(DATA_DIR, "ratings_train.csv"))
ratings_test = pd.read_csv(os.path.join(DATA_DIR, "ratings_test.csv"))

print("Anime:", anime.shape)
print("Train:", ratings_train.shape)
print("Test:", ratings_test.shape)

Anime: (12294, 7)
Train: (633755, 3)
Test: (316831, 3)


In [5]:
anime_stats = (
    ratings_train.groupby("anime_id")
    .agg(avg_rating=("rating", "mean"),
         count=("rating", "count"))
    .reset_index()
)

# 過濾掉評分數太少的動畫
anime_stats = anime_stats[anime_stats["count"] > 50]

# 取 Top-10
top10 = anime_stats.sort_values(
    ["avg_rating", "count"], ascending=[False, False]
).head(10)

top10_ids = top10["anime_id"].tolist()
top10_titles = anime[anime["anime_id"].isin(top10_ids)]["name"].tolist()

print("Top-10 Anime:", top10_titles)


Top-10 Anime: ['Kimi no Na wa.', 'Fullmetal Alchemist: Brotherhood', 'Gintama°', 'Steins;Gate', 'Gintama&#039;', 'Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou', 'Hunter x Hunter (2011)', 'Ginga Eiyuu Densetsu', 'Gintama&#039;: Enchousen', 'Gintama']


In [6]:
import mlflow.pyfunc

class PopularTop10(mlflow.pyfunc.PythonModel):
    def __init__(self, df, top10_ids):
        self.df = df
        self.top10_ids = top10_ids

    # 這裡的 predict 就是未來 API 推薦會呼叫的方法
    def predict(self, context, model_input):
        return self.df[self.df["anime_id"].isin(self.top10_ids)]["name"].tolist()

In [9]:
import mlflow
from mlflow.tracking import MlflowClient

mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("anime-recsys-model-registry")

with mlflow.start_run(run_name="popular-top10") as run:
    # 紀錄參數與評估指標
    mlflow.log_param("model_type", "PopularTop10")
    mlflow.log_metric("avg_rating_mean", top10["avg_rating"].mean())
    mlflow.log_metric("min_count", top10["count"].min())

    # 註冊模型到 Registry
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=PopularTop10(anime, top10_ids),
        registered_model_name="AnimeRecsysModel"
    )


2025/09/22 16:39:42 INFO mlflow.tracking.fluent: Experiment with name 'anime-recsys-model-registry' does not exist. Creating a new experiment.
Registered model 'AnimeRecsysModel' already exists. Creating a new version of this model...
2025/09/22 16:39:43 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: AnimeRecsysModel, version 2
Created version '2' of model 'AnimeRecsysModel'.


In [8]:
client = MlflowClient()

# 把 Version 1 升級到 Staging
client.transition_model_version_stage(
    name="AnimeRecsysModel",
    version=1,
    stage="Staging"
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1758558605820, current_stage='Staging', description='', last_updated_timestamp=1758558798877, name='AnimeRecsysModel', run_id='5e9cba83ade242f5985cd41466f4f38c', run_link='', source='/mlflow/artifacts/1/5e9cba83ade242f5985cd41466f4f38c/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>