In [3]:
import os, mlflow
# Point the MLflow client at the tracking server named by the
# MLFLOW_TRACKING_URI environment variable, falling back to the
# "http://mlflow:5000" default when the variable is unset.
mlflow.set_tracking_uri(
    os.environ.get("MLFLOW_TRACKING_URI", "http://mlflow:5000")
)

# Select the experiment used by the runs below and echo the effective
# configuration so it is visible in the cell output.
exp = mlflow.set_experiment("anime-recsys")
print(f"Tracking URI: {mlflow.get_tracking_uri()} ExpID: {exp.experiment_id}")

Tracking URI: http://mlflow:5000 ExpID: 1


In [4]:
import time
from pathlib import Path

# Sanity-check run: logs two params, a five-point metric series and one
# text-file artifact under the run name "day2-sanity".
with mlflow.start_run(run_name="day2-sanity"):
    mlflow.log_params({"algo": "popular", "top_k": 10})

    # Synthetic metric values: 0.20 plus 0.02 per step, for steps 1..5.
    for step in range(1, 6):
        precision_value = 0.20 + step * 0.02
        mlflow.log_metric("precision_at_k", precision_value, step=step)
        time.sleep(0.05)  # brief pause between consecutive metric writes

    # Create the local file first, then attach it to the active run.
    readme_file = Path("artifacts/README.txt")
    readme_file.parent.mkdir(exist_ok=True)
    readme_file.write_text("Day2 sanity run for MLflow.")
    mlflow.log_artifact("artifacts/README.txt")

In [6]:
import pandas as pd
import mlflow
import mlflow.pyfunc as pyfunc
from pathlib import Path
from mlflow.models.signature import infer_signature

# Titles that make up the popularity chart used by this notebook.
POPULAR = [
    "Naruto",
    "One Piece",
    "Bleach",
    "Death Note",
    "Your Name",
    "Attack on Titan",
]

# Prepare the artifact: one title per line, UTF-8 encoded, under ./artifacts.
popular_file = Path("artifacts/popular.txt")
popular_file.parent.mkdir(exist_ok=True)
popular_file.write_text("\n".join(POPULAR), encoding="utf-8")

class PopularRecs(pyfunc.PythonModel):
    """Popularity-based recommender packaged as an MLflow ``PythonModel``.

    The ranking is read from the ``popular_list`` artifact in
    :meth:`load_context`; :meth:`predict` returns the first ``k`` ranked
    titles that do not appear in the caller's ``liked_titles``.
    """

    # Number of recommendations returned when the input has no usable "k".
    DEFAULT_K = 5

    def load_context(self, context):
        """Load the popularity ranking into memory.

        Per the PythonModel contract this hook runs when MLflow loads the
        model. ``context.artifacts`` maps artifact names to local file paths;
        the ``popular_list`` entry is read as UTF-8 text, one title per line.

        Args:
            context: MLflow model context exposing an ``artifacts`` mapping
                that contains the key ``"popular_list"``.
        """
        with open(context.artifacts["popular_list"], encoding="utf-8") as f:
            # Strip surrounding whitespace/newlines and skip blank lines.
            self.popular_list = [line.strip() for line in f if line.strip()]

    def predict(self, context, model_input: pd.DataFrame):
        """Recommend the top-k popular titles that were not already liked.

        Steps:
            1. Collect every title found in the ``liked_titles`` column
               (all rows are merged into one de-duplicated set).
            2. Read ``k`` from the first row of the ``k`` column, falling
               back to ``DEFAULT_K`` when the column, the row or the value
               is missing.
            3. Walk the popularity ranking in order, skip liked titles and
               keep the first ``k`` survivors.

        Args:
            context: MLflow model context (unused here).
            model_input: DataFrame with a ``liked_titles`` column whose cells
                are iterables of titles, e.g. ``[["Naruto", "Bleach"]]``, and
                an optional ``k`` column.

        Returns:
            A one-row DataFrame whose ``recommendations`` cell holds the list
            of recommended titles.

        Raises:
            KeyError: if ``model_input`` has no ``liked_titles`` column.
        """
        # Merge the liked titles of every row into a single set.
        watched_set = set()
        for entry in model_input["liked_titles"].tolist():
            if isinstance(entry, str):
                # A bare string is one title; iterating it would add its
                # individual characters instead.
                watched_set.add(entry)
                continue
            if pd.api.types.is_scalar(entry) and pd.isna(entry):
                # Missing cell (None / NaN): nothing was liked in this row.
                continue
            watched_set.update(entry)

        # Take k from the first row when available; otherwise use the default.
        k = self.DEFAULT_K
        if "k" in model_input.columns and len(model_input) > 0:
            first_k = model_input["k"].iloc[0]
            if not (pd.api.types.is_scalar(first_k) and pd.isna(first_k)):
                k = int(first_k)
        # A negative k must not be used as a slice bound: [:-1] would drop
        # items from the end of the ranking instead of returning nothing.
        k = max(k, 0)

        # Filter the ranking in order and keep the first k remaining titles.
        recommendations = [
            title for title in self.popular_list if title not in watched_set
        ][:k]

        # Return a DataFrame so the output matches the logged model signature.
        return pd.DataFrame({"recommendations": [recommendations]})

In [7]:
# Example request/response pair; the model signature is inferred from them.
input_example = pd.DataFrame(
    {
        "liked_titles": [["Naruto", "Bleach"]],
        "k": [5],
    }
)
output_example = pd.DataFrame(
    {
        "recommendations": [
            ["One Piece", "Death Note", "Your Name", "Attack on Titan"],
        ],
    }
)
signature = infer_signature(input_example, output_example)

# Log the pyfunc model, bundling the popularity list written earlier as the
# "popular_list" artifact, and keep the run id for the registration step.
with mlflow.start_run(run_name="popular-pyfunc") as run:
    mlflow.log_params({"algo": "popular", "top_k_default": 5})
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=PopularRecs(),
        artifacts={"popular_list": "artifacts/popular.txt"},
        input_example=input_example,
        signature=signature,
    )
    run_id = run.info.run_id
    print(f"Run ID: {run_id}")

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 859.49it/s]  


Run ID: 54135eaf5bf548289a6d0a899d9dae2c


In [8]:
from mlflow.tracking import MlflowClient
import mlflow

# Register the model logged under `run_id` and move the new version to the
# "Staging" stage without archiving versions already in that stage.
name = "anime-recommender"
model_uri = f"runs:/{run_id}/model"
client = MlflowClient()

registered = mlflow.register_model(model_uri=model_uri, name=name)

# NOTE(review): the saved output of this cell shows a warning that points at
# this call; presumably model stages are deprecated in the installed MLflow
# version -- confirm, and consider aliases together with the loading cell.
client.transition_model_version_stage(
    name=name,
    version=registered.version,
    stage="Staging",
    archive_existing_versions=False,
)
print("Registered {} v{} → Staging".format(name, registered.version))

Successfully registered model 'anime-recommender'.
2025/09/15 07:46:00 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: anime-recommender, version 1


Registered anime-recommender v1 → Staging


Created version '1' of model 'anime-recommender'.
  client.transition_model_version_stage(


In [9]:
# Load the registered model through its "Staging" stage URI and run a single
# request: two liked titles, asking for three recommendations.
loaded = mlflow.pyfunc.load_model("models:/anime-recommender/Staging")

pred = loaded.predict(
    pd.DataFrame(
        {
            "liked_titles": [["Naruto", "Your Name"]],
            "k": [3],
        }
    )
)
pred

  latest = client.get_latest_versions(name, None if stage is None else [stage])


Unnamed: 0,recommendations
0,"[One Piece, Bleach, Death Note]"
