In [4]:
import mlflow
import mlflow.pyfunc
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

# === 連線到 MLflow Server ===
mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("anime-recsys-tfidf")

# === 載入資料 ===
DATA_DIR = "/usr/mlflow/data"
anime = pd.read_csv(os.path.join(DATA_DIR, "anime_clean.csv"))

# 儲存一份到 artifacts 資料夾
ARTIFACT_DIR = "./artifacts"
os.makedirs(ARTIFACT_DIR, exist_ok=True)
anime_path = os.path.join(ARTIFACT_DIR, "anime.csv")
anime.to_csv(anime_path, index=False)

# === 定義 TF-IDF 模型 ===
class TFIDFRecommender(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        import pandas as pd
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.metrics.pairwise import cosine_similarity

        anime_path = context.artifacts["anime"]
        self.anime = pd.read_csv(anime_path)
        self.vectorizer = TfidfVectorizer(stop_words="english", max_features=3000)
        self.tfidf_matrix = self.vectorizer.fit_transform(self.anime["genre"].fillna(""))
        self.anime_titles = self.anime["name"].fillna("").tolist()

    def predict(self, context, model_input):
        query = " ".join(model_input[0].tolist())
        q_vec = self.vectorizer.transform([query])
        sims = cosine_similarity(q_vec, self.tfidf_matrix).flatten()
        top_idx = sims.argsort()[-10:][::-1]
        recommendations = [self.anime_titles[i] for i in top_idx]
        return [recommendations]

# === 註冊模型 ===
with mlflow.start_run(run_name="tfidf-with-artifact") as run:
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=TFIDFRecommender(),
        artifacts={"anime": anime_path},
        registered_model_name="AnimeRecsysTFIDF"
    )

print("✅ AnimeRecsysTFIDF 模型重新註冊完成，並附帶 anime.csv！")

# === 可選：自動切換 Stage ===
from mlflow.tracking import MlflowClient
client = MlflowClient()
latest = client.get_latest_versions("AnimeRecsysTFIDF")[0]
client.transition_model_version_stage("AnimeRecsysTFIDF", latest.version, stage="Staging")

print(f"✅ 模型版本 v{latest.version} 已切換至 Staging。")


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 812.38it/s] 
Registered model 'AnimeRecsysTFIDF' already exists. Creating a new version of this model...
2025/10/08 16:11:32 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: AnimeRecsysTFIDF, version 3
Created version '3' of model 'AnimeRecsysTFIDF'.
  latest = client.get_latest_versions("AnimeRecsysTFIDF")[0]
  client.transition_model_version_stage("AnimeRecsysTFIDF", latest.version, stage="Staging")


✅ AnimeRecsysTFIDF 模型重新註冊完成，並附帶 anime.csv！
✅ 模型版本 v2 已切換至 Staging。
