In [1]:
# train.py

import mlflow
import mlflow.sklearn
import pandas as pd
from mlflow.tracking import MlflowClient
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Hyperparameters for the logistic-regression model
C, solver = 1.0, "lbfgs"

# Load the iris features/labels and carve out a reproducible hold-out split
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Sanity output: confirm the cell ran and show which tracking URI is active
print("OK")
print("MLflow Tracking URI:", mlflow.get_tracking_uri())


* 'schema_extra' has been renamed to 'json_schema_extra'


OK
MLflow Tracking URI: file:///app/notebooks/mlruns


In [2]:
# Point MLflow at the tracking server and select the experiment to log into
mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("Default")

# Everything inside the context manager is recorded under a single MLflow run
with mlflow.start_run():
    # Fit on the training split, then score on the hold-out split
    model = LogisticRegression(C=C, solver=solver, max_iter=200)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)

    # Record the hyperparameters and the resulting metric
    for param_name, param_value in (("C", C), ("solver", solver)):
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("accuracy", acc)

    # Store the fitted estimator as a run artifact under "model"
    mlflow.sklearn.log_model(model, "model")

    print(f"모델 정확도: {acc}")

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



모델 정확도: 1.0


In [25]:
# Load the sklearn model logged under the hard-coded run ID below
source_run_id = "b0e995d320af491489d227dd04ef4941"
model_uri = f"runs:/{source_run_id}/model"
model = mlflow.sklearn.load_model(model_uri)

In [27]:
# Register the run's model under this name; if the name already exists,
# MLflow adds it as a new version of that registered model.
registration = {
    "model_uri": "runs:/b0e995d320af491489d227dd04ef4941/model",
    "name": "iris-logistic-regression",
}
mlflow.register_model(**registration)

Registered model 'iris-logistic-regression' already exists. Creating a new version of this model...
2025/04/21 03:12:26 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-logistic-regression, version 3
Created version '3' of model 'iris-logistic-regression'.


<ModelVersion: aliases=[], creation_timestamp=1745205146195, current_stage='None', description='', last_updated_timestamp=1745205146195, name='iris-logistic-regression', run_id='b0e995d320af491489d227dd04ef4941', run_link='', source='/mlflow/artifacts/0/b0e995d320af491489d227dd04ef4941/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='3'>

In [28]:
client = MlflowClient()

# Promote version 1 of the registered model to Staging.
# archive_existing_versions=True moves any other version that currently sits in
# Staging to Archived as part of the same call. Without it, two versions end up
# in Staging and "models:/<name>/Staging" keeps resolving to the newer one
# rather than to the version promoted here.
client.transition_model_version_stage(
    name="iris-logistic-regression",
    version=1,
    stage="Staging",
    archive_existing_versions=True,
)

<ModelVersion: aliases=[], creation_timestamp=1745204611768, current_stage='Staging', description='', last_updated_timestamp=1745205151366, name='iris-logistic-regression', run_id='7f270f49827b434382b1777c44f933e2', run_link='', source='/mlflow/artifacts/0/7f270f49827b434382b1777c44f933e2/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [7]:
# Train and log one anonymous run per regularization strength.
# The loop variable is deliberately not named `C`: reusing that name would
# overwrite the notebook-level hyperparameter `C`, so re-executing the
# single-run training cell afterwards would silently train with the last
# value of this sweep.
for c_value in [0.1, 1.0, 10.0]:
    with mlflow.start_run():
        model = LogisticRegression(C=c_value, solver="lbfgs", max_iter=200)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        acc = accuracy_score(y_test, preds)

        # Log hyperparameters, the metric and the fitted model for this run
        mlflow.log_param("C", c_value)
        mlflow.log_param("solver", "lbfgs")
        mlflow.log_metric("accuracy", acc)
        mlflow.sklearn.log_model(model, "model")

        print(f"C={c_value} 정확도={acc}")



C=0.1 정확도=1.0




C=1.0 정확도=1.0
C=10.0 정확도=1.0




In [8]:
# Repeat the sweep, this time giving each run an explicit name ("run-1", ...).
# The loop variable is not named `C` so that the notebook-level hyperparameter
# `C` keeps its configured value after this cell has run.
for i, c_value in enumerate([0.1, 1.0, 10.0], start=1):
    with mlflow.start_run(run_name=f"run-{i}"):
        model = LogisticRegression(C=c_value, solver="lbfgs", max_iter=200)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        acc = accuracy_score(y_test, preds)

        # Log hyperparameters, the metric and the fitted model for this run
        mlflow.log_param("C", c_value)
        mlflow.log_param("solver", "lbfgs")
        mlflow.log_metric("accuracy", acc)
        mlflow.sklearn.log_model(model, "model")

        print(f"[run-{i}] C={c_value} 정확도={acc}")



[run-1] C=0.1 정확도=1.0




[run-2] C=1.0 정확도=1.0
[run-3] C=10.0 정확도=1.0




In [9]:
# Print the name and ID of every run recorded under the "Default" experiment
client = mlflow.tracking.MlflowClient()
experiment_id = client.get_experiment_by_name("Default").experiment_id

runs = client.search_runs(experiment_ids=[experiment_id])
for run in runs:
    info = run.info
    print(info.run_name, info.run_id)

run-3 b0e995d320af491489d227dd04ef4941
run-2 7f270f49827b434382b1777c44f933e2
run-1 2eeddf6a815146239fb7030cf6ea174d
resilient-ram-833 20dec1b7caee4652b809182eb1d78c49
marvelous-fox-237 41967e52f721402dbba86d4ffb1a6d27
unequaled-calf-559 5aff886fef4e4bd792af7969bd19e156
fortunate-fish-631 0f861d30cf1d42e19a09fa804dbfde53


In [10]:
from mlflow.tracking import MlflowClient

# Resolve a human-readable run name to its run ID inside the "Default" experiment
target_run_name = "run-2"

client = MlflowClient()
experiment_id = client.get_experiment_by_name("Default").experiment_id
runs = client.search_runs([experiment_id])

# ID of the first run whose name matches, or None when there is no such run
run_id = next(
    (run.info.run_id for run in runs if run.info.run_name == target_run_name),
    None,
)

print(
    f"✅ Run ID for '{target_run_name}': {run_id}"
    if run_id
    else "❌ Run Name not found."
)

✅ Run ID for 'run-2': 7f270f49827b434382b1777c44f933e2


In [11]:
# Register the model logged by the run in `run_id` (set by an earlier cell)
model_uri = f"runs:/{run_id}/model"
result = mlflow.register_model(name="iris-logistic-regression", model_uri=model_uri)

# `result` is the created model version; report its name and version number
print(f"🎯 모델 등록 완료: name={result.name}, version={result.version}")

Successfully registered model 'iris-logistic-regression'.
2025/04/21 03:03:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-logistic-regression, version 1


🎯 모델 등록 완료: name=iris-logistic-regression, version=1


Created version '1' of model 'iris-logistic-regression'.


In [12]:
# Move the model version held in `result` into the Staging stage
stage_request = {
    "name": "iris-logistic-regression",
    "version": result.version,
    "stage": "Staging",  # or "Production"
}
client.transition_model_version_stage(**stage_request)

<ModelVersion: aliases=[], creation_timestamp=1745204611768, current_stage='Staging', description='', last_updated_timestamp=1745204615735, name='iris-logistic-regression', run_id='7f270f49827b434382b1777c44f933e2', run_link='', source='/mlflow/artifacts/0/7f270f49827b434382b1777c44f933e2/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [14]:
# Load the Staging version of a registered model (set the model name here)
model_name = "iris-logistic-regression"

# "models:/<name>/<stage>" addresses a registered model by stage
model_uri = f"models:/{model_name}/Staging"
model = mlflow.pyfunc.load_model(model_uri)

# Example usage: predict on the hold-out features and show the first few labels
preds = model.predict(X_test)
print("예측 결과 일부:", preds[:5])

예측 결과 일부: [1 0 2 1 1]


In [15]:
# Report stage, accuracy, C and run name for every version of `model_name`.
# Loop-local names are prefixed so this cell does not overwrite the
# notebook-level `run_id`, nor replace the numeric hyperparameter `C` with the
# string value that MLflow returns for logged params.
client = MlflowClient()
models = client.search_model_versions(f"name='{model_name}'")

for m in models:
    # Each model version points back at the run that produced it
    version_run = client.get_run(m.run_id)

    logged_accuracy = version_run.data.metrics.get("accuracy")
    logged_c = version_run.data.params.get("C")
    logged_run_name = version_run.info.run_name
    print(f"버전 {m.version} | 상태: {m.current_stage} | 정확도: {logged_accuracy} | C={logged_c} | 이름={logged_run_name}")

버전 1 | 상태: Staging | 정확도: 1.0 | C=1.0 | 이름=run-2


In [16]:
# Keep exactly one version in Staging: the one whose source run logged the
# highest accuracy. Every other Staging version is archived.
client = MlflowClient()
model_name = "iris-logistic-regression"
versions = client.search_model_versions(f"name='{model_name}'")

# Pick the best version by logged accuracy. Versions whose run has no
# "accuracy" metric are skipped; on ties the first version encountered wins.
best_accuracy = -1
best_version = None
for mv in versions:
    acc = client.get_run(mv.run_id).data.metrics.get("accuracy")
    if acc is not None and acc > best_accuracy:
        best_accuracy = acc
        best_version = mv.version

if best_version is not None:
    # Promote and archive in a single call: archive_existing_versions=True moves
    # the other versions currently in Staging to Archived, so Staging is never
    # left empty between two separate requests.
    client.transition_model_version_stage(
        name=model_name,
        version=best_version,
        stage="Staging",
        archive_existing_versions=True,
    )
    print(f"✅ 버전 {best_version}이 Staging으로 지정됨 (정확도={best_accuracy})")
else:
    # No candidate was found, so leave the registry untouched instead of
    # archiving every Staging version with nothing to replace it.
    print("⚠️ accuracy 메트릭이 있는 버전이 없어 Staging을 변경하지 않았습니다.")

✅ 버전 1이 Staging으로 지정됨 (정확도=1.0)


In [17]:
# Fetch the five runs with the highest accuracy
runs = mlflow.search_runs(
    experiment_ids=["0"],  # every run so far lives in experiment id 0
    order_by=["metrics.accuracy DESC"],
    max_results=5
)

# Load each run's logged model and run a sample prediction with it.
# Only the run_id column is needed, so iterate over it directly.
for run_id in runs["run_id"]:
    model_uri = f"runs:/{run_id}/model"
    model = mlflow.pyfunc.load_model(model_uri)

    # Example: predict on the hold-out features
    preds = model.predict(X_test)
    print("예측 결과 일부:", preds[:5])
    print(f"[Run: {run_id}] 예측 결과: {preds[:5]}")

예측 결과 일부: [1 0 2 1 1]
[Run: b0e995d320af491489d227dd04ef4941] 예측 결과: [1 0 2 1 1]
예측 결과 일부: [1 0 2 1 1]
[Run: 7f270f49827b434382b1777c44f933e2] 예측 결과: [1 0 2 1 1]
예측 결과 일부: [1 0 2 1 1]
[Run: 2eeddf6a815146239fb7030cf6ea174d] 예측 결과: [1 0 2 1 1]
예측 결과 일부: [1 0 2 1 1]
[Run: 20dec1b7caee4652b809182eb1d78c49] 예측 결과: [1 0 2 1 1]
예측 결과 일부: [1 0 2 1 1]
[Run: 41967e52f721402dbba86d4ffb1a6d27] 예측 결과: [1 0 2 1 1]


In [18]:
# Register the model of the highest-accuracy run as a new version of
# "iris-logistic-regression" and make it the only version in Staging.
client = MlflowClient()
experiment_id = "0"  # change if the runs live in another experiment
registered_model_name = "iris-logistic-regression"

# 1. Fetch the experiment's runs, best accuracy first
runs_df = mlflow.search_runs(
    experiment_ids=[experiment_id],
    order_by=["metrics.accuracy DESC"]
)

# 2. Select the top run that actually logged an accuracy value. Fail with a
#    clear message instead of an IndexError/KeyError when there is none.
has_accuracy = "metrics.accuracy" in runs_df.columns
scored_runs = runs_df.dropna(subset=["metrics.accuracy"]) if has_accuracy else runs_df.iloc[0:0]
if scored_runs.empty:
    raise RuntimeError(
        f"No runs with an 'accuracy' metric found in experiment {experiment_id!r}."
    )

best_run = scored_runs.iloc[0]
best_run_id = best_run["run_id"]
best_accuracy = best_run["metrics.accuracy"]

# 3. URI of the model artifact logged by the selected run
model_uri = f"runs:/{best_run_id}/model"

# 4. Register the model. If the registered model name already exists this does
#    not fail; it adds a new version, so re-running this cell registers the
#    same run again under a further version number.
mv = mlflow.register_model(
    model_uri=model_uri,
    name=registered_model_name
)

best_version = mv.version

# 5. Promote the new version to Staging. archive_existing_versions=True moves
#    the versions currently in Staging to Archived in the same call, so there
#    is no window in which Staging is empty.
client.transition_model_version_stage(
    name=registered_model_name,
    version=best_version,
    stage="Staging",
    archive_existing_versions=True,
)

print(f"✅ 정확도 {best_accuracy}인 모델 Run({best_run_id})이 버전 {best_version}으로 등록되고 Staging 상태로 전환되었습니다.")

Registered model 'iris-logistic-regression' already exists. Creating a new version of this model...
2025/04/21 03:03:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-logistic-regression, version 2


✅ 정확도 1.0인 모델 Run(b0e995d320af491489d227dd04ef4941)이 버전 2으로 등록되고 Staging 상태로 전환되었습니다.


Created version '2' of model 'iris-logistic-regression'.


In [19]:
# Show the stage of every version of one registered model
client = MlflowClient()
model_name = "iris-logistic-regression"  # replace with the model name of interest

# All versions registered under that name
versions = client.search_model_versions(f"name='{model_name}'")

# One line per version: model name, version number and current stage
for mv in versions:
    stage = mv.current_stage
    print(f"📦 Model: {mv.name}, 🔢 Version: {mv.version}, 🏷️ Stage: {stage}")

📦 Model: iris-logistic-regression, 🔢 Version: 2, 🏷️ Stage: Staging
📦 Model: iris-logistic-regression, 🔢 Version: 1, 🏷️ Stage: Archived


In [20]:
# Print the version/stage/run overview for several registered model names
model_names = ["iris-logistic-regression", "sklearn"]  # names to inspect, maintained by hand

for model_name in model_names:
    print(f"\n🔍 모델 이름: {model_name}")
    versions = client.search_model_versions(f"name='{model_name}'")
    # A name with no registered versions prints only its header line
    version_lines = [
        f"  - 버전: {mv.version}, Stage: {mv.current_stage}, Run ID: {mv.run_id}"
        for mv in versions
    ]
    for line in version_lines:
        print(line)


🔍 모델 이름: iris-logistic-regression
  - 버전: 2, Stage: Staging, Run ID: b0e995d320af491489d227dd04ef4941
  - 버전: 1, Stage: Archived, Run ID: 7f270f49827b434382b1777c44f933e2

🔍 모델 이름: sklearn


In [29]:
# Take the five most recent runs, pick the most accurate one and promote it to
# Staging only if it beats the accuracy of the current Staging model.
client = MlflowClient()
experiment_id = "0"
registered_model_name = "iris-logistic-regression"

# 1. Five most recently started runs that finished successfully
recent_runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
    filter_string="attributes.status = 'FINISHED'",
    order_by=["start_time DESC"],
    max_results=5
)

# 2. Best candidate among them by accuracy
best_run = recent_runs.sort_values(by="metrics.accuracy", ascending=False).iloc[0]
best_run_id = best_run["run_id"]
best_accuracy = best_run["metrics.accuracy"]

# 3. Accuracy of the model currently in Staging
staging_versions = [
    v for v in client.search_model_versions(f"name='{registered_model_name}'")
    if v.current_stage == "Staging"
]

if staging_versions:
    # More than one version can be in Staging, and the order of the search
    # result is not relied on here: pick the highest version number explicitly.
    current_staging = max(staging_versions, key=lambda v: int(v.version))
    staging_run_id = current_staging.run_id
    staging_run = mlflow.get_run(staging_run_id)
    staging_accuracy = staging_run.data.metrics.get("accuracy", -1)
else:
    staging_accuracy = -1  # nothing staged yet: any candidate wins the comparison

print(f"📊 현재 Staging 정확도: {staging_accuracy}, 후보 모델 정확도: {best_accuracy}")

# 4. Update Staging only when the candidate is strictly better
if best_accuracy > staging_accuracy:
    model_uri = f"runs:/{best_run_id}/model"

    try:
        mv = mlflow.register_model(model_uri=model_uri, name=registered_model_name)
        best_version = mv.version
    except mlflow.exceptions.MlflowException:
        # Only MLflow-level failures are handled; anything else propagates.
        # Fall back to a version that was already registered from this run.
        print("이미 등록된 모델입니다. 버전을 가져옵니다.")
        existing_versions = [
            int(v.version)
            for v in client.search_model_versions(f"name='{registered_model_name}'")
            if v.run_id == best_run_id
        ]
        if not existing_versions:
            # Nothing to fall back to: surface the original registration error
            raise
        best_version = str(max(existing_versions))

    # Promote the candidate and archive the previous Staging versions in one
    # call. This replaces a loop over `versions`, which was only assigned in the
    # except-branch and therefore stale or undefined after a successful
    # registration.
    client.transition_model_version_stage(
        name=registered_model_name,
        version=best_version,
        stage="Staging",
        archive_existing_versions=True,
    )

    print(f"✅ 정확도 {best_accuracy}인 모델 Run({best_run_id})이 Staging으로 업데이트되었습니다.")
else:
    print("⏭ 기존 Staging 모델의 정확도가 더 높거나 같아서 업데이트하지 않았습니다.")

📊 현재 Staging 정확도: 1.0, 후보 모델 정확도: 1.0
⏭ 기존 Staging 모델의 정확도가 더 높거나 같아서 업데이트하지 않았습니다.


In [34]:
# Smoke-test the Staging model on a small labelled sample.
# The sample gets its own names (X_sample / y_sample) so the hold-out split in
# X_test / y_test created by train_test_split is not overwritten; other cells
# that predict on X_test keep seeing the original split when re-run.
# NOTE(review): the sample is simply the first 10 rows of the full dataset, so
# it may overlap the training split - treat the score as a sanity check only.
iris = load_iris()
X_sample = pd.DataFrame(iris.data[:10], columns=iris.feature_names)
y_sample = iris.target[:10]

# Load the version currently in Staging
model = mlflow.pyfunc.load_model("models:/iris-logistic-regression/Staging")

# Predict
y_pred = model.predict(X_sample)

# Score the predictions against the sample labels
acc = accuracy_score(y_sample, y_pred)
print(f"✅ 정확도: {acc:.2f}")

✅ 정확도: 1.00


