In [6]:
import argparse
import mlflow
import mlflow.sklearn
import numpy
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from mlflow.models import infer_signature

# Point the MLflow client at the tracking server on the loopback address
# before any experiment or run is created.
TRACKING_URI = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(TRACKING_URI)

# Read the dataset into a DataFrame; later cells use its "Class" column as the target.
DATA_PATH = "./data/creditcard.csv"
df = pd.read_csv(DATA_PATH)

In [7]:
# Command-line hyperparameters: each option is an integer with a default value.
parser = argparse.ArgumentParser()
for flag, default in (("--max_iter", 1000), ("--n_estimators", 100)):
    parser.add_argument(flag, type=int, default=default)

# parse_known_args() is used instead of parse_args() so that unrecognized
# command-line arguments (for example, the ones a notebook kernel is launched
# with) are collected in `unknown` rather than aborting with a usage error.
args, unknown = parser.parse_known_args()

In [8]:
# Separate the "Class" target column from the remaining feature columns.
TARGET_COLUMN = "Class"
y = df[TARGET_COLUMN]
X = df.drop(columns=TARGET_COLUMN)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit the scaler on the training split only, then apply those same fitted
# statistics to both splits. X_train / X_test are rebound to the scaled output.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Select the experiment under which the following runs are recorded.
EXPERIMENT_NAME = "creditcard_experiment2"
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location=('file:///C:/Users/SSAFY/Desktop/TIL/100_offline/8. DA/73일차 '
 '(10.1)/data_science2_ws_6_4/mlruns/1'), creation_time=1759296934826, experiment_id='1', last_update_time=1759296934826, lifecycle_stage='active', name='creditcard_experiment2', tags={}>

In [11]:
# Train, evaluate, and register a LogisticRegression model inside one MLflow run.
with mlflow.start_run(run_name="LogisticRegression"):
    # Fit on the scaled training split and predict on the scaled test split.
    model = LogisticRegression(max_iter=args.max_iter)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Evaluation metrics computed on the held-out test split.
    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    rec = recall_score(y_test, preds)

    mlflow.set_tag("model", "LogisticRegression")
    # Log the value the model was actually built with; a hard-coded literal
    # would be wrong whenever --max_iter is overridden on the command line.
    mlflow.log_param("max_iter", args.max_iter)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("precision", prec)
    mlflow.log_metric("recall", rec)

    # Infer the signature from the inputs that actually produced `preds`,
    # so the example input and output correspond to each other.
    signature = infer_signature(X_test, preds)

    # NOTE(review): only the classifier is logged; the fitted `scaler` is not
    # part of the artifact, so consumers must apply the same scaling themselves.
    mlflow.sklearn.log_model(model, "model", registered_model_name="LogisticRegressionModel", signature=signature)

Registered model 'LogisticRegressionModel' already exists. Creating a new version of this model...
2025/10/01 15:26:58 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegressionModel, version 2


🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/1/runs/384c5adb10ba4e009f8cc97c8905f220
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1


Created version '2' of model 'LogisticRegressionModel'.


In [12]:
# Train, evaluate, and register a RandomForestClassifier inside one MLflow run.
with mlflow.start_run(run_name="RandomForest"):
    # Seed the forest so repeated executions give the same metrics; 42 matches
    # the seed already used for the train/test split.
    rf_random_state = 42
    model = RandomForestClassifier(n_estimators=args.n_estimators, random_state=rf_random_state)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Evaluation metrics computed on the held-out test split.
    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    rec = recall_score(y_test, preds)

    mlflow.set_tag("model", "RandomForest")
    # Log the values the model was actually built with; a hard-coded literal
    # would be wrong whenever --n_estimators is overridden on the command line.
    mlflow.log_param("n_estimators", args.n_estimators)
    mlflow.log_param("random_state", rf_random_state)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("precision", prec)
    mlflow.log_metric("recall", rec)

    # Infer the signature from the inputs that actually produced `preds`,
    # so the example input and output correspond to each other.
    signature = infer_signature(X_test, preds)

    # NOTE(review): only the classifier is logged; the fitted `scaler` is not
    # part of the artifact, so consumers must apply the same scaling themselves.
    mlflow.sklearn.log_model(model, "model", registered_model_name="RandomForestModel", signature=signature)

Registered model 'RandomForestModel' already exists. Creating a new version of this model...
2025/10/01 15:27:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestModel, version 2


🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/1/runs/9bad3dba21204a1c8a495e2da1e9ee0b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1


Created version '2' of model 'RandomForestModel'.


RandomForest 모델은 LogisticRegression 모델보다 실행 시간이 더 길지만, 그만큼 성능 지표가 더 높게 나옵니다.