In [None]:
import mlflow
import mlflow.sklearn
from mlflow_log import log_gpu_info, log_git_info, log_python_env

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Send every run started by this script to a single named experiment.
mlflow.set_experiment("adult-income-rf-model")

# Enable scikit-learn autologging with all optional captures switched on:
# the fitted model, an input example, the model signature and the datasets.
_autolog_kwargs = {
    "log_models": True,
    "log_input_examples": True,
    "log_model_signatures": True,
    "log_datasets": True,
}
mlflow.sklearn.autolog(**_autolog_kwargs)

# Download version 2 of the OpenML "adult" dataset as a (features, target)
# pair of pandas objects.
X, y = fetch_openml(name="adult", version=2, as_frame=True, return_X_y=True)

# Simple preprocessing: keep only the numeric columns, then replace missing
# values with 0.
X = X.select_dtypes(include=["number"])
X = X.fillna(0)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train and evaluate a random forest inside a single MLflow run, with system
# metrics collection enabled for the duration of the run.
with mlflow.start_run(log_system_metrics=True):
    # NOTE(review): log_gpu_info, log_git_info and log_python_env are imported
    # at the top of the file but never called; presumably they were meant to
    # run here -- confirm their signatures and add the calls if so.
    clf = RandomForestClassifier(n_estimators=100, random_state=42)

    # sklearn autologging is enabled with log_models=True, so fit() already
    # logs the fitted model together with its signature and input example.
    # An explicit mlflow.sklearn.log_model() call here would only store a
    # second copy of the model without that metadata, so none is made.
    clf.fit(X_train, y_train)

    # Score on the held-out split and record it under an explicit metric name.
    preds = clf.predict(X_test)
    acc = accuracy_score(y_test, preds)
    mlflow.log_metric("final_accuracy", acc)