Modelling

In [None]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import dagshub

# Connect this session to the DagsHub repository used for experiment tracking.
# NOTE(review): mlflow=True presumably points MLflow tracking at that
# repository -- confirm against the DagsHub client documentation.
dagshub.init(
    repo_name="Eksperimen_SML_Royan-Firdaus",
    repo_owner="RoyanFirdaus313",
    mlflow=True,
)

# Load the preprocessed dataset produced by the earlier cleaning step.
df = pd.read_csv("namadataset_preprocessing/cleaned_df.csv")

# The "target" column is the label; every other column is used as a feature.
y = df["target"]
X = df.drop(columns="target")

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyper-parameter candidates: 2 x 2 = 4 combinations in total.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10],
)

# Exhaustive search over the grid, scored by accuracy with 3-fold
# cross-validation. The forest itself is seeded so results are repeatable.
grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring="accuracy",
    cv=3,
)

# Select the MLflow experiment that the run below is recorded under.
mlflow.set_experiment("RF_Tuning_Advance")

with mlflow.start_run():
    # Fit the grid search on the training split and take the winning model.
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_

    # Evaluate the winning model on the held-out test split.
    y_pred = best_model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    # "weighted" averages the per-class F1 scores by class support.
    f1 = f1_score(y_test, y_pred, average="weighted")

    # Manual logging of the chosen hyper-parameters and the test metrics.
    mlflow.log_params(grid.best_params_)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)

    # Additional artifacts (ADVANCE).
    # `__file__` is only defined when this code runs as a script. In a
    # notebook cell the bare name raises NameError, which would abort the run
    # before the model is logged. Look it up defensively and skip the
    # source-file artifact when it is unavailable.
    script_path = globals().get("__file__")
    if script_path is not None:
        mlflow.log_artifact(script_path)
    mlflow.sklearn.log_model(best_model, "best_model")

    print("Training selesai - Advanced")
