In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import PowerTransformer, OrdinalEncoder, OneHotEncoder, LabelEncoder
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import optuna
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    balanced_accuracy_score
)
from sklearn.preprocessing import label_binarize
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from sklearn.preprocessing import label_binarize
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import sklearn
from sklearn.metrics import (
    roc_auc_score, roc_curve, balanced_accuracy_score
)
import mlflow
import matplotlib.ticker as ticker
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
pd.options.display.max_columns = None
sklearn.set_config(transform_output='pandas')

In [2]:
# Read the student table and discard the 'number' and 'Id' columns right away.
df = pd.read_csv('student.csv').drop(columns=['number', 'Id'])

# Attendance becomes a numeric scale; the "3" entry is mapped to a missing value.
attendance_scale = {"Always": 3, "Sometimes": 2, "Never": 1, "3": None}
df['Attendance'] = df['Attendance'].map(attendance_scale)

# Scholarship arrives as a percentage string; a missing value is treated as "0%".
scholarship_text = df['Scholarship'].fillna("0%")
df['Scholarship'] = scholarship_text.str.replace("%", "").astype(int)

# Letter grades are turned into integer codes.
grade_mapping = {
    "Fail": 0,
    "FD": 1,
    "DD": 2,
    "DC": 3,
    "CC": 4,
    "CB": 5,
    "BB": 6,
    "BA": 7,
    "AA": 8,
}
df['Grade'] = df['Grade'].map(grade_mapping)

# Target column and feature matrix.
y = df['Grade']
X = df.drop(columns=['Grade'])

# Re-encode the target with LabelEncoder; `le` is kept so that predictions can
# be translated back with le.inverse_transform / le.classes_ later on.
le = LabelEncoder()
y_encoded = pd.Series(le.fit_transform(y))


FileNotFoundError: [Errno 2] No such file or directory: 'student.csv'

In [None]:
# Hold out 20% of the rows for the final evaluation. The split is stratified on
# the encoded target and uses a fixed random_state so it can be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42,
)

In [None]:
# Column groups: three named categoricals are one-hot encoded, every other
# object column is ordinal encoded, and all non-object columns are numeric.
ohe_list = ['Sex', 'High_School_Type', 'Transportation']
object_columns = X.select_dtypes(include="object").columns
ord_list = object_columns.drop(ohe_list)
num_list = X.select_dtypes(exclude="object").columns

# One-hot branch: fill gaps with the literal "unknown", then expand to dense
# indicator columns; categories unseen during fit are ignored at transform time.
ohe_pipe = Pipeline(steps=[
    ("SimpleImputer", SimpleImputer(strategy="constant", fill_value="unknown")),
    ("OneHotEncoder", OneHotEncoder(sparse_output=False, handle_unknown="ignore")),
])

# Ordinal branch: same imputation, then integer codes; categories unseen
# during fit are encoded as -1.
ord_pipe = Pipeline(steps=[
    ("SimpleImputer", SimpleImputer(strategy="constant", fill_value="unknown")),
    ("OrdinalEncoder", OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
])

# Numeric branch: power transform only (no imputation step here).
num_pipe = Pipeline(steps=[
    ("PowerTransformer", PowerTransformer()),
])


In [None]:
# Route each column group through its own sub-pipeline. The transformers are
# listed in the order ordinal, numeric, one-hot.
transform = ColumnTransformer(
    transformers=[
        ("ord_pipe", ord_pipe, ord_list),
        ("num_pipe", num_pipe, num_list),
        ("ohe_pipe", ohe_pipe, ohe_list),
    ]
)


In [None]:
# Module-level leave-one-out splitter.
# NOTE(review): the objective_* functions further down each build their own
# LeaveOneOut instance, so this global does not appear to be read anywhere in
# the visible notebook — confirm before removing.
loo = LeaveOneOut()

In [None]:
# Preview only the first rows instead of dumping the entire training frame
# into the notebook output.
X_train.head()

In [None]:
# Number of training rows. The objective_* functions run leave-one-out over
# X_train, so this is also the number of model fits per Optuna trial.
len(y_train)

In [None]:
def objective_xgb(trial):
    """Optuna objective for the One-vs-Rest XGBoost pipeline.

    Draws one hyper-parameter set from ``trial``, runs leave-one-out
    cross-validation over ``X_train`` / ``y_train`` and scores the pooled
    out-of-fold predictions.

    Args:
        trial: The Optuna trial used to sample hyper-parameters. Parameter
            names carry the ``model__estimator__`` prefix so they can be passed
            directly to ``Pipeline.set_params``.

    Returns:
        The weighted F1 score computed once over all held-out predictions.
    """
    params = {
        # Single-choice categorical: n_estimators is fixed at 1000 but still
        # recorded among the trial's parameters.
        "model__estimator__n_estimators": trial.suggest_categorical("model__estimator__n_estimators", [1000]),
        "model__estimator__max_depth": trial.suggest_int("model__estimator__max_depth", 3, 10),
        "model__estimator__learning_rate": trial.suggest_float("model__estimator__learning_rate", 0.001, 0.05),
        "model__estimator__subsample": trial.suggest_float("model__estimator__subsample", 0.5, 1.0),
        "model__estimator__colsample_bytree": trial.suggest_float("model__estimator__colsample_bytree", 0.1, 0.6),
        "model__estimator__gamma": trial.suggest_float("model__estimator__gamma", 0.0, 1.0),
        "model__estimator__reg_alpha": trial.suggest_float("model__estimator__reg_alpha", 0.0, 1.0),
    }
    # Work on a clone of the shared ColumnTransformer so the repeated fits
    # below do not overwrite the fitted state seen by other pipelines that
    # were built from the same `transform` object.
    pipe = Pipeline([
        ("transformation", sklearn.base.clone(transform)),
        ("model", OneVsRestClassifier(XGBClassifier()))
    ])
    pipe.set_params(**params)

    # Every leave-one-out fold holds exactly one row, so a per-fold weighted F1
    # can only be 1.0 (hit) or 0.0 (miss) and its mean is plain accuracy.
    # Pooling the held-out predictions and scoring once gives a real weighted F1.
    held_out_true = []
    held_out_pred = []
    for fit_idx, held_out_idx in LeaveOneOut().split(X_train):
        pipe.fit(X_train.iloc[fit_idx], y_train.iloc[fit_idx])
        held_out_pred.extend(pipe.predict(X_train.iloc[held_out_idx]))
        held_out_true.extend(y_train.iloc[held_out_idx])
    return f1_score(held_out_true, held_out_pred, average='weighted')

# Seed the sampler so the hyper-parameters drawn by the search are the same on
# every Restart & Run All (42 matches the seed used for the train/test split).
study_xgb = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_xgb.optimize(objective_xgb, n_trials=2)
best_xgb = study_xgb.best_params

In [None]:
# Show the best XGBoost hyper-parameters found by the study. The keys are the
# names used in objective_xgb ("model__estimator__..."), i.e. the form that
# Pipeline.set_params accepts.
best_xgb

In [None]:
# Final XGBoost pipeline. It gets its own clone of the shared ColumnTransformer
# so that later fits of other pipelines cannot change its preprocessing state.
pipe = Pipeline([
    ("transformation", sklearn.base.clone(transform)),
    ("model", OneVsRestClassifier(XGBClassifier()))
])
# Apply the tuned hyper-parameters before fitting; without this the model is
# trained with XGBoost defaults while best_xgb is what gets logged to MLflow.
pipe.set_params(**best_xgb)
pipe.fit(X_train, y_train)

In [12]:
def report_metrics(mlflow, y_test, y_pred, y_proba, sufix):
    """Print multi-class test metrics and log them, with the confusion matrix, to MLflow.

    Args:
        mlflow: Logging sink; must provide ``log_metric``, ``log_text`` and
            ``log_figure`` (the ``mlflow`` module itself is passed by callers).
        y_test: True labels.
        y_pred: Predicted labels, same length as ``y_test``.
        y_proba: Predicted probabilities. Not used at the moment; kept so the
            signature stays stable for callers and for the ROC TODO below.
        sufix: Suffix appended to every printed name, metric key and artifact
            file name.

    Returns:
        None. Everything is reported through ``print`` and the ``mlflow`` sink.
    """
    # Function-scope import: the label helper is only needed here.
    from sklearn.utils.multiclass import unique_labels

    # Use the union of true and predicted labels for both the matrix and its
    # tick labels, so the two always have the same length even when a class is
    # missing from y_test or only shows up in y_pred.
    labels = unique_labels(y_test, y_pred)

    accuracy = accuracy_score(y_test, y_pred)
    balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
    # zero_division=0 keeps the value the default would produce for classes
    # that are never predicted, without emitting a warning for each of them.
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    cm = confusion_matrix(y_test, y_pred, labels=labels)

    print(f"Accuracy_{sufix}: {accuracy}")
    print(f"Balanced Accuracy_{sufix}: {balanced_accuracy}")
    print(f"Precision_{sufix}: {precision}")
    print(f"Recall_{sufix}: {recall}")
    print(f"F1 Score_{sufix}: {f1}")
    print(f"Confusion Matrix_{sufix}:\n", cm)

    mlflow.log_metric(f"accuracy_{sufix}", accuracy)
    mlflow.log_metric(f"balanced_accuracy_{sufix}", balanced_accuracy)
    mlflow.log_metric(f"precision_{sufix}", precision)
    mlflow.log_metric(f"recall_{sufix}", recall)
    mlflow.log_metric(f"f1_score_{sufix}", f1)

    mlflow.log_text(f"Confusion Matrix:\n{cm}", f"confusion_matrix_{sufix}.txt")

    # TODO: ROC-AUC and the ROC curve are not reported. The earlier draft called
    # roc_auc_score / roc_curve directly on (y_test, y_proba); presumably that
    # was disabled because the target is multi-class — confirm and add a
    # one-vs-rest version if it is wanted.

    # Overall confusion matrix as a figure (reuses the matrix computed above).
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
    disp.plot(cmap=plt.cm.Blues)
    disp.ax_.set_title("Confusion Matrix")
    mlflow.log_figure(disp.figure_, f"confusion_matrix_{sufix}.png")
    plt.close(disp.figure_)

    # TODO: also save a confusion matrix for each class treated as a binary
    # (one-vs-rest) problem.


def mlflow_run(best_pipeline, best_params, X_test, y_test, model_name=None):
    """Evaluate a fitted pipeline on the test set and log everything to the active MLflow run.

    Logs the hyper-parameters, the test metrics (via ``report_metrics``), the
    pipeline itself and a feature-importance chart plus CSV.

    Args:
        best_pipeline: Fitted pipeline with a ``'transformation'`` step and a
            ``'model'`` step that exposes ``estimators_`` whose members have
            ``feature_importances_``.
        best_params: Hyper-parameter dict to record with ``mlflow.log_params``.
        X_test: Test features.
        y_test: Test labels.
        model_name: Optional label used in the chart title. Defaults to the
            class name of the first fitted per-class estimator.

    Returns:
        None.

    Side effects:
        Writes ``feature_importances.csv`` into the current working directory
        before uploading it as an artifact.
    """
    mlflow.log_params(best_params)

    # Predict on the test set; the probabilities are kept as the full matrix
    # returned by predict_proba (one column per class).
    y_pred = best_pipeline.predict(X_test)
    y_proba = best_pipeline.predict_proba(X_test)

    report_metrics(mlflow, y_test, y_pred, y_proba, 'ml')

    # Log the final model
    mlflow.sklearn.log_model(best_pipeline, "model_pipeline")

    # Average the importances of the per-class estimators of the
    # One-vs-Rest wrapper into one value per transformed feature.
    ovr = best_pipeline.named_steps['model']
    if model_name is None:
        model_name = type(ovr.estimators_[0]).__name__
    per_class_importances = [est.feature_importances_ for est in ovr.estimators_]
    importances = pd.DataFrame(per_class_importances).mean(axis=0)
    feature_names = best_pipeline.named_steps['transformation'].get_feature_names_out()

    # Table of features sorted by mean importance, highest first.
    feat_imp_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values(by='importance', ascending=False)

    # Keep at most the 40 strongest non-zero features; the fraction is taken
    # relative to the total of the features that are kept.
    feat_imp_df = feat_imp_df[feat_imp_df['importance'] > 0].head(40)
    total_importance = feat_imp_df['importance'].sum()
    feat_imp_df['importance_fraction'] = feat_imp_df['importance'] / total_importance

    fig, ax = plt.subplots(figsize=(25, 25))
    try:
        bars = ax.barh(feat_imp_df['feature'], feat_imp_df['importance_fraction'], color='skyblue')
        ax.invert_yaxis()  # so the highest importance is at the top

        # The title names the model that was actually evaluated instead of
        # always saying "XGBoost".
        ax.set_title(f"Feature Importances from {model_name}")
        ax.set_xlabel("Importance (%)")
        ax.set_ylabel("Features")
        # Format x-axis from 0..1 into 0..100%
        ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))

        # Annotate each bar with its percentage, just right of the bar end and
        # vertically centred on the bar.
        for bar in bars:
            width = bar.get_width()
            ax.text(
                width + 0.01,
                bar.get_y() + bar.get_height() / 2,
                f"{width:.1%}",
                va='center'
            )

        mlflow.log_figure(fig, "feature_importances.png")

        feat_imp_df.to_csv("feature_importances.csv", index=False)
        mlflow.log_artifact("feature_importances.csv")
    finally:
        # Release the figure even if logging fails part-way through.
        plt.close(fig)

    print("Run finished. All results have been logged to MLflow.")


In [None]:
# Record the XGBoost evaluation as one run of the "raw_data" experiment.
mlflow.set_experiment("raw_data")
xgb_run_name = "XGBoost_first_optuna"
with mlflow.start_run(run_name=xgb_run_name):
    mlflow_run(pipe, best_xgb, X_test, y_test)

In [None]:
def objective_lgbm(trial):
    """Optuna objective for the One-vs-Rest LightGBM pipeline.

    Draws one hyper-parameter set from ``trial``, runs leave-one-out
    cross-validation over ``X_train`` / ``y_train`` and scores the pooled
    out-of-fold predictions.

    Args:
        trial: The Optuna trial used to sample hyper-parameters. Parameter
            names carry the ``model__estimator__`` prefix so they can be passed
            directly to ``Pipeline.set_params``.

    Returns:
        The weighted F1 score computed once over all held-out predictions.
    """
    params = {
        "model__estimator__n_estimators": trial.suggest_int("model__estimator__n_estimators", 100, 1000),
        "model__estimator__max_depth": trial.suggest_int("model__estimator__max_depth", 3, 12),
        "model__estimator__learning_rate": trial.suggest_float("model__estimator__learning_rate", 0.01, 0.3),
        "model__estimator__num_leaves": trial.suggest_int("model__estimator__num_leaves", 15, 100),
        # NOTE(review): LightGBM documents that subsample only takes effect
        # when subsample_freq > 0, which is not set here — confirm whether this
        # dimension of the search does anything.
        "model__estimator__subsample": trial.suggest_float("model__estimator__subsample", 0.6, 1.0),
        "model__estimator__colsample_bytree": trial.suggest_float("model__estimator__colsample_bytree", 0.6, 1.0),
        "model__estimator__reg_alpha": trial.suggest_float("model__estimator__reg_alpha", 0.0, 1.0),
        "model__estimator__reg_lambda": trial.suggest_float("model__estimator__reg_lambda", 0.0, 1.0),
    }
    # Work on a clone of the shared ColumnTransformer so the repeated fits
    # below do not overwrite the fitted state seen by other pipelines that
    # were built from the same `transform` object.
    pipe = Pipeline([
        ("transformation", sklearn.base.clone(transform)),
        ("model", OneVsRestClassifier(LGBMClassifier(random_state=42)))
    ])
    pipe.set_params(**params)

    # Every leave-one-out fold holds exactly one row, so a per-fold weighted F1
    # can only be 1.0 (hit) or 0.0 (miss) and its mean is plain accuracy.
    # Pooling the held-out predictions and scoring once gives a real weighted F1.
    held_out_true = []
    held_out_pred = []
    for fit_idx, held_out_idx in LeaveOneOut().split(X_train):
        pipe.fit(X_train.iloc[fit_idx], y_train.iloc[fit_idx])
        held_out_pred.extend(pipe.predict(X_train.iloc[held_out_idx]))
        held_out_true.extend(y_train.iloc[held_out_idx])
    return f1_score(held_out_true, held_out_pred, average='weighted')

# Seed the sampler so the hyper-parameters drawn by the search are the same on
# every Restart & Run All (42 matches the seed used for the train/test split).
study_lgbm = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_lgbm.optimize(objective_lgbm, n_trials=2)
best_lgbm = study_lgbm.best_params

In [None]:
# Final LightGBM pipeline. It gets its own clone of the shared
# ColumnTransformer: pipe_lgbm is used again further down the notebook, and a
# shared instance would be refitted by every other pipeline fitted in between.
pipe_lgbm = Pipeline([
    ("transformation", sklearn.base.clone(transform)),
    ("model", OneVsRestClassifier(LGBMClassifier(random_state=42)))
])

# Apply the tuned hyper-parameters, then train on the full training split.
pipe_lgbm.set_params(**best_lgbm)

pipe_lgbm.fit(X_train, y_train)

# Log the evaluation as one run of the "raw_data" experiment.
mlflow.set_experiment("raw_data")
with mlflow.start_run(run_name="LightGBM_first_optuna"):
    mlflow_run(pipe_lgbm, best_lgbm, X_test, y_test)

In [None]:
def objective_cat(trial):
    """Optuna objective for the One-vs-Rest CatBoost pipeline.

    Draws one hyper-parameter set from ``trial``, runs leave-one-out
    cross-validation over ``X_train`` / ``y_train`` and scores the pooled
    out-of-fold predictions.

    Args:
        trial: The Optuna trial used to sample hyper-parameters. Parameter
            names carry the ``model__estimator__`` prefix so they can be passed
            directly to ``Pipeline.set_params``.

    Returns:
        The weighted F1 score computed once over all held-out predictions.
    """
    params = {
        "model__estimator__iterations": trial.suggest_int("model__estimator__iterations", 300, 1000),
        "model__estimator__depth": trial.suggest_int("model__estimator__depth", 4, 10),
        "model__estimator__learning_rate": trial.suggest_float("model__estimator__learning_rate", 0.01, 0.3),
        "model__estimator__l2_leaf_reg": trial.suggest_float("model__estimator__l2_leaf_reg", 1.0, 10.0),
    }
    # Work on a clone of the shared ColumnTransformer so the repeated fits
    # below do not overwrite the fitted state seen by other pipelines that
    # were built from the same `transform` object.
    pipe = Pipeline([
        ("transformation", sklearn.base.clone(transform)),
        ("model", OneVsRestClassifier(CatBoostClassifier(verbose=0)))
    ])
    pipe.set_params(**params)

    # Every leave-one-out fold holds exactly one row, so a per-fold weighted F1
    # can only be 1.0 (hit) or 0.0 (miss) and its mean is plain accuracy.
    # Pooling the held-out predictions and scoring once gives a real weighted F1.
    held_out_true = []
    held_out_pred = []
    for fit_idx, held_out_idx in LeaveOneOut().split(X_train):
        pipe.fit(X_train.iloc[fit_idx], y_train.iloc[fit_idx])
        held_out_pred.extend(pipe.predict(X_train.iloc[held_out_idx]))
        held_out_true.extend(y_train.iloc[held_out_idx])
    return f1_score(held_out_true, held_out_pred, average='weighted')

# Seed the sampler so the hyper-parameters drawn by the search are the same on
# every Restart & Run All (42 matches the seed used for the train/test split).
study_cat = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_cat.optimize(objective_cat, n_trials=2)
best_cat = study_cat.best_params

In [None]:
# Final CatBoost pipeline. It gets its own clone of the shared
# ColumnTransformer: pipe_cat is used again further down the notebook, and a
# shared instance would be refitted by every other pipeline fitted in between.
pipe_cat = Pipeline([
    ("transformation", sklearn.base.clone(transform)),
    ("model", OneVsRestClassifier(CatBoostClassifier(verbose=0)))
])

# Apply the tuned hyper-parameters, then train on the full training split.
pipe_cat.set_params(**best_cat)

pipe_cat.fit(X_train, y_train)

# Log the evaluation as one run of the "raw_data" experiment.
mlflow.set_experiment("raw_data")
with mlflow.start_run(run_name="CatBoost_first_optuna"):
    mlflow_run(pipe_cat, best_cat, X_test, y_test)

In [18]:
# One pipeline per boosting library, keyed by display name. Each pipeline owns
# a clone of the ColumnTransformer: with a single shared instance, fitting any
# one pipeline would silently refit the preprocessing used by the other two.
models = {
    "XGBoost": Pipeline([
        ("transformation", sklearn.base.clone(transform)),
        ("model",  OneVsRestClassifier(XGBClassifier()))
    ]),
    "LightGBM": Pipeline([
        ("transformation", sklearn.base.clone(transform)),
        ("model",  OneVsRestClassifier(LGBMClassifier(random_state=42)))
    ]),
    "CatBoost": Pipeline([
        ("transformation", sklearn.base.clone(transform)),
        ("model", OneVsRestClassifier(CatBoostClassifier(verbose=0)))
    ])
}

In [None]:
# Push the hyper-parameters found by each Optuna study into the matching
# pipeline. The pipelines are not fitted here; that happens in later cells.
models["XGBoost"].set_params(**best_xgb)
models["LightGBM"].set_params(**best_lgbm)
# Last expression of the cell: its return value is what the notebook displays.
models["CatBoost"].set_params(**best_cat)


In [20]:
def calculate_metrics(conf_matrix, class_idx, y_true=None, y_scores=None, beta=0.5):
    """Compute one-vs-rest metrics for a single class of a multi-class confusion matrix.

    Args:
        conf_matrix: Square 2-D array; rows are indexed as true classes and
            columns as predicted classes (``conf_matrix[class_idx, :]`` is
            treated as the true-class row).
        class_idx: Index of the class treated as "positive".
        y_true: Optional binary ground truth for this class. When given with
            ``y_scores``, ROC-AUC and the ROC curve are added.
        y_scores: Optional scores for this class, aligned with ``y_true``.
        beta: Weight of recall in the F-beta score.

    Returns:
        Dict of metrics. The F-beta entry is keyed ``"F<beta> Score"`` with
        beta rendered compactly (``"F0.5 Score"``, ``"F2 Score"``). The
        ``"Confusion Matrix"`` entry is laid out as ``[[tp, fp], [fn, tn]]``.
        ``"ROC-AUC"`` / ``"ROC Curve"`` are present only when both ``y_true``
        and ``y_scores`` are supplied; if they cannot be computed, ``"ROC-AUC"``
        is ``None`` and ``"ROC Curve"`` holds the error text.
    """
    tp = conf_matrix[class_idx, class_idx]
    fn = conf_matrix[class_idx, :].sum() - tp
    fp = conf_matrix[:, class_idx].sum() - tp
    tn = conf_matrix.sum() - (tp + fp + fn)

    # Every ratio falls back to 0 when its denominator is empty.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
    accuracy = (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) > 0 else 0
    f1 = 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) > 0 else 0
    # Guard on the exact denominator that is used in the division.
    fbeta_denominator = beta**2 * precision + recall
    fbeta = (1 + beta**2) * precision * recall / fbeta_denominator if fbeta_denominator > 0 else 0
    balanced_acc = (recall + specificity) / 2

    metrics = {
        "Accuracy": accuracy,
        "Confusion Matrix": [[int(tp), int(fp)], [int(fn), int(tn)]],
        "Precision": precision,
        "Recall (Sensitivity)": recall,
        "Specificity": specificity,
        "FPR": fpr,
        "F1 Score": f1,
        # ':g' keeps fractional betas readable (0.5 -> "F0.5 Score") instead of
        # truncating them to "F0 Score", and still yields "F2 Score" for 2.0.
        f"F{beta:g} Score": fbeta,
        "Balanced Accuracy": balanced_acc,
    }

    if y_true is not None and y_scores is not None:
        try:
            metrics["ROC-AUC"] = roc_auc_score(y_true, y_scores)
            fpr_arr, tpr_arr, thresholds = roc_curve(y_true, y_scores)
            metrics["ROC Curve"] = {
                "FPR": fpr_arr.tolist(),
                "TPR": tpr_arr.tolist(),
                "Thresholds": thresholds.tolist()
            }
        except Exception as e:
            # Best effort: keep the other metrics and record why ROC failed.
            metrics["ROC-AUC"] = None
            metrics["ROC Curve"] = {"error": str(e)}

    return metrics

In [None]:
# Per-model, per-class metrics keyed by (model_name, class_idx).
results = {}
class_indices = list(range(len(le.classes_)))

for model_name, model in models.items():
    print(f"Model: {model_name}")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Pin the label set so row/column i always stands for class i, even when a
    # class is absent from both y_test and y_pred.
    conf_mat = confusion_matrix(y_test, y_pred, labels=class_indices)
    print("Overall Confusion Matrix:")
    print(conf_mat)

    # Ask for probabilities once per model instead of once per class. If that
    # is not possible, fall back to the hard 0/1 predictions as scores.
    y_proba = None
    if hasattr(model.named_steps["model"], "predict_proba"):
        try:
            y_proba = model.predict_proba(X_test)
        except Exception as exc:
            print(f"predict_proba failed for {model_name}: {exc}. Using hard predictions as scores.")

    for class_idx in class_indices:
        print(f"Class {le.inverse_transform([class_idx])[0]} (label {class_idx}):")

        y_test_bin = (y_test == class_idx).astype(int)
        y_pred_bin = (y_pred == class_idx).astype(int)

        if y_proba is not None and class_idx < y_proba.shape[1]:
            y_scores = y_proba[:, class_idx]
        else:
            y_scores = y_pred_bin

        metrics = calculate_metrics(conf_mat, class_idx, y_true=y_test_bin, y_scores=y_scores)
        results[(model_name, class_idx)] = metrics

        for k, v in metrics.items():
            # Nested entries (the ROC curve points) are too long to print.
            if isinstance(v, dict):
                continue
            print(f"{k}: {v:.4f}" if isinstance(v, float) else f"{k}: {v}")

In [None]:
# One figure per model with a one-vs-rest ROC curve for every class.
for model_name, model in models.items():
    print(f"Plotting One-vs-Rest ROC curves for model: {model_name}")
    model.fit(X_train, y_train)

    if not hasattr(model.named_steps["model"], "predict_proba"):
        print(f"Model {model_name} does not support probability predictions. Skipping ROC plots.")
        continue

    y_proba = model.predict_proba(X_test)
    n_classes = len(le.classes_)
    y_test_bin = label_binarize(y_test, classes=list(range(n_classes)))

    plt.figure(figsize=(12, 8))
    for class_idx in range(n_classes):
        class_label = le.inverse_transform([class_idx])[0]
        y_true_class = y_test_bin[:, class_idx]

        # roc_auc_score raises when y_true holds a single outcome, which
        # happens whenever a class has no rows (or all rows) in the test split.
        # Skip that class instead of aborting the whole cell.
        n_positive = int(y_true_class.sum())
        if n_positive == 0 or n_positive == len(y_true_class):
            print(f"Class {class_label}: only one outcome present in y_test, ROC is undefined. Skipping.")
            continue

        fpr, tpr, _ = roc_curve(y_true_class, y_proba[:, class_idx])
        auc_score = roc_auc_score(y_true_class, y_proba[:, class_idx])
        plt.plot(fpr, tpr, label=f"Class {class_label} vs Rest (AUC = {auc_score:.2f})")

    plt.plot([0, 1], [0, 1], 'k--', label="Random Guessing")
    plt.title(f"One-vs-Rest ROC Curves - {model_name}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc='lower right')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [None]:
def evaluate_model(model, model_name, X_test, y_test, beta=2.0):
    """Build a per-class metrics table for one fitted model.

    Args:
        model: Fitted estimator providing ``predict`` and ``predict_proba``.
        model_name: Value written to the ``"Model"`` column of every row.
        X_test: Test features.
        y_test: Test labels, encoded as class indices.
        beta: Forwarded to ``calculate_metrics`` for the F-beta score.

    Returns:
        A DataFrame with one row per entry of ``le.classes_``: the dict
        returned by ``calculate_metrics`` plus ``"Model"`` and ``"Class"``.
    """
    class_indices = list(range(len(le.classes_)))
    predicted = model.predict(X_test)
    probabilities = model.predict_proba(X_test)
    conf = confusion_matrix(y_test, predicted, labels=class_indices)

    def _metrics_row(class_idx):
        # One-vs-rest view of this class: binary truth and its probability column.
        row = calculate_metrics(
            conf_matrix=conf,
            class_idx=class_idx,
            y_true=(y_test == class_idx).astype(int),
            y_scores=probabilities[:, class_idx],
            beta=beta,
        )
        row["Model"] = model_name
        row["Class"] = le.inverse_transform([class_idx])[0]
        return row

    return pd.DataFrame([_metrics_row(class_idx) for class_idx in class_indices])

# Stack the per-class tables of every model in `models` into a single frame.
all_metrics = pd.concat(
    [evaluate_model(fitted, label, X_test, y_test) for label, fitted in models.items()],
    ignore_index=True,
)

# Keep the scalar columns only, in reporting order. "F2 Score" is the F-beta
# column produced by evaluate_model's default beta of 2.0.
cols = [
    "Model",
    "Class",
    "Accuracy",
    "Precision",
    "Recall (Sensitivity)",
    "Specificity",
    "FPR",
    "F1 Score",
    "F2 Score",
    "Balanced Accuracy",
    "ROC-AUC",
]
all_metrics = all_metrics.loc[:, cols]

# Show every row and column, with floats rounded to three decimals.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.float_format = '{:.3f}'.format

display(all_metrics.sort_values(by=["Model", "Class"]))

In [None]:
def plot_overall_confusion_matrix(model, model_name, X_test, y_test, class_labels):
    """Draw the multi-class confusion matrix of ``model`` on the test set as a heatmap.

    Args:
        model: Fitted estimator providing ``predict``.
        model_name: Name shown in the figure title.
        X_test: Test features.
        y_test: Test labels, encoded as class indices.
        class_labels: Tick labels; their count also fixes the matrix size.
    """
    predictions = model.predict(X_test)
    matrix = confusion_matrix(y_test, predictions, labels=range(len(class_labels)))

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=ax,
    )
    ax.set_title(f'Overall Confusion Matrix - {model_name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    fig.tight_layout()
    plt.show()

def plot_binary_confusion_matrix(model, model_name, X_test, y_test, class_idx, class_name):
    """Draw the one-vs-rest (2x2) confusion matrix of a single class as a heatmap.

    Args:
        model: Fitted estimator providing ``predict``.
        model_name: Name shown in the figure title.
        X_test: Test features.
        y_test: Test labels, encoded as class indices.
        class_idx: Index of the class treated as "positive".
        class_name: Text used for the tick labels and the title.
    """
    y_pred = model.predict(X_test)
    bin_true = (y_test == class_idx).astype(int)
    bin_pred = (y_pred == class_idx).astype(int)

    # Pin both labels so the matrix is always 2x2. Without this, a class that
    # is neither present in y_test nor ever predicted yields a 1x1 matrix that
    # does not match the two tick labels below.
    cm = confusion_matrix(bin_true, bin_pred, labels=[0, 1])

    plt.figure(figsize=(4, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Oranges", cbar=False,
                xticklabels=["Not " + class_name, class_name],
                yticklabels=["Not " + class_name, class_name])
    plt.title(f'Binary Confusion Matrix - {model_name} - Class: {class_name}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.show()

# For every model: the overall confusion matrix first, then one binary
# (one-vs-rest) matrix per class in le.classes_.
for model_name, model in models.items():
    print(f"🔷 {model_name}")
    plot_overall_confusion_matrix(model, model_name, X_test, y_test, le.classes_)

    for class_idx, class_name in enumerate(le.classes_):
        plot_binary_confusion_matrix(
            model,
            model_name,
            X_test,
            y_test,
            class_idx,
            str(class_name),
        )

In [None]:
def plot_roc_pr_curves(model, X_test, y_test, model_name):
    """Show one-vs-rest ROC curves, then precision-recall curves, for every class.

    Args:
        model: Fitted estimator providing ``predict_proba``.
        X_test: Test features.
        y_test: Test labels, encoded as class indices.
        model_name: Name shown in both figure titles.
    """
    scores = model.predict_proba(X_test)
    n_classes = len(le.classes_)
    truth = label_binarize(y_test, classes=range(n_classes))
    class_names = [le.inverse_transform([i])[0] for i in range(n_classes)]

    # ROC: compute every curve and its area first, then draw them together.
    roc_points = []
    for i in range(n_classes):
        fpr_i, tpr_i, _ = roc_curve(truth[:, i], scores[:, i])
        roc_points.append((fpr_i, tpr_i, auc(fpr_i, tpr_i)))

    plt.figure(figsize=(10, 8))
    for name, (fpr_i, tpr_i, area) in zip(class_names, roc_points):
        plt.plot(fpr_i, tpr_i, lw=2, label=f"Class {name} (AUC = {area:.2f})")
    plt.plot([0, 1], [0, 1], "--", color="gray")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(f"ROC Curve - {model_name}")
    plt.legend(loc="lower right")
    plt.grid()
    plt.show()

    # Precision-recall: same two-pass structure.
    pr_points = []
    for i in range(n_classes):
        precision_i, recall_i, _ = precision_recall_curve(truth[:, i], scores[:, i])
        pr_points.append((recall_i, precision_i))

    plt.figure(figsize=(10, 8))
    for name, (recall_i, precision_i) in zip(class_names, pr_points):
        plt.plot(recall_i, precision_i, lw=2, label=f"Class {name}")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall Curve - {model_name}")
    plt.legend(loc="lower left")
    plt.grid()
    plt.show()

# Plot the ROC and precision-recall curves for each fitted pipeline.
for fitted_pipeline, label in (
    (pipe, "XGBoost"),
    (pipe_lgbm, "LightGBM"),
    (pipe_cat, "CatBoost"),
):
    plot_roc_pr_curves(fitted_pipeline, X_test, y_test, label)

In [2]:
# NOTE(review): mlflow is already imported in the first cell; this repeated
# import suggests the cell was run on its own (its execution count is In [2]).
import mlflow
# URI of the "model_pipeline" artifact of one specific MLflow run.
# NOTE(review): the run id is hard-coded, so this only resolves against the
# tracking store that contains that run — confirm it is the intended one.
logged_model = 'runs:/d8a256f3cc1c49dabc6dc6392394c69e/model_pipeline'

# Load the model through the generic pyfunc flavor (returns a PyFuncModel wrapper).
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Example usage, left disabled: predict on a pandas DataFrame.
#import pandas as pd
#loaded_model.predict(X_test)

In [None]:
import joblib
# Serialize the model loaded from MLflow to 'model.pkl' in the working directory.
# NOTE(review): loaded_model is the wrapper returned by mlflow.pyfunc.load_model,
# not the bare scikit-learn pipeline — confirm that this is the object meant to
# be pickled.
joblib.dump(loaded_model, 'model.pkl')


In [None]:
# Read the pickle back as a round-trip check; the result is only displayed,
# not bound to a name. joblib uses pickle underneath, so only load files that
# come from a trusted source.
joblib.load('model.pkl')