# MLflow

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns

import mlflow
import mlflow.sklearn # Wrapper pour scikit-learn

from lightgbm import LGBMClassifier

from sklearn.metrics import f1_score, PrecisionRecallDisplay, precision_recall_curve, plot_precision_recall_curve

from google.cloud import storage

# Base directory: the data files are looked up under "<current working dir>/data".
d = os.getcwd()

# Build the paths with os.path.join instead of hard-coded "\\" separators so the
# files are found on any operating system, not only on Windows.
X_train = pd.read_csv(os.path.join(d, "data", "X_train.csv"))
X_test = pd.read_csv(os.path.join(d, "data", "X_test.csv"))
y_train = pd.read_csv(os.path.join(d, "data", "y_train.csv"))
y_test = pd.read_csv(os.path.join(d, "data", "y_test.csv"))

# Default hyper-parameters of the model
hyp_params = dict(
    num_leaves=60,
    min_child_samples=10,
    max_depth=12,
    n_estimators=100,
    learning_rate=0.1,
)

# Authenticate to Google Cloud with the key of the MLflow service account.
# os.path.join keeps the key path valid on non-Windows hosts as well.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(d, "data", "mlflow_key.json")

# Point the MLflow client at the tracking server
mlflow.set_tracking_uri("http://34.140.81.235")
client = storage.Client()

# Runs started afterwards are recorded under this experiment
mlflow.set_experiment("purchase_predict_2")

def save_pr_curve(X, y, model):
    """Plot the precision-recall curve of ``model`` on ``(X, y)`` and save it as a PNG.

    The image is written to ``~/data/pr_curve.png`` (``~`` expanded to the
    current user's home directory); the directory is created when missing.

    Args:
        X: Features passed to ``model.predict_proba``.
        y: True labels; label ``1`` is treated as the positive class.
        model: Fitted classifier exposing ``predict_proba``. Column 1 of its
            output is used as the positive-class score.
    """
    output_path = os.path.expanduser("~/data/pr_curve.png")
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Compute the curve before opening a figure so a failure here leaks nothing.
    prec, recall, _ = precision_recall_curve(y, model.predict_proba(X)[:, 1], pos_label=1)

    fig, ax = plt.subplots(figsize=(16, 11))
    try:
        PrecisionRecallDisplay(precision=prec, recall=recall).plot(ax=ax)
        ax.set_title("PR Curve", fontsize=16)
        # Both axes hold ratios in [0, 1]; display them as whole percentages.
        ax.xaxis.set_major_formatter(mtick.PercentFormatter(1, 0))
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1, 0))
        fig.savefig(output_path)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

def train_model(params):
    """Train a LightGBM binary classifier and record the run in MLflow.

    The model is fitted on the module-level ``X_train`` / ``y_train`` and scored
    on ``X_test`` / ``y_test``. The run logs the hyper-parameters, the F1 score,
    the precision-recall curve image (under ``plots``) and the fitted model
    (under ``model``).

    Args:
        params: Keyword arguments forwarded to ``LGBMClassifier``, on top of
            ``objective="binary"`` and ``verbose=-1``.
    """
    with mlflow.start_run():
        model = LGBMClassifier(**params, objective="binary", verbose=-1)
        model.fit(X_train, y_train)

        score = f1_score(y_test, model.predict(X_test))
        save_pr_curve(X_test, y_test, model)

        # Log the parameters this model was actually trained with, not the
        # module-level defaults, so overridden values show up in the run.
        mlflow.log_params(params)
        mlflow.log_metric("f1", score)
        mlflow.log_artifact(os.path.expanduser("~/data/pr_curve.png"), artifact_path="plots")
        mlflow.sklearn.log_model(model, "model")


# Launch one training run: base hyper-parameters with n_estimators and
# learning_rate overridden.
train_model(dict(hyp_params, n_estimators=200, learning_rate=0.05))



ConnectionError: HTTPSConnectionPool(host='104.155.63.113', port=443): Max retries exceeded with url: /api/2.0/mlflow/experiments/get-by-name?experiment_name=purchase_predict_2 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x000001A396328B70>: Failed to establish a new connection: [WinError 10060] Une tentative de connexion a échoué car le parti connecté n’a pas répondu convenablement au-delà d’une certaine durée ou une connexion établie a échoué car l’hôte de connexion n’a pas répondu'))