# Implémentez un modèle de scoring
## Notebook 3 - API  
OpenClassrooms - Parcours Data Scientist - Projet 07  

## 1. Imports

In [1]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import joblib
import warnings
import os

## 2. Paramétrages

In [2]:
# Suppress every Python warning for the rest of the session so that cell
# outputs stay uncluttered. This applies to all warning categories, including
# any raised by the libraries used further down.
warnings.simplefilter('ignore')

In [3]:
# Point the MLflow client at the tracking server that will store the run.
# A non-empty MLFLOW_TRACKING_URI environment variable takes precedence, so the
# notebook can target another server without editing this cell; otherwise the
# local server on port 5001 is used, exactly as before.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://127.0.0.1:5001"
mlflow.set_tracking_uri(tracking_uri)

In [None]:
# Build the locations of the three files used by this notebook. Each one lives
# in the "data" folder located one level above the current working directory.
base_dir = os.getcwd()
scaler_path, run_id_path, df_train_median_path = (
    os.path.join(base_dir, '..', 'data', file_name)
    for file_name in ('scaler.pkl', 'run_id.txt', 'df_train_median.csv')
)

## 3. Données

### 3.1. Chargement

In [4]:
# Read the training set from CSV, then split it into the label and the features.
df_train = pd.read_csv(df_train_median_path)

# Label: the TARGET column.
y_train = df_train['TARGET']
# Features: every remaining column except SK_ID_CURR
# (presumably a row identifier with no predictive value — confirm).
X_train = df_train.drop(['TARGET', 'SK_ID_CURR'], axis=1)

### 3.2. Mise à l'échelle

In [5]:
# Fit a StandardScaler on the raw feature values, then replace X_train with its
# scaled version. Plain arrays (not DataFrames) are passed to the scaler, so it
# is fitted without column names.
# NOTE: X_train is rebound from a DataFrame to an array here, so this cell
# cannot be re-run without first re-running the loading cell.
X_train_values = X_train.values
scaler = StandardScaler()
scaler.fit(X_train_values)
X_train = scaler.transform(X_train_values)

### 3.3. Sauvegarde du scaler

In [6]:
# Persist the fitted scaler to disk. The call is left as the last expression so
# that the cell displays the list of written file names.
joblib.dump(value=scaler, filename=scaler_path)

['../data/scaler.pkl']

## 4. Entraînement du modèle et enregistrement MLflow

In [7]:
# Logistic regression with balanced class weights; the hyperparameters are
# fixed here rather than searched.
lr_model = LogisticRegression(
    solver='lbfgs',
    max_iter=10000,
    class_weight='balanced',
    C=5e-05,
)

with mlflow.start_run(run_name="LogisticRegressionModel") as run:
    # Train the model on the scaled features
    lr_model.fit(X_train, y_train)

    # Log the fitted model as an artifact of this MLflow run
    mlflow.sklearn.log_model(lr_model, "LogisticRegressionModel")

    # Retrieve the run id and write it to disk so that it can be read back later
    run_id = run.info.run_id
    with open(run_id_path, "w") as run_id_file:
        run_id_file.write(run_id)