# Imports

In [16]:
import sys
import pandas as pd
import joblib
# Put ../src/ at the front of the import search path so the project-local
# imports further down in this cell resolve (presumably the data/utils/train/evaluation
# packages live there — TODO confirm). The path is relative, so it resolves
# against the kernel's working directory.
sys.path.insert(0, '../src/')

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from feature_engine.imputation import MeanMedianImputer
from feature_engine.wrappers import SklearnTransformerWrapper
from sklearn.preprocessing import RobustScaler, StandardScaler
from feature_engine.discretisation import EqualFrequencyDiscretiser, EqualWidthDiscretiser


from data.data_load import DataLoad
from data.data_validation import DataValidation
from data.data_transformation import DataTransformation
from data.data_preprocess import DataPreprocess
from utils.utils import load_config_file
from train.train_model import TrainModel
from evaluation.classifiers_eval import ModelEvaluation

# 1.0 Data Load

In [17]:
# Load the dataset identified by 'train_dataset_name' through the project's
# DataLoad helper.
# NOTE(review): the argument looks like a configuration key rather than a
# file path — confirm against data.data_load.
dl = DataLoad()
df = dl.load_data('train_dataset_name')

2023-12-12 16:43:33 [info     ] Iniciando o carregamento


# 2.0 Data Validation

In [18]:
# Run the project's validation on the loaded frame and keep its result.
# NOTE(review): is_valid is not checked in any of the visible cells, so a
# failed validation would not stop the notebook — confirm that is intended.
dv = DataValidation()
is_valid = dv.run(df)

2023-12-12 16:43:33 [info     ] Validação iniciou..
2023-12-12 16:43:34 [info     ] Validation columns passed...
2023-12-12 16:43:34 [info     ] Validacao com sucesso.


# 3.0 Data Transformation

In [19]:
# Wrap the loaded frame in the project's DataTransformation helper; its
# train_test_split() method is called in the following cell.
dt = DataTransformation(df)

In [20]:
# Split into training and validation features (X_*) and targets (y_*).
# NOTE(review): no seed or split ratio is passed here, so reproducibility of
# the split depends on DataTransformation's own configuration — confirm.
X_train, X_valid, y_train, y_valid = dt.train_test_split()

# 4.0 Experiments

In [21]:
import mlflow
from mlflow.tracking import MlflowClient

In [22]:
# Point MLflow at the tracking server on localhost:5000 and make 'prob_loan'
# the active experiment for subsequent MLflow calls in this kernel.
# The tracking server has to be reachable at that address for the cells
# below to work.
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('prob_loan')

<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1701870074069, experiment_id='1', last_update_time=1701870074069, lifecycle_stage='active', name='prob_loan', tags={}>

## 4.1 Select Best Model

In [23]:
# Look up the 'prob_loan' experiment and convert it to a plain dict so its
# fields can be read by key.
# NOTE(review): get_experiment_by_name returns None for an unknown name, in
# which case dict(None) raises TypeError.
current_experiment = dict(mlflow.get_experiment_by_name('prob_loan'))

In [24]:
# ID of the 'prob_loan' experiment, read from the dict built in the previous cell.
experiment_id = current_experiment['experiment_id']

In [25]:
# Fetch the runs whose valid_roc_auc metric is below 1 and rank them
# best-first by that metric.
# The search is scoped explicitly to experiment_id (the 'prob_loan'
# experiment looked up earlier) instead of relying on whichever experiment
# happens to be active in the kernel, so the cell does not depend on hidden
# MLflow state.
df_mlflow = (
    mlflow.search_runs(
        experiment_ids=[experiment_id],
        filter_string='metrics.valid_roc_auc < 1',
    )
    .sort_values('metrics.valid_roc_auc', ascending=False)
)

In [26]:
# Display the runs table, ordered by validation ROC AUC (highest first).
df_mlflow

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.roc_auc,metrics.recall_score,metrics.f1_score,metrics.train_roc_auc,...,params.scaler,params.max_iter,params.class_weight,tags.mlflow.runName,tags.model_name,tags.mlflow.datasets,tags.mlflow.source.name,tags.mlflow.log-model.history,tags.mlflow.source.type,tags.mlflow.user
0,8716bec0259d4a02a84cfc08585599c3,1,FINISHED,mlflow-artifacts:/1/8716bec0259d4a02a84cfc0858...,2023-12-12 19:24:14.370000+00:00,2023-12-12 19:24:28.273000+00:00,0.851332,0.712904,0.351042,0.84735,...,SklearnTransformerWrapper(transformer=Standard...,588.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""8716bec0259d4a02a84cfc08585599c3""...",LOCAL,thale
3,05cf63db08a14e6587da093599dd3d52,1,FINISHED,mlflow-artifacts:/1/05cf63db08a14e6587da093599...,2023-12-12 19:23:31.020000+00:00,2023-12-12 19:23:45.665000+00:00,0.851325,0.712602,0.351259,0.847328,...,SklearnTransformerWrapper(transformer=Standard...,560.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""05cf63db08a14e6587da093599dd3d52""...",LOCAL,thale
1,6394160dd5e846618afa960d49c121e2,1,FINISHED,mlflow-artifacts:/1/6394160dd5e846618afa960d49...,2023-12-12 19:24:00.198000+00:00,2023-12-12 19:24:14.295000+00:00,0.851323,0.711998,0.351196,0.84732,...,SklearnTransformerWrapper(transformer=Standard...,470.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""6394160dd5e846618afa960d49c121e2""...",LOCAL,thale
2,0a900dda5b3946e28c9c9d9f5fe9236e,1,FINISHED,mlflow-artifacts:/1/0a900dda5b3946e28c9c9d9f5f...,2023-12-12 19:23:45.740000+00:00,2023-12-12 19:24:00.119000+00:00,0.851322,0.711998,0.351327,0.847312,...,SklearnTransformerWrapper(transformer=Standard...,251.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""0a900dda5b3946e28c9c9d9f5fe9236e""...",LOCAL,thale
4,9a2d76b5aa6f44e6b8b10f1fdca09012,1,FINISHED,mlflow-artifacts:/1/9a2d76b5aa6f44e6b8b10f1fdc...,2023-12-12 19:23:09.970000+00:00,2023-12-12 19:23:30.947000+00:00,0.838997,0.868843,0.236373,0.83495,...,SklearnTransformerWrapper(transformer=Standard...,565.0,balanced,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""9a2d76b5aa6f44e6b8b10f1fdca09012""...",LOCAL,thale
5,660b5b56d0b44248bfe2c5d547ce1017,1,FINISHED,mlflow-artifacts:/1/660b5b56d0b44248bfe2c5d547...,2023-12-06 13:53:42.864000+00:00,2023-12-06 13:53:56.896000+00:00,,,,0.692754,...,SklearnTransformerWrapper(transformer=Standard...,,,baseline,lr_baseline,,c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""660b5b56d0b44248bfe2c5d547ce1017""...",LOCAL,thale


In [27]:
# Run ID of the best model, i.e. the run with the highest validation ROC AUC.
# Row label and column are resolved in a single .loc lookup.
run_id = df_mlflow.loc[df_mlflow['metrics.valid_roc_auc'].idxmax(), 'run_id']
run_id

'8716bec0259d4a02a84cfc08585599c3'

In [28]:
# Display the runs table again, next to the selected run_id.
# NOTE(review): df_mlflow has not been modified since it was last displayed,
# so this repeats the earlier output.
df_mlflow

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.roc_auc,metrics.recall_score,metrics.f1_score,metrics.train_roc_auc,...,params.scaler,params.max_iter,params.class_weight,tags.mlflow.runName,tags.model_name,tags.mlflow.datasets,tags.mlflow.source.name,tags.mlflow.log-model.history,tags.mlflow.source.type,tags.mlflow.user
0,8716bec0259d4a02a84cfc08585599c3,1,FINISHED,mlflow-artifacts:/1/8716bec0259d4a02a84cfc0858...,2023-12-12 19:24:14.370000+00:00,2023-12-12 19:24:28.273000+00:00,0.851332,0.712904,0.351042,0.84735,...,SklearnTransformerWrapper(transformer=Standard...,588.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""8716bec0259d4a02a84cfc08585599c3""...",LOCAL,thale
3,05cf63db08a14e6587da093599dd3d52,1,FINISHED,mlflow-artifacts:/1/05cf63db08a14e6587da093599...,2023-12-12 19:23:31.020000+00:00,2023-12-12 19:23:45.665000+00:00,0.851325,0.712602,0.351259,0.847328,...,SklearnTransformerWrapper(transformer=Standard...,560.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""05cf63db08a14e6587da093599dd3d52""...",LOCAL,thale
1,6394160dd5e846618afa960d49c121e2,1,FINISHED,mlflow-artifacts:/1/6394160dd5e846618afa960d49...,2023-12-12 19:24:00.198000+00:00,2023-12-12 19:24:14.295000+00:00,0.851323,0.711998,0.351196,0.84732,...,SklearnTransformerWrapper(transformer=Standard...,470.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""6394160dd5e846618afa960d49c121e2""...",LOCAL,thale
2,0a900dda5b3946e28c9c9d9f5fe9236e,1,FINISHED,mlflow-artifacts:/1/0a900dda5b3946e28c9c9d9f5f...,2023-12-12 19:23:45.740000+00:00,2023-12-12 19:24:00.119000+00:00,0.851322,0.711998,0.351327,0.847312,...,SklearnTransformerWrapper(transformer=Standard...,251.0,,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""0a900dda5b3946e28c9c9d9f5fe9236e""...",LOCAL,thale
4,9a2d76b5aa6f44e6b8b10f1fdca09012,1,FINISHED,mlflow-artifacts:/1/9a2d76b5aa6f44e6b8b10f1fdc...,2023-12-12 19:23:09.970000+00:00,2023-12-12 19:23:30.947000+00:00,0.838997,0.868843,0.236373,0.83495,...,SklearnTransformerWrapper(transformer=Standard...,565.0,balanced,with_discretizer_hyperopt,lr_hyperopt,"[{""name"":""2ebb0c36f4b380f19dac359abbc730ef"",""h...",c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""9a2d76b5aa6f44e6b8b10f1fdca09012""...",LOCAL,thale
5,660b5b56d0b44248bfe2c5d547ce1017,1,FINISHED,mlflow-artifacts:/1/660b5b56d0b44248bfe2c5d547...,2023-12-06 13:53:42.864000+00:00,2023-12-06 13:53:56.896000+00:00,,,,0.692754,...,SklearnTransformerWrapper(transformer=Standard...,,,baseline,lr_baseline,,c:\Users\thale\Documents\Projetos_DS\ml_flow\e...,"[{""run_id"": ""660b5b56d0b44248bfe2c5d547ce1017""...",LOCAL,thale
