# Registar um pipeline no mlflow

Um pipeline não é mais que a composição de vários modelos/transformações

In [6]:
import mlflow

In [7]:
pip install mlflow

Note: you may need to restart the kernel to use updated packages.


In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier  
from sklearn.neural_network import MLPClassifier 
from sklearn.pipeline import Pipeline

In [9]:
# Directory (relative to this notebook) that holds the diabetes CSV files.
ROOT_PATH = '../../data/'
# Seed used as the model's random_state and logged as a run parameter.
SEED = 42
# Name of the label column in the train/test CSVs.
TARGET_COL = "Outcome"

## Definir a diretoria onde as experiências são guardadas

In [10]:
from pathlib import Path

# Local folder (relative to this notebook) used as the MLflow tracking store.
uri = "../../mlruns"

# Make sure the folder exists (creating missing parents; a no-op when it is
# already there), then point MLflow at it.
tracking_dir = Path(uri)
tracking_dir.mkdir(parents=True, exist_ok=True)
mlflow.set_tracking_uri(uri)

## Fazer set da experiência "Diabetes Prediction Experiment"

In [11]:
# Make "Diabetes Prediction Experiment" the active experiment for the runs
# started below; the returned Experiment object is shown as the cell output.
mlflow.set_experiment("Diabetes Prediction Experiment")

<Experiment: artifact_location=('file:c:/Users/diogo/OneDrive/Desktop/Data '
 'scienc/OML/OML-main/notebooks/mlflow/../../mlruns/295588846736925902'), creation_time=1740777635527, experiment_id='295588846736925902', last_update_time=1740777635527, lifecycle_stage='active', name='Diabetes Prediction Experiment', tags={}>

## Criar os datasets

In [12]:
# Locations of the train/test CSV files under ROOT_PATH.
train_path = f"{ROOT_PATH}diabetes_train.csv"
test_path = f"{ROOT_PATH}diabetes_test.csv"

# Load both splits into DataFrames.
train_set, test_set = pd.read_csv(train_path), pd.read_csv(test_path)

# Separate the feature columns from the target column for each split.
X_train, y_train = train_set.drop(columns=[TARGET_COL]), train_set[TARGET_COL]
X_test, y_test = test_set.drop(columns=[TARGET_COL]), test_set[TARGET_COL]

# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,3,173,78,39,185,33.8,0.97,31
1,6,134,80,37,370,46.2,0.238,46
2,5,104,74,0,0,28.8,0.153,48
3,1,139,46,19,83,28.7,0.654,22
4,5,137,108,0,0,48.8,0.227,37


## Criar uma run

In [13]:
# Open a new run in the active experiment. It is started without a `with`
# block because it has to stay open across the following cells; it is closed
# explicitly by the mlflow.end_run() cell further down.
run = mlflow.start_run(run_name="rede neuronal - Run -c2- pipeline")

# Keep the run id so the run can be looked up again after it has ended.
# `run_id` replaces the deprecated `run_uuid` attribute (same value).
RUN_ID = run.info.run_id
RUN_ID

'5ecaa2c1f5c0484c8681a8d1804d547c'

## Guardar datasets, modelos, artefactos, métricas e parâmetros da run

In [14]:
# Wrap the train and test DataFrames as MLflow datasets, recording the CSV
# each one was read from and which column is the target.
train_dataset = mlflow.data.from_pandas(
    train_set,
    source=train_path,
    targets=TARGET_COL,
    name="Diabetes Train Dataset",
)
test_dataset = mlflow.data.from_pandas(
    test_set,
    source=test_path,
    targets=TARGET_COL,
    name="Diabetes Test Dataset",
)

# Attach both datasets to the active run, labelled by how each one is used.
for dataset, usage in ((train_dataset, "train"), (test_dataset, "test")):
    mlflow.log_input(dataset, context=usage)

# Record the seed as a run parameter.
mlflow.log_param("seed", SEED)

  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(


42

In [15]:
# Build the full pipeline: standardise the features, then classify them with
# a multi-layer perceptron.
scaler_step = ("scaler", StandardScaler())
mlp_step = (
    "neural_network",
    MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=SEED),
)
mlp_pipeline = Pipeline(steps=[scaler_step, mlp_step])

# Fit on the training split, then log the whole fitted pipeline (scaler and
# classifier together) to the run and register it under "neuronal_reg".
mlp_pipeline.fit(X_train, y_train)
mlflow.sklearn.log_model(
    mlp_pipeline,
    artifact_path="mlp_pipeline",
    registered_model_name="neuronal_reg",
)

# Display the fitted pipeline as the cell output.
mlp_pipeline

Registered model 'neuronal_reg' already exists. Creating a new version of this model...
Created version '2' of model 'neuronal_reg'.


In [16]:
# Collect every parameter exposed by the pipeline and its steps.
params = mlp_pipeline.get_params()

# Drop the "neural_network__" marker from the keys so the classifier's
# hyper-parameters are logged under their short names.
# NOTE(review): if a shortened name equals another key returned by
# get_params(), the later value silently replaces the earlier one — confirm
# that this is acceptable.
modified_params = {
    name.replace("neural_network__", ''): value
    for name, value in params.items()
}

# Log the renamed parameters to the active run and show them.
mlflow.log_params(modified_params)
modified_params

{'memory': None,
 'steps': [('scaler', StandardScaler()),
  ('neural_network', MLPClassifier(max_iter=300, random_state=42))],
 'transform_input': None,
 'verbose': False,
 'scaler': StandardScaler(),
 'neural_network': MLPClassifier(max_iter=300, random_state=42),
 'scaler__copy': True,
 'scaler__with_mean': True,
 'scaler__with_std': True,
 'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 300,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': 42,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'warm_start': False}

In [17]:
# Predict on the test split with the fitted pipeline.
y_preds = mlp_pipeline.predict(X_test)

# Compare predictions with the true labels and log the accuracy to the run.
acc = accuracy_score(y_true=y_test, y_pred=y_preds)
mlflow.log_metric(key="accuracy", value=acc)

# Display the accuracy as the cell output.
acc

0.7543859649122807

## Terminar a run

In [18]:
# Close the run opened earlier with mlflow.start_run().
mlflow.end_run()

## Consultar uma run já concluída

In [19]:
# Look the run up again by the id saved in RUN_ID. This rebinds `run`, which
# previously held the object returned by mlflow.start_run().
run = mlflow.get_run(RUN_ID)

In [20]:
# Show the data stored for the run (metrics, params and tags).
run.data

<RunData: metrics={'accuracy': 0.7543859649122807}, params={'activation': 'relu',
 'alpha': '0.0001',
 'batch_size': 'auto',
 'beta_1': '0.9',
 'beta_2': '0.999',
 'early_stopping': 'False',
 'epsilon': '1e-08',
 'hidden_layer_sizes': '(100,)',
 'learning_rate': 'constant',
 'learning_rate_init': '0.001',
 'max_fun': '15000',
 'max_iter': '300',
 'memory': 'None',
 'momentum': '0.9',
 'n_iter_no_change': '10',
 'nesterovs_momentum': 'True',
 'neural_network': 'MLPClassifier(max_iter=300, random_state=42)',
 'power_t': '0.5',
 'random_state': '42',
 'scaler': 'StandardScaler()',
 'scaler__copy': 'True',
 'scaler__with_mean': 'True',
 'scaler__with_std': 'True',
 'seed': '42',
 'shuffle': 'True',
 'solver': 'adam',
 'steps': "[('scaler', StandardScaler()), ('neural_network', "
          'MLPClassifier(max_iter=300, random_state=42))]',
 'tol': '0.0001',
 'transform_input': 'None',
 'validation_fraction': '0.1',
 'verbose': 'False',
 'warm_start': 'False'}, tags={'mlflow.log-model.history

## Ver a experiência na UI do mlflow

A UI do mlflow permite ver de forma visual todas as experiências criadas e permite, por exemplo, comparar, filtrar e ordenar as runs dentro de uma experiência.

Para correr a UI do mlflow é necessário executar, na raiz deste projeto (pasta rumos) e tendo activo o ambiente utilizado neste projeto, o comando:

`mlflow ui --backend-store-uri ./mlruns`

**Nota:** O comando em cima irá iniciar a UI de mlflow na porta 5000. Caso queiram mudar esta porta devem acrescentar `--port <PORT>` ao comando (em que `<PORT>` deve ser substituído pela porta desejada).

O comando acima não irá funcionar caso tenham tido alguns problemas no Windows com a instalação do mlflow. Caso tenham problemas, considerem instalar o `mlflow-ui`, ao invés do mlflow.

Após executarem este comando, vão poder ver a UI do mlflow no vosso browser acedendo a 

`http://127.0.0.1:5000`

(se tiverem alterado a porta em que a UI do mlflow é iniciada, então devem alterar também aqui o 5000 por essa porta)

Na tab de `Experiments` podem explorar as experiências e runs que criaram.