![Logo](imgs/logo.jpeg)

## Criando um Experimento

In [1]:
#Importando as bibliotecas necessárias
import sys
import os
import mlflow
import mlflow.sklearn
from urllib.parse import urlparse

#Manipulação de dados
import pandas as pd

# Pré-Processamento
from sklearn.preprocessing import StandardScaler

# Criação do modelo
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

#Métricas
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

#Ignorar avisos de atualização, etc
import warnings
warnings.filterwarnings("ignore")

#Gráficos
import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

import logging

In [2]:
# Optional logging setup (currently disabled):
# logging.basicConfig(level=logging.WARN)
# logger = logging.getLogger(__name__)  # module-level logger, identifies who emitted each record

# Name passed to MLflow as the artifact path when the fitted model is logged.
ARTIFACT_PATH = "model"

# Point the MLflow client at the tracking server on localhost, port 5000.
# A database-backed store could be used instead, for example:
# mlflow.set_tracking_uri('mysql://<user>:<password>@localhost:3306/mlflow')
mlflow.set_tracking_uri('http://localhost:5000')

# Select the experiment that subsequent runs are recorded under.
mlflow.set_experiment('test_mlflow')

# Optional run tags (currently disabled):
# tags = {
#         "Projeto": "Live MLflow",
#         "team": "Data Hackers",
#         "dataset": "Diabetes"
#        }

In [3]:
def metricas(actual, pred):
    """Compute the classification metrics reported for each run.

    The scores are computed with scikit-learn's ``accuracy_score``,
    ``precision_score``, ``recall_score`` and ``f1_score`` using their
    default arguments.

    Args:
        actual: Ground-truth labels.
        pred: Labels predicted by the model, aligned with ``actual``.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``.
    """
    acuracia = accuracy_score(actual, pred)
    precision = precision_score(actual, pred)
    recall = recall_score(actual, pred)
    f1 = f1_score(actual, pred)
    return acuracia, precision, recall, f1

def matriz_confusao(y_test, y_predict):
    """Draw the confusion matrix as an annotated heatmap and return the figure.

    Args:
        y_test: Ground-truth labels; must expose ``.values`` (it is flattened
            with ``.values.ravel()`` before being compared).
        y_predict: Predicted labels, aligned with ``y_test``.

    Returns:
        The matplotlib figure holding the heatmap. The figure is closed
        before being returned, so it is not left open in pyplot; callers can
        still save it with ``fig.savefig(...)``.
    """
    valores_reais = y_test.values.ravel()
    contagens = confusion_matrix(valores_reais, y_predict)

    fig, ax = plt.subplots()
    sns.heatmap(contagens, annot=True, cmap='Blues', ax=ax)

    ax.set(
        xlabel='Valor Predito',
        ylabel='Valor Real',
        title='Matriz de Confusão',
    )
    # Tick labels are hard-coded for two classes (0 and 1).
    for eixo in (ax.xaxis, ax.yaxis):
        eixo.set_ticklabels(['0', '1'])

    plt.close(fig)
    return fig

In [4]:
def modelo():
    """Interactively configure and fit a random-forest classifier.

    Prompts on stdin for the maximum tree depth and for whether class weights
    should be balanced (1 = yes, anything else = no). The classifier is fitted
    on ``x_train`` / ``y_train``, which are read from the enclosing (global)
    scope rather than passed in.

    Returns:
        The fitted ``RandomForestClassifier``.

    Raises:
        ValueError: If either answer cannot be parsed as an integer.
    """
    profundidade = int(input("Profundidade máxima da arvore: "))
    opcao = int(input("Balancear Classe (1-S/0-N): "))

    if opcao == 1:
        peso_classes = "balanced"
    else:
        peso_classes = None

    floresta = RandomForestClassifier(
        max_depth=profundidade,
        class_weight=peso_classes,
        random_state=42,  # fixed seed so repeated runs are comparable
    )
    floresta.fit(x_train, y_train)
    return floresta

def previsao(x_test, modelo):
    """Return the predictions of ``modelo`` for ``x_test``.

    Args:
        x_test: Feature rows to score.
        modelo: Any object exposing a ``predict(x)`` method.

    Returns:
        Whatever ``modelo.predict(x_test)`` returns.
    """
    return modelo.predict(x_test)

In [5]:
# Train a random-forest classifier on the diabetes CSV and record the run
# (parameters, metrics and the fitted model) in MLflow.
warnings.filterwarnings("ignore")

# Load the dataset; the "Outcome" column is the target.
df = pd.read_csv("data/diabetes.csv")

# Split rows into train/test with a fixed seed, then separate features from target.
train, test = train_test_split(df, random_state=42)
x_train = train.drop(columns=["Outcome"])
x_test = test.drop(columns=["Outcome"])
# Double brackets keep the targets as single-column DataFrames.
y_train = train[["Outcome"]]
y_test = test[["Outcome"]]

# run_name is only a display label for this execution; any name would do.
with mlflow.start_run(run_name='rf_v14'):
    warnings.filterwarnings("ignore")
    # Tag registration (disabled)
#     mlflow.set_tags(tags)

    # Build the model: hyperparameters are read interactively from stdin.
    max_depth = int(input("Profundidade máxima da arvore: "))
    balanced = int(input("Balancear Classe (1-S/0-N): "))
    # Map the 1/0 answer onto the class_weight value passed to the classifier.
    balanced = "balanced" if balanced == 1 else None
    clf = RandomForestClassifier(random_state=42, class_weight=balanced, max_depth=max_depth)
    clf.fit(x_train, y_train)
    
    # Predict on the held-out test rows.
    y_pred = clf.predict(x_test)
    
    # Metrics: this call expects metricas to return (accuracy, precision, recall, f1).
    acuracia, precision, recall, f1 = metricas(y_test, y_pred)
    print("Acurácia: {}\nPrecision: {}\nRecall: {}\nF1-Score: {}".
         format(acuracia, precision, recall, f1))

    # Confusion-matrix artifact (disabled)
#     matriz_conf = matriz_confusao(y_test, y_pred)
#     temp_name = "confusion-matrix.png"
#     matriz_conf.savefig(temp_name)
#     mlflow.log_artifact(temp_name, "confusion-matrix-plots")
#     try:
#         os.remove(temp_name)
#     except FileNotFoundError as e:
#         print(f"{temp_name} file is not found")

    # Record the hyperparameters and the evaluation metrics on the active run.
    mlflow.log_param("balanced", balanced)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_metric("Acuracia", acuracia)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("F1-Score", f1)

    # Model registration: earlier variants kept for reference (disabled)
#     mlflow.sklearn.log_model(clf, "rf_test")
#     mlflow.log_artifact(local_path='./train.py', artifact_path='code')
#     mlflow.log_artifact('train.py')

    # Log the fitted model under ARTIFACT_PATH within this run's artifacts.
    mlflow.sklearn.log_model(
        clf,
        ARTIFACT_PATH
#         registered_model_name="sk-learn-random-forest-reg-model"
    )
    
    # Resolve and print the artifact URI for ARTIFACT_PATH.
    model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)
    print(f"Model artifact logged to: {model_uri}")

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



Profundidade máxima da arvore:  1
Balancear Classe (1-S/0-N):  0


Acurácia: 0.7135416666666666
Precision: 0.7692307692307693
Recall: 0.2898550724637681
F1-Score: 0.4210526315789474
Model artifact logged to: ./artifacts/1/b42bccc23934441db4387b166dabd747/artifacts/model


---

### Gerando o Código de Treino

In [4]:
%%writefile train.py

#Importando as bibliotecas necessárias
import sys
import os
import mlflow
import mlflow.sklearn
from urllib.parse import urlparse

#Manipulação de dados
import pandas as pd

# Pré-Processamento
from sklearn.preprocessing import StandardScaler

# Criação do modelo
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

#Métricas
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

#Ignorar avisos de atualização, etc
import warnings
warnings.filterwarnings("ignore")

#Gráficos
import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

import logging

# Emit WARNING-and-above log records and create a logger named after this module.
logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

# Point the MLflow client at the tracking server on localhost, port 5000.
# A database-backed store could be used instead, for example:
# mlflow.set_tracking_uri('mysql://<user>:<password>@localhost:3306/mlflow')
mlflow.set_tracking_uri('http://localhost:5000')

# Select the experiment that runs of this script are recorded under.
mlflow.set_experiment('Diabetes_Classification2')

# Tags attached to every run started by this script.
tags = {
    "Projeto": "Live MLflow",
    "team": "Data Hackers",
    "dataset": "Diabetes",
}

def metricas(y_test, y_predict):
    """Score predictions with accuracy, precision, recall and F1.

    Args:
        y_test: Ground-truth labels.
        y_predict: Predicted labels, aligned with ``y_test``.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``, each computed by the
        corresponding scikit-learn scorer with its default arguments.
    """
    pontuadores = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(pontuar(y_test, y_predict) for pontuar in pontuadores)

def matriz_confusao(y_test, y_predict):
    """Draw the confusion matrix as an annotated heatmap and return the figure.

    Args:
        y_test: Ground-truth labels; must expose ``.values`` (it is flattened
            with ``.values.ravel()`` before being compared).
        y_predict: Predicted labels, aligned with ``y_test``.

    Returns:
        The matplotlib figure holding the heatmap. The figure is closed
        before being returned, so it is not left open in pyplot; callers can
        still save it with ``fig.savefig(...)``.
    """
    valores_reais = y_test.values.ravel()
    contagens = confusion_matrix(valores_reais, y_predict)

    fig, ax = plt.subplots()
    sns.heatmap(contagens, annot=True, cmap='Blues', ax=ax)

    ax.set(
        xlabel='Valor Predito',
        ylabel='Valor Real',
        title='Matriz de Confusão',
    )
    # Tick labels are hard-coded for two classes (0 and 1).
    for eixo in (ax.xaxis, ax.yaxis):
        eixo.set_ticklabels(['0', '1'])

    plt.close(fig)
    return fig

def modelo():
    """Interactively configure and fit a random-forest classifier.

    Prompts on stdin for the maximum tree depth and for whether class weights
    should be balanced (1 = yes, anything else = no). The classifier is fitted
    on ``x_train`` / ``y_train``, which are read from module scope rather than
    passed in.

    Returns:
        A 3-tuple ``(max_depth, balanced, clf)``: the depth entered by the
        user, the class-weight setting (``"balanced"`` or ``None``) and the
        fitted ``RandomForestClassifier``.

    Raises:
        ValueError: If either answer cannot be parsed as an integer.
    """
    profundidade = int(input("Profundidade máxima da arvore: "))
    opcao = int(input("Balancear Classe (1-S/0-N): "))

    if opcao == 1:
        peso_classes = "balanced"
    else:
        peso_classes = None

    floresta = RandomForestClassifier(
        max_depth=profundidade,
        class_weight=peso_classes,
        random_state=42,  # fixed seed so repeated runs are comparable
    )
    floresta.fit(x_train, y_train)
    return profundidade, peso_classes, floresta

def previsao(x_test, modelo):
    """Return the predictions of ``modelo`` for ``x_test``.

    Args:
        x_test: Feature rows to score.
        modelo: Any object exposing a ``predict(x)`` method.

    Returns:
        Whatever ``modelo.predict(x_test)`` returns.
    """
    return modelo.predict(x_test)

if __name__ == "__main__":
    # Script entry point: train on the diabetes CSV and record the run
    # (tags, parameters, metrics, confusion-matrix plot, model, source) in MLflow.
    warnings.filterwarnings("ignore")

    # Load the dataset; the "Outcome" column is the target.
    df = pd.read_csv("data/diabetes.csv")

    # Split rows with a fixed seed, then separate features from target.
    # x_train / y_train are module-level names because modelo() reads them.
    train, test = train_test_split(df, random_state=42)
    x_train = train.drop(columns=["Outcome"])
    x_test = test.drop(columns=["Outcome"])
    y_train = train[["Outcome"]]
    y_test = test[["Outcome"]]

    with mlflow.start_run(run_name='RandomForestClassifier'):
        warnings.filterwarnings("ignore")
        # Register the run tags.
        mlflow.set_tags(tags)

        # Build and fit the model (hyperparameters are prompted for on stdin).
        max_depth, balanced, clf = modelo()

        # Predict on the held-out test rows.
        y_pred = previsao(x_test, clf)

        # Evaluation metrics.
        acuracia, precision, recall, f1 = metricas(y_test, y_pred)
        print("Acurácia: {}\nPrecision: {}\nRecall: {}\nF1-Score: {}".
              format(acuracia, precision, recall, f1))

        # Confusion matrix: save to a temporary PNG, upload it as an artifact,
        # and always remove the local file, even if saving or uploading fails.
        matriz_conf = matriz_confusao(y_test, y_pred)
        temp_name = "confusion-matrix.png"
        try:
            matriz_conf.savefig(temp_name)
            mlflow.log_artifact(temp_name, "confusion-matrix-plots")
        finally:
            try:
                os.remove(temp_name)
            except FileNotFoundError:
                print(f"{temp_name} file is not found")

        # Record the hyperparameters and the evaluation metrics.
        mlflow.log_param("balanced", balanced)
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_metric("Acuracia", acuracia)
        mlflow.log_metric("Precision", precision)
        mlflow.log_metric("Recall", recall)
        mlflow.log_metric("F1-Score", f1)

        # Log the fitted model and a copy of this training script.
        mlflow.sklearn.log_model(clf, "model")
        mlflow.log_artifact(local_path='./train.py', artifact_path='code')

Overwriting teste.py


---

## Carregando o Modelo de Produção e Fazendo Previsões

In [7]:
import mlflow
import pandas as pd

# Point the MLflow client at the tracking server on localhost, port 5000.
mlflow.set_tracking_uri('http://localhost:5000')

# Load the model that is currently in production.
# In the URI below, "test_rfmodel" is the registered model name and
# "Production" is its current stage (it could be "Staging", for example).
logged_model = 'models:/test_rfmodel/Production'
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

In [8]:
# Read the rows to be scored.
# NOTE(review): presumably the same feature columns used for training, without "Outcome" — confirm.
teste = pd.read_csv('data/teste2.csv')

In [9]:
# Score the rows with the loaded model and print the predictions.
print(loaded_model.predict(teste))

[0 0 0 0 0]
