In [131]:
#Importando as bibliotecas necessárias
import sys
import os
import mlflow
import mlflow.sklearn
from urllib.parse import urlparse

In [132]:
# MLflow uses GitPython to look up source-version info; "quiet" asks GitPython
# not to complain when no git executable is available (e.g. inside a container).
os.environ["GIT_PYTHON_REFRESH"] = "quiet"

In [133]:
#Manipulação de dados
import pandas as pd

In [134]:
# Pré-Processamento
from sklearn.preprocessing import StandardScaler

In [135]:
# Criação do modelo
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

In [136]:
#Métricas
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [137]:
#Ignorar avisos de atualização, etc
import warnings
warnings.filterwarnings("ignore")

In [138]:
#Gráficos
import seaborn as sns
import matplotlib.pyplot as plt

In [139]:
import logging

# Only WARNING and above reach the root handler; `logger` is this module's
# own named logger.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

In [140]:
# Name of the MLflow experiment that groups the runs of this notebook.
experiment = 'poc'
# Explicit tracking-server configuration, currently disabled:
# mlflow.set_tracking_uri(f"http://{os.getenv('MLFLOW_TRACKING_URI')}:5000")

# Create the experiment (with its artifact location) only when it does not
# exist yet, then make it the active experiment. The last expression is left
# bare so the notebook displays the selected Experiment object.
existing_experiment = mlflow.get_experiment_by_name(experiment)
if existing_experiment is None:
    mlflow.create_experiment(experiment, artifact_location='s3://mlflow')
mlflow.set_experiment(experiment_name=experiment)

<Experiment: artifact_location='s3://mlflow', creation_time=1698102588057, experiment_id='395748697962635838', last_update_time=1698102588057, lifecycle_stage='active', name='poc', tags={}>

In [141]:
# Tags attached to every run logged by this notebook. The "dataset" tag names
# the data actually used below (scikit-learn's Iris set).
tags = {"Projeto": "Tutorial MLflow", "team": "Data Science", "dataset": "Iris"}

In [142]:
def metricas(y_test, y_predict):
    """Score a set of predictions against the true labels.

    Parameters
    ----------
    y_test
        Ground-truth labels.
    y_predict
        Labels predicted by the model, aligned with ``y_test``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` in that order. Precision,
        recall and F1 are computed with ``average='weighted'``.
    """
    # The three per-class scores all use the same averaging strategy.
    media = 'weighted'
    return (
        accuracy_score(y_test, y_predict),
        precision_score(y_test, y_predict, average=media),
        recall_score(y_test, y_predict, average=media),
        f1_score(y_test, y_predict, average=media),
    )

def matriz_confusao(y_test, y_predict):
    """Draw the confusion matrix as a heatmap and return the figure.

    Parameters
    ----------
    y_test
        Ground-truth labels. May be a pandas Series, a single-column
        DataFrame, a NumPy array or a plain sequence; it is flattened to 1-D.
    y_predict
        Predicted labels, aligned with ``y_test``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the heatmap. It is closed before being returned so
        the notebook does not render it implicitly; it can still be saved with
        ``fig.savefig(...)``.
    """
    # Local import keeps this cell self-contained (NumPy is not imported at
    # the top of the notebook).
    import numpy as np

    # np.asarray(...).ravel() accepts pandas objects as well as lists/arrays.
    matriz_conf = confusion_matrix(np.asarray(y_test).ravel(), y_predict)

    fig, ax = plt.subplots()
    # fmt='d' prints the counts as integers instead of seaborn's default
    # '.2g', which turns e.g. 100 into "1e+02".
    sns.heatmap(matriz_conf, annot=True, fmt='d', cmap='Blues', ax=ax)

    ax.set_xlabel('Valor Predito')
    ax.set_ylabel('Valor Real')
    ax.set_title('Matriz de Confusão')

    # One tick label per class actually present in the matrix, so the plot
    # also works when the problem does not have exactly three classes.
    rotulos = [f'Classe {i}' for i in range(1, matriz_conf.shape[0] + 1)]
    ax.xaxis.set_ticklabels(rotulos)
    ax.yaxis.set_ticklabels(rotulos)

    # Close this specific figure rather than whichever one happens to be current.
    plt.close(fig)
    return fig

In [143]:
from sklearn.datasets import load_iris

# With as_frame=True the bunch already holds pandas objects, so no re-wrapping
# in pd.DataFrame is needed.
iris = load_iris(as_frame=True)
data = iris.data
# Name the label column explicitly instead of relying on a rename of column 0,
# which never matches because the Series already carries a name.
target = iris.target.to_frame(name="target")
# Features and label side by side in a single frame.
df = pd.concat([data, target], axis=1)

In [144]:
# Split the combined frame into the feature matrix X and the label vector y.
# `df` itself is left untouched.

X = df.drop(columns='target')
y = df['target'].copy()

In [145]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on y and uses
# a fixed random_state so it is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
    test_size=0.2,
)

In [146]:
# Imported at the top of the cell (rather than in the middle of the run) so the
# dependency is visible; later cells also rely on `joblib` being in scope.
import joblib

with mlflow.start_run(run_name='KNeighborsClassifier'):
    mlflow.sklearn.autolog()
    # Attach the project tags to this run
    mlflow.set_tags(tags)

    # Build the model: standardise the features, then classify with k-NN
    n_vizinhos = 10
    scaler = StandardScaler()
    knn = KNeighborsClassifier(n_neighbors=n_vizinhos)
    # Use the `scaler` instance created above instead of building a second,
    # anonymous StandardScaler; `scaler` then refers to the fitted step.
    pipe = Pipeline([('scaler', scaler), ('knn', knn)])
    pipe.fit(x_train, y_train)
    # Predict the held-out test set
    y_pred_knn = pipe.predict(x_test)

    # Metrics
    acuracia, precision, recall, f1 = metricas(y_test, y_pred_knn)
    print(
        f"Número de Vizinhos: {n_vizinhos}\nAcurácia: {acuracia}\n"
        f"Precision: {precision}\nRecall: {recall}\nF1-Score: {f1}"
    )

    # Confusion matrix
    matriz_conf = matriz_confusao(y_test, y_pred_knn)
    temp_name = "confusion-matrix.png"
    matriz_conf.savefig(temp_name)
    # NOTE(review): the artifact upload below is disabled, so the PNG is
    # written and then deleted without ever being logged — confirm whether the
    # upload should be re-enabled or the save/delete removed.
    # mlflow.log_artifact(temp_name, "confusion-matrix-plots")
    try:
        os.remove(temp_name)
    except FileNotFoundError:
        print(f"{temp_name} file is not found")

    # Log the hyper-parameter and the evaluation metrics
    mlflow.log_param("n_neighbors", n_vizinhos)
    mlflow.log_metrics(
        {
            "Acuracia": acuracia,
            "Precision": precision,
            "Recall": recall,
            "F1-Score": f1,
        }
    )

    # Persist the fitted pipeline locally; `filename` is reused by the next
    # cell to load it back.
    filename = "iris_dt_v1.joblib"
    joblib.dump(pipe, filename)
    






Número de Vizinhos: 10
Acurácia: 0.9666666666666667
Precision: 0.9696969696969696
Recall: 0.9666666666666667
F1-Score: 0.9665831244778613


In [147]:
# Reload the pipeline written by the training cell. joblib files are pickles,
# so only load files produced by a trusted source.
loaded_model = joblib.load(filename)

# Sanity check: predict the first four rows, using the first four columns of X.
primeiras_linhas = X.iloc[:4, :4]
y_predicted = loaded_model.predict(primeiras_linhas)

print(y_predicted)

[0 0 0 0]
