In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, log_loss,
    ConfusionMatrixDisplay, RocCurveDisplay
)
import matplotlib.pyplot as plt

import mlflow
import mlflow.sklearn

In [None]:
# Load the credit dataset from the working directory and show its
# (rows, columns) shape as the cell output.
credito = pd.read_csv("Credit.csv")
credito.shape

In [None]:
# Preview the first five rows of the freshly loaded data.
credito.head(5)

In [None]:
# Replace every object-dtype column with its integer category codes so the
# whole frame is numeric. Note that this overwrites `credito` in place.
colunas_texto = [nome for nome in credito.columns if credito[nome].dtype == 'object']
for nome in colunas_texto:
    credito[nome] = credito[nome].astype('category').cat.codes

In [None]:
# Preview the first five rows again to inspect the current column values.
credito.head(5)

In [None]:
# Positional split: columns 0-19 become the feature matrix, column 20 the
# label vector (both as plain NumPy arrays).
previsores = credito.iloc[:, :20].values
classe = credito.iloc[:, 20].values

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split stable
# between runs.
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(
    previsores,
    classe,
    test_size=0.3,
    random_state=123,
)

In [None]:
def treina_rf(n_estimators, random_state=123):
    """Train a random forest and record the run in MLflow.

    Fits a ``RandomForestClassifier`` on the notebook-level training split
    (``X_treinamento`` / ``y_treinamento``), evaluates it on the test split
    (``X_teste`` / ``y_teste``) and logs the hyper-parameters, metrics,
    confusion-matrix and ROC plots, and the fitted model to the
    ``"rfexperimento"`` MLflow experiment.

    The metrics use scikit-learn's default binary averaging and the
    probability of the second class in ``classes_``, so a binary target is
    assumed.

    Parameters
    ----------
    n_estimators : int
        Number of trees in the forest.
    random_state : int or None, default 123
        Seed passed to the forest so that repeated runs with the same
        ``n_estimators`` give the same model. Pass ``None`` for an unseeded
        forest.

    Returns
    -------
    None
        Results are written to MLflow; the run id is printed.
    """
    mlflow.set_experiment("rfexperimento")
    # The context manager ends the run on exit (including on error), so no
    # explicit mlflow.end_run() is needed afterwards.
    with mlflow.start_run() as run:

        modelorf = RandomForestClassifier(
            n_estimators=n_estimators, random_state=random_state
        )
        modelorf.fit(X_treinamento, y_treinamento)
        previsoes = modelorf.predict(X_teste)
        # Class probabilities, one column per entry of modelorf.classes_.
        probabilidades = modelorf.predict_proba(X_teste)

        # log hyper-parameters
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("random_state", random_state)

        # metrics based on the hard class predictions
        acuracia = accuracy_score(y_teste, previsoes)
        recall = recall_score(y_teste, previsoes)
        precision = precision_score(y_teste, previsoes)
        f1 = f1_score(y_teste, previsoes)
        # AUC and log-loss are score-based metrics: feeding them hard 0/1
        # predictions yields a single-threshold AUC and a log-loss that only
        # reflects the error rate, so use the predicted probabilities.
        auc = roc_auc_score(y_teste, probabilidades[:, 1])
        log = log_loss(y_teste, probabilidades, labels=modelorf.classes_)

        # record metrics
        mlflow.log_metric("acuracia", acuracia)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("f1", f1)
        mlflow.log_metric("auc", auc)
        mlflow.log_metric("log", log)

        # plots: save through each display's own figure rather than the
        # implicit "current figure", then close it so repeated calls in a
        # loop do not accumulate open figures. The images remain available
        # as MLflow artifacts.
        confusion = ConfusionMatrixDisplay.from_estimator(modelorf, X_teste, y_teste)
        confusion.figure_.savefig("confusionrf.png")
        plt.close(confusion.figure_)
        roc = RocCurveDisplay.from_estimator(modelorf, X_teste, y_teste)
        roc.figure_.savefig("rocfr.png")
        plt.close(roc.figure_)

        # log plots
        mlflow.log_artifact("confusionrf.png")
        mlflow.log_artifact("rocfr.png")

        # model
        # NOTE(review): the artifact name says "NB" although this is a random
        # forest; kept unchanged so existing runs/loaders keep the same path.
        mlflow.sklearn.log_model(modelorf, "ModeloNB")

        # run information (run_id replaces the deprecated run_uuid)
        print("Modelo: ", run.info.run_id)

In [None]:
# Forest sizes to compare; each value produces one MLflow run.
arvores = [50, 100, 500, 750, 1000]
for n in arvores:
    treina_rf(n_estimators=n)