In [None]:
# Testando experimento no MLFlow

import os
import mlflow
import mlflow.sklearn
from sklearn.pipeline import Pipeline
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier,
                              GradientBoostingClassifier)
from sklearn.metrics import (accuracy_score, precision_score, confusion_matrix, recall_score,
                             f1_score)

# Load the credit-card dataset and discard the "id" column in a single
# expression, so `data` only ever refers to the frame without "id".
data = pd.read_csv('data/creditcard_2023.csv').drop(columns="id")

# Train/test split: 70% train / 30% test with a fixed seed for repeatability.
# The split is done on the whole frame; features and the 'Class' target are
# then separated per partition.
train, test = train_test_split(data, test_size=0.3, random_state=42)
X_train, y_train = train.drop(columns=['Class']), train['Class']
X_test, y_test = test.drop(columns=['Class']), test['Class']

# Tags attached to the MLflow run (project, owning team, source dataset).
tags = dict(
    Projeto="MLflow - Detecção de Fraude em cartões de crédito",
    team="Renato Moraes - Data Science",
    dataset="Kaggle Credit Card 2023",
)

# Random Forest hyperparameters, declared once so that the values logged to
# MLflow are guaranteed to be the ones the model is actually built with.
rf_params = {"n_estimators": 100, "random_state": 42}

# Train a scaler + Random Forest pipeline inside a single MLflow run and
# record its tags, parameters and test-set metrics.
with mlflow.start_run(run_name="Detecção de Fraude - Random Forest",
                      description="Este é um experimento de detecção de fraude usando Random Forest."):
    # Tag and describe the run before training, so a run that fails during
    # fitting is still identifiable in the tracking UI.
    mlflow.set_tags(tags)
    mlflow.log_params({"Dataset": "data/creditcard_2023.csv", **rf_params})

    # The scaler is a pipeline step, so it is fitted on the training split
    # only and re-applied to the test split at predict time.
    scaler = StandardScaler()
    random_forest = RandomForestClassifier(**rf_params)
    pipe = Pipeline([('scaler', scaler), ('random_forest', random_forest)])
    pipe.fit(X_train, y_train)
    y_pred_rf_mlflow = pipe.predict(X_test)

    # Evaluate on the held-out test split.
    accuracy = accuracy_score(y_test, y_pred_rf_mlflow)
    precision = precision_score(y_test, y_pred_rf_mlflow)
    recall = recall_score(y_test, y_pred_rf_mlflow)
    f1 = f1_score(y_test, y_pred_rf_mlflow)

    # Cast to built-in float (sklearn may return NumPy scalars) and log all
    # metrics in one call.
    mlflow.log_metrics({
        "Accuracy": float(accuracy),
        "Precision": float(precision),
        "Recall": float(recall),
        "F1-Score": float(f1),
    })