In [2]:
import pandas as pd
import numpy as np

from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import lightgbm as lgb

import optuna
optuna.logging.set_verbosity(optuna.logging.DEBUG)
from sklearn.model_selection import StratifiedKFold

import pickle
import time

## Read Train/Test Files

In [3]:
train_df = pd.read_parquet('../data/processed/ciclo02/train_df_with_full_features_v2.parquet')
test_df = pd.read_parquet('../data/processed/ciclo02/test_df_with_full_features_v2.parquet')

In [4]:
train_df.dtypes

tx_amount                                 float64
tx_time_seconds                             int64
tx_time_days                                int64
tx_fraud                                    int64
mean_nb_tx_per_day                        float64
                                           ...   
high_value_tx                                bool
unusual_hour                                 bool
frequent_tx                                  bool
consecutive_transactions_same_terminal       bool
tx_period_day_fe                          float64
Length: 82, dtype: object

In [5]:
test_df.dtypes

tx_amount                                 float64
tx_time_seconds                             int64
tx_time_days                                int64
tx_fraud                                    int64
mean_nb_tx_per_day                        float64
                                           ...   
high_value_tx                                bool
unusual_hour                                 bool
frequent_tx                                  bool
consecutive_transactions_same_terminal       bool
tx_period_day_fe                          float64
Length: 82, dtype: object

# Comparando os modelos a partir da quantidade de features

In [6]:
def train_models(X_train, y_train):
    """
    Fit the three baseline classifiers on the same training data.

    A Random Forest, an XGBoost and a LightGBM classifier are created with
    fixed hyperparameters and fitted one after the other. For each model a
    start message and the elapsed wall-clock time (estimator construction
    plus fit) are printed.

    Parameters:
    - X_train: training features, passed unchanged to each estimator's fit()
    - y_train: training labels, passed unchanged to each estimator's fit()

    Returns:
    - dict holding the fitted estimators under the keys 'random_forest',
      'xgboost' and 'lightgbm', inserted in that order
    """

    def make_random_forest():
        return RandomForestClassifier(
            n_estimators=200,
            max_depth=7,
            min_samples_split=15,
            min_samples_leaf=7,
            max_features='sqrt',
            class_weight='balanced_subsample',
            bootstrap=True,
            random_state=42,
            n_jobs=-1
        )

    def make_xgboost():
        return XGBClassifier(
            n_estimators=200,
            max_depth=4,
            learning_rate=0.05,
            colsample_bytree=0.8,
            scale_pos_weight=3,
            min_child_weight=10,
            gamma=5,
            subsample=0.7,
            reg_lambda=10,
            random_state=42,
            enable_categorical=True,
            n_jobs=-1
        )

    def make_lightgbm():
        return lgb.LGBMClassifier(
            n_estimators=200,
            max_depth=4,
            learning_rate=0.05,
            colsample_bytree=0.8,
            min_child_weight=5,
            reg_lambda=10,
            subsample=0.7,
            force_row_wise=True,
            min_split_gain=0.01,
            scale_pos_weight=2.0,
            random_state=42,
            verbose=-1,
            n_jobs=-1
        )

    # (name used in the printed messages, key in the returned dict, factory),
    # listed in training order.
    recipes = (
        ("Random Forest", "random_forest", make_random_forest),
        ("XGBoost", "xgboost", make_xgboost),
        ("LightGBM", "lightgbm", make_lightgbm),
    )

    fitted = {}
    for label, key, make_estimator in recipes:
        print(f"Treinando {label}...")
        clock_start = time.time()

        # The estimator is built inside the timed section, as the timing
        # has always covered construction as well as fit.
        estimator = make_estimator()
        estimator.fit(X_train, y_train)
        print(f"{label} treinado em {time.time() - clock_start:.2f} segundos\n")
        fitted[key] = estimator

    return fitted

In [7]:
# Split the target column from the features for both data sets, then fit
# the three baseline models on the complete feature set.
y_train = train_df["tx_fraud"]
X_train = train_df.drop(columns=["tx_fraud"])
y_test = test_df["tx_fraud"]
X_test = test_df.drop(columns=["tx_fraud"])
models_full_features = train_models(X_train, y_train)

Treinando Random Forest...
Random Forest treinado em 428.11 segundos

Treinando XGBoost...
XGBoost treinado em 44.33 segundos

Treinando LightGBM...
LightGBM treinado em 37.63 segundos



In [8]:
# NOTE(review): apart from the result name, this cell repeats the previous
# one: it rebuilds the same X/y objects and refits the same three models on
# the full feature set, storing them as `models_some_features`. That name is
# reassigned further down with models fitted on the reduced feature list
# before anything is evaluated, so the models trained here are never used.
X_train, y_train = train_df.drop(columns=["tx_fraud"]), train_df["tx_fraud"]
X_test, y_test = test_df.drop(columns=["tx_fraud"]), test_df["tx_fraud"]
models_some_features = train_models(X_train, y_train)

Treinando Random Forest...
Random Forest treinado em 387.57 segundos

Treinando XGBoost...
XGBoost treinado em 45.01 segundos

Treinando LightGBM...
LightGBM treinado em 44.39 segundos



In [9]:
# Reduced set of 27 feature columns used for the "some features" models.
# NOTE(review): the name suggests the columns were picked by feature
# importance; that ranking is not computed in this notebook - confirm source.
importance_list = ['tx_amount_median_ratio', 'tx_amount_to_mean_ration', 'max_amount_per_customer_last_12h', 'high_value_tx',
                   'max_amount_per_customer_last_8h', 'outlier_tx', 'max_amount_per_customer_last_24h', 'max_amount_per_customer_last_4h',
                   'std_amount', 'tx_time_days', 'tx_time_seconds', 'total_amount_per_customer_last_2h', 'max_amount_per_customer_last_1h',
                   'mean_amount', 'amount_zscore_per_customer_last_24h', 'std_amount_per_customer_last_24h', 'month', 'tx_amount_variation',
                   'tx_amount_hour_mean', 'median_amount_per_customer_last_4h', 'total_amount_per_customer_last_1h',
                   'mean_amount_per_customer_last_1h', 'amount_zscore_per_customer_last_1h', 'ratio_total_transactions_per_customer_last_1h_to_2h',
                   'mean_amount_per_customer_last_24h', 'ratio_mean_amount_per_customer_last_1h_to_24h', 'tx_amount']

# Column subsets of the full train/test feature matrices, in the order above.
X_train_new = X_train[importance_list]
X_test_new = X_test[importance_list]

In [10]:
models_some_features = train_models(X_train_new, y_train)

Treinando Random Forest...
Random Forest treinado em 340.67 segundos

Treinando XGBoost...
XGBoost treinado em 27.36 segundos

Treinando LightGBM...
LightGBM treinado em 19.24 segundos



In [11]:
def avaliar_multiplos_modelos(models, X_train, y_train, X_test, y_test, threshold=0.5):
    """
    Evaluate several already-fitted classifiers on the training and test sets.

    For each model the positive-class probability from predict_proba is
    scored with AUC-ROC, then converted to class labels with `threshold` and
    scored with precision, recall and F1. A header line is printed per model.

    Parameters:
    - models: dict of fitted models { "Model name": fitted_model }; each
      model must expose predict_proba
    - X_train: training features
    - y_train: training labels
    - X_test: test features
    - y_test: test labels
    - threshold: probabilities >= threshold are labelled 1 (default = 0.5)

    Returns:
    - DataFrame with one row per model and the train/test value of each
      metric side by side (an empty frame with the same columns if `models`
      is empty)
    """

    def _score(model, X, y):
        # AUC uses the raw probabilities; the other metrics use the labels
        # obtained by cutting the probabilities at `threshold`.
        proba = model.predict_proba(X)[:, 1]
        pred = (proba >= threshold).astype(int)
        # zero_division=0 matches cross_validate_models: an undefined metric
        # (e.g. no positive prediction) counts as 0 without emitting a warning.
        return (
            roc_auc_score(y, proba),
            precision_score(y, pred, zero_division=0),
            recall_score(y, pred, zero_division=0),
            f1_score(y, pred, zero_division=0),
        )

    results = []

    for name, model in models.items():
        print(f"\n=== Avaliando o Modelo: {name} ===")

        train_auc, train_precision, train_recall, train_f1 = _score(model, X_train, y_train)
        test_auc, test_precision, test_recall, test_f1 = _score(model, X_test, y_test)

        # Train and test values are interleaved so they sit next to each other.
        results.append([name, train_auc, test_auc, train_precision, test_precision,
                        train_recall, test_recall, train_f1, test_f1])

    df_results = pd.DataFrame(results, columns=[
        "Modelo", "AUC-ROC (Treino)", "AUC-ROC (Teste)",
        "Precision (Treino)", "Precision (Teste)",
        "Recall (Treino)", "Recall (Teste)",
        "F1-Score (Treino)", "F1-Score (Teste)"
    ])

    return df_results

In [12]:
# Display name -> estimator fitted on the full feature set.
models = {
    display_name: models_full_features[key]
    for display_name, key in (
        ("XGBoost", "xgboost"),
        ("LightGBM", "lightgbm"),
        ("Random Forest", "random_forest"),
    )
}

# Train/test metrics with a 0.4 decision threshold.
df_resultados = avaliar_multiplos_modelos(
    models,
    X_train,
    y_train,
    X_test,
    y_test,
    threshold=0.4,
)


=== Avaliando o Modelo: XGBoost ===

=== Avaliando o Modelo: LightGBM ===

=== Avaliando o Modelo: Random Forest ===


In [13]:
df_resultados

Unnamed: 0,Modelo,AUC-ROC (Treino),AUC-ROC (Teste),Precision (Treino),Precision (Teste),Recall (Treino),Recall (Teste),F1-Score (Treino),F1-Score (Teste)
0,XGBoost,0.781327,0.671672,0.903584,0.895118,0.345417,0.314286,0.49978,0.465226
1,LightGBM,0.782658,0.673323,0.927784,0.925822,0.343162,0.313016,0.501013,0.467853
2,Random Forest,0.751372,0.664489,0.010159,0.009357,0.909808,0.912381,0.020095,0.018524


In [14]:
# Display name -> estimator fitted on the reduced feature list.
models = {
    display_name: models_some_features[key]
    for display_name, key in (
        ("XGBoost_some_features", "xgboost"),
        ("LightGBM_some_features", "lightgbm"),
        ("Random Forest_some_features", "random_forest"),
    )
}

# Same evaluation as for the full-feature models, on the reduced matrices.
df_resultados_new = avaliar_multiplos_modelos(
    models,
    X_train_new,
    y_train,
    X_test_new,
    y_test,
    threshold=0.4,
)


=== Avaliando o Modelo: XGBoost_some_features ===

=== Avaliando o Modelo: LightGBM_some_features ===

=== Avaliando o Modelo: Random Forest_some_features ===


In [15]:
df_resultados_new

Unnamed: 0,Modelo,AUC-ROC (Treino),AUC-ROC (Teste),Precision (Treino),Precision (Teste),Recall (Treino),Recall (Teste),F1-Score (Treino),F1-Score (Teste)
0,XGBoost_some_features,0.770137,0.673567,0.899414,0.895212,0.34585,0.314603,0.499593,0.465586
1,LightGBM_some_features,0.765487,0.675194,0.928555,0.929312,0.343769,0.313016,0.501772,0.468297
2,Random Forest_some_features,0.747787,0.673306,0.009893,0.011306,0.920822,0.735873,0.019576,0.02227


Então, como podemos ver, a perda ao selecionar uma quantidade menor de features é irrisória; sendo assim, vamos continuar com estas features.
Como a RF teve um desempenho muito ruim, a partir daqui faremos a cross validation apenas com a XGBoost e a LightGBM.

# Cross Validation

In [16]:
def cross_validate_models(models, df, target_col, n_splits=5, threshold=0.5):
    """
    Run stratified k-fold cross-validation for several classifiers.

    Every model is refit on each training fold and scored on the matching
    validation fold. Besides AUC-ROC, precision, recall and F1, a monetary
    "loss" is computed per fold as the sum of `tx_amount` over false
    negatives plus the sum of `tx_amount` over false positives.

    Parameters:
    - models: dict { "Model name": estimator } with fit / predict_proba.
      The estimators are fitted in place, so after the call each one is left
      fitted on the training part of the last fold.
    - df: DataFrame holding the features, the target column and a
      'tx_amount' column ('tx_amount' is also kept as a feature)
    - target_col: name of the label column in df
    - n_splits: number of stratified folds (default = 5)
    - threshold: probabilities >= threshold are labelled 1 (default = 0.5)

    Returns:
    - DataFrame with one row per model: mean and standard deviation
      (np.std, i.e. population std) of each metric across folds
    - dict { "Model name": DataFrame } with the out-of-fold y_true, y_pred
      and y_score, indexed by row position in df and sorted by that position

    Raises:
    - KeyError if df has no `target_col` or no 'tx_amount' column
    """
    X = df.drop(columns=[target_col])
    y = df[target_col]
    tx_amount = df['tx_amount']

    results = []
    model_predictions = {}

    for name, model in models.items():
        print(f"\n Treinando e validando: {name} para o threshold: {threshold}")
        # Same seed for every model, so all of them see identical folds.
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

        aucs, precisions, recalls, f1s, perdas = [], [], [], [], []
        fold_preds = []

        start = time.time()
        for train_idx, val_idx in skf.split(X, y):
            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
            y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
            tx_val = tx_amount.iloc[val_idx]

            model.fit(X_train, y_train)
            y_proba = model.predict_proba(X_val)[:, 1]
            y_pred = (y_proba >= threshold).astype(int)

            aucs.append(roc_auc_score(y_val, y_proba))
            precisions.append(precision_score(y_val, y_pred, zero_division=0))
            recalls.append(recall_score(y_val, y_pred, zero_division=0))
            f1s.append(f1_score(y_val, y_pred, zero_division=0))

            df_val = pd.DataFrame({
                "y_true": y_val.values,
                "y_pred": y_pred,
                "tx_amount": tx_val.values
            }, index=val_idx)
            # Loss = amount of missed frauds + amount of wrongly flagged
            # legitimate transactions in this fold.
            perda_fn = df_val[(df_val["y_true"] == 1) & (df_val["y_pred"] == 0)]["tx_amount"].sum()
            perda_fp = df_val[(df_val["y_true"] == 0) & (df_val["y_pred"] == 1)]["tx_amount"].sum()
            perdas.append(perda_fn + perda_fp)

            # y_val is passed as a plain array: val_idx holds row positions,
            # and handing pandas the Series would re-align y_true by index
            # label, which is only correct when df has a default 0..n-1 index.
            fold_preds.append(pd.DataFrame({
                "y_true": y_val.to_numpy(),
                "y_pred": y_pred,
                "y_score": y_proba
            }, index=val_idx))

        end = time.time()
        print(f"{name} finalizado em {end - start:.2f} segundos.")

        # Each row is in exactly one validation fold, so after sorting the
        # frame follows the row order of df.
        model_predictions[name] = pd.concat(fold_preds).sort_index()

        results.append({
            "Modelo": name,
            "threshold": threshold,
            "AUC-ROC (M√©dia)": np.mean(aucs),
            "AUC-ROC (Desvio)": np.std(aucs),
            "Precision (M√©dia)": np.mean(precisions),
            "Precision (Desvio)": np.std(precisions),
            "Recall (M√©dia)": np.mean(recalls),
            "Recall (Desvio)": np.std(recalls),
            "F1-Score (M√©dia)": np.mean(f1s),
            "F1-Score (Desvio)": np.std(f1s),
            "Perda Total do Modelo (M√©dia)": np.mean(perdas),
            "Perda Total do Modelo (Desvio)": np.std(perdas)
        })

    df_cv_results = pd.DataFrame(results)
    return df_cv_results, model_predictions

In [17]:
# Selected feature columns plus the target, used to slice train_df/test_df
# before cross-validation. Built from `importance_list` instead of a second
# hand-copied list so the two can never drift apart.
importance_list_with_target = importance_list + ['tx_fraud']

In [18]:
# Keep only the selected features and the target column.
# This rebinds train_df/test_df, so the full 82-column frames loaded at the
# top are no longer reachable under these names; X_train/X_test, created
# earlier, still hold the full feature set.
train_df = train_df[importance_list_with_target]
test_df = test_df[importance_list_with_target]

In [19]:
# Unfitted baseline XGBoost estimator for cross-validation. The
# hyperparameters repeat the XGBoost settings used inside train_models.
xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.05,
    colsample_bytree=0.8,
    scale_pos_weight=3,
    min_child_weight=10,
    gamma=5,
    subsample=0.7,
    reg_lambda=10,
    random_state=42,
    enable_categorical=True,
    n_jobs=-1
)

# Unfitted baseline LightGBM estimator; same settings as in train_models.
# NOTE(review): per the LightGBM parameter docs, `subsample` only takes
# effect when `subsample_freq` is > 0, and it is not set here - confirm
# whether row subsampling was intended.
lgb_model = lgb.LGBMClassifier(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.05,
    colsample_bytree=0.8,
    min_child_weight=5,
    reg_lambda=10,
    subsample=0.7,
    force_row_wise=True,
    min_split_gain=0.01,
    scale_pos_weight=2.0,
    random_state=42,
    verbose=-1,
    n_jobs=-1
)

In [20]:
# Baseline (untuned) estimators to cross-validate.
models = {"XGBoost": xgb_model, "LightGBM": lgb_model}

# 5-fold stratified CV on the reduced training frame, 0.4 decision threshold.
df_cv_results, model_predictions = cross_validate_models(
    models,
    train_df,
    target_col="tx_fraud",
    n_splits=5,
    threshold=0.4,
)


 Treinando e validando: XGBoost para o threshold: 0.4
XGBoost finalizado em 103.67 segundos.

 Treinando e validando: LightGBM para o threshold: 0.4
LightGBM finalizado em 97.61 segundos.


In [21]:
df_cv_results

Unnamed: 0,Modelo,threshold,AUC-ROC (M√©dia),AUC-ROC (Desvio),Precision (M√©dia),Precision (Desvio),Recall (M√©dia),Recall (Desvio),F1-Score (M√©dia),F1-Score (Desvio),Perda Total do Modelo (M√©dia),Perda Total do Modelo (Desvio)
0,XGBoost,0.4,0.722094,0.006313,0.900765,0.013426,0.344201,0.011075,0.498007,0.012918,97373.43,1663.935422
1,LightGBM,0.4,0.724599,0.007997,0.929254,0.013524,0.34238,0.010495,0.500301,0.011867,93482.996,1899.09426


Os modelos estão bem próximos um do outro.

 1. AUC-ROC (0.722 e 0.724): indica boa separação entre transações legítimas e fraudulentas. Talvez, melhorando a feature engineering ou o balanceamento das classes, consigamos resultados melhores.
 2. Recall relativamente baixo, o que significa que o modelo está deixando passar mais de 65% das fraudes.
 3. Precision bastante alta: os modelos quase não geram falsos positivos (poucas transações legítimas estão sendo classificadas como fraude), acertando mais de 90% das vezes em que apontam fraude.
 4. Melhor F1-Score (~0.50): há um equilíbrio razoável entre precision e recall, com preferência por evitar frustrações com clientes, ou seja, estamos considerando que é melhor receber uma fraude do que considerar uma transação legítima como fraudulenta.

Vamos tentar alterar o threshold e ver se conseguimos melhorar o recall sem perder tanta precision.

In [22]:
models = {
    "XGBoost": xgb_model,
    "LightGBM": lgb_model
}

# Threshold sweep: one full 5-fold cross-validation of both models for each
# of the 17 thresholds 0.10, 0.15, ..., 0.90. It is expensive, so it only
# runs when the flag below is switched on; with the flag off this cell just
# defines `models`.
RUN_THRESHOLD_SWEEP = False

if RUN_THRESHOLD_SWEEP:
    sweep_frames = []
    for threshold in np.arange(0.1, 0.91, 0.05):
        # round() removes float noise such as 0.15000000000000002 so the
        # "threshold" column stays readable.
        sweep_results, _ = cross_validate_models(
            models,
            train_df,
            target_col="tx_fraud",
            n_splits=5,
            threshold=round(float(threshold), 2),
        )
        sweep_frames.append(sweep_results)

    # Concatenate once at the end instead of growing a frame inside the loop.
    df_results_with_differents_thresholds = pd.concat(sweep_frames, ignore_index=True)

In [23]:
# df_results_with_differents_thresholds

O caso de threshold igual a 0.3 é o melhor, levando em conta o f1-score como métrica. Então, como os resultados dos dois modelos estão muito próximos, vamos decidir qual irá para produção após o fine-tuning.

# Hiper Parameter fine tuning

In [24]:
# ---------------------
# XGBoost tuning objective
# ---------------------
def objective_xgb(trial, X, y, threshold=0.3, n_splits=5):
    """
    Optuna objective for XGBoost: mean F1 over a stratified k-fold split.

    Parameters:
    - trial: Optuna trial used to sample the hyperparameters below
    - X: feature DataFrame (rows are selected with .iloc)
    - y: label Series aligned row by row with X (rows are selected with .iloc)
    - threshold: probabilities >= threshold are labelled 1 (default = 0.3,
      the value that used to be hard-coded here)
    - n_splits: number of stratified folds (default = 5, as before)

    Returns:
    - mean of the per-fold F1 scores; zero_division=0 makes an undefined F1
      count as 0
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200,400),
        "max_depth": trial.suggest_int("max_depth", 3, 8),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 1, 10),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 15),
        "gamma": trial.suggest_float("gamma", 0, 10),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0, 20),
        # Fixed settings: not sampled, so they do not appear in best_params.
        "random_state": 42,
        "enable_categorical": True,
        "n_jobs": -1
    }

    model = XGBClassifier(**params)
    # Fixed seed: every trial is scored on the same folds.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    f1_scores = []
    for train_idx, val_idx in skf.split(X, y):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # The same estimator instance is refit on each fold.
        model.fit(X_train, y_train)
        y_proba = model.predict_proba(X_val)[:, 1]
        y_pred = (y_proba >= threshold).astype(int)
        f1_scores.append(f1_score(y_val, y_pred, zero_division=0))

    return np.mean(f1_scores)

# ---------------------
# LightGBM tuning objective
# ---------------------
def objective_lgb(trial, X, y, threshold=0.3, n_splits=5):
    """
    Optuna objective for LightGBM: mean F1 over a stratified k-fold split.

    Parameters:
    - trial: Optuna trial used to sample the hyperparameters below
    - X: feature DataFrame (rows are selected with .iloc)
    - y: label Series aligned row by row with X (rows are selected with .iloc)
    - threshold: probabilities >= threshold are labelled 1 (default = 0.3,
      the value that used to be hard-coded here)
    - n_splits: number of stratified folds (default = 5, as before)

    Returns:
    - mean of the per-fold F1 scores; zero_division=0 makes an undefined F1
      count as 0
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200,400),
        "max_depth": trial.suggest_int("max_depth", 3, 8),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 15),
        "reg_lambda": trial.suggest_float("reg_lambda", 0, 20),
        # NOTE(review): per the LightGBM parameter docs, `subsample` only
        # takes effect when `subsample_freq` is > 0, and it is not set here,
        # so this search dimension may have no influence - confirm before
        # the next tuning run.
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "force_row_wise": True,
        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 0.1),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 1, 10),
        # Fixed settings: not sampled, so they do not appear in best_params.
        "random_state": 42,
        "verbose": -1,
        "n_jobs": -1
    }

    model = lgb.LGBMClassifier(**params)
    # Fixed seed: every trial is scored on the same folds.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    f1_scores = []
    for train_idx, val_idx in skf.split(X, y):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # The same estimator instance is refit on each fold.
        model.fit(X_train, y_train)
        y_proba = model.predict_proba(X_val)[:, 1]
        y_pred = (y_proba >= threshold).astype(int)
        f1_scores.append(f1_score(y_val, y_pred, zero_division=0))

    return np.mean(f1_scores)

# ---------------------
# Run both tuning studies
# ---------------------
def run_optuna_tuning(X, y, n_trials=30):
    """
    Tune XGBoost and then LightGBM with Optuna, one study per model.

    Both studies maximize the value returned by their objective function,
    use a TPE sampler seeded with 42 and have pruning disabled.

    Parameters:
    - X: features forwarded to objective_xgb / objective_lgb
    - y: labels forwarded to objective_xgb / objective_lgb
    - n_trials: number of trials per study (default = 30)

    Returns:
    - dict with the best parameters, best objective value and elapsed time
      in seconds of each study, under the keys 'xgb_best_params',
      'xgb_best_f1', 'xgb_time', 'lgb_best_params', 'lgb_best_f1', 'lgb_time'
    """

    def _tune(objective):
        # Creates one study, runs it to completion and reports how long the
        # creation plus optimization took.
        clock_start = time.time()
        study = optuna.create_study(
            direction="maximize",
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.NopPruner()
        )
        study.optimize(lambda trial: objective(trial, X, y), n_trials=n_trials)
        return study, time.time() - clock_start

    print("üîç Iniciando tuning XGBoost...")
    study_xgb, elapsed_xgb = _tune(objective_xgb)

    print("üîç Iniciando tuning LightGBM...")
    study_lgb, elapsed_lgb = _tune(objective_lgb)

    # Summary of both studies
    print("\n‚úÖ Tuning finalizado.")
    print(f"üß† Melhor F1 XGBoost: {study_xgb.best_value:.4f} | Tempo: {elapsed_xgb:.2f}s")
    print(f"üß† Melhor F1 LightGBM: {study_lgb.best_value:.4f} | Tempo: {elapsed_lgb:.2f}s")

    summary = {}
    for prefix, study, elapsed in (("xgb", study_xgb, elapsed_xgb), ("lgb", study_lgb, elapsed_lgb)):
        summary[f"{prefix}_best_params"] = study.best_params
        summary[f"{prefix}_best_f1"] = study.best_value
        summary[f"{prefix}_time"] = elapsed
    return summary

In [25]:
# Features and target taken from the (already column-reduced) training frame.
X = train_df.drop(columns=["tx_fraud"])
y = train_df["tx_fraud"]

# The tuning call is left disabled; the two lines after it record the
# summary printed by an earlier run.
# results = run_optuna_tuning(X, y, n_trials=100)
# Best F1 XGBoost: 0.5031 | time: 19200.40s
# Best F1 LightGBM: 0.5026 | time: 16470.21s

lighgbm

[I 2025-04-02 01:48:03,449] Trial 75 finished with value: 0.5030853952665736 and parameters: {'n_estimators': 367, 'max_depth': 8, 'learning_rate': 0.04812699015094967, 'colsample_bytree': 0.733164488180626, 'scale_pos_weight': 1.5804975332782478, 'min_child_weight': 1, 'gamma': 2.147697863163816, 'subsample': 0.9058532872360417, 'reg_lambda': 6.455519903139879}. Best is trial 75 with value: 0.5030853952665736.

xgboost

[I 2025-04-01 19:14:28,831] Trial 49 finished with value: 0.5026675561265523 and parameters: {'max_depth': 8, 'learning_rate': 0.0495178354733186, 'colsample_bytree': 0.7366553025783216, 'scale_pos_weight': 1.3065266644896405, 'min_child_weight': 1, 'gamma': 1.971480173088953, 'subsample': 0.9259495710685184, 'reg_lambda': 6.509533314575774}. Best is trial 49 with value: 0.5026675561265523.

# Modelos Após Fine Tuning

In [26]:
# Selected feature columns plus the target. Built from `importance_list`
# instead of another hand-copied list so the two can never drift apart.
importance_list_with_target = importance_list + ['tx_fraud']

# Keep only those columns. If the frames were already reduced earlier in the
# notebook this selects the same columns again and changes nothing.
train_df = train_df[importance_list_with_target]
test_df = test_df[importance_list_with_target]

In [27]:
# Tuned hyperparameters, hard-coded because the tuning call in this notebook
# is commented out. The XGBoost values are identical to the pasted Optuna log
# line for trial 75 (value 0.5030853952665736).
params_xgb = {'n_estimators': 367, 'max_depth': 8, 'learning_rate': 0.04812699015094967, 'colsample_bytree': 0.733164488180626, 'scale_pos_weight': 1.5804975332782478, 'min_child_weight': 1, 'gamma': 2.147697863163816, 'subsample': 0.9058532872360417, 'reg_lambda': 6.455519903139879}

# NOTE(review): no log line with these values appears in this notebook -
# confirm they are the best_params of the LightGBM study.
params_lgb = {'n_estimators': 372, 'max_depth': 6, 'learning_rate': 0.08547122682859054, 'colsample_bytree': 0.6398669811350725, 'min_child_weight': 15, 'reg_lambda': 7.9992709797509125, 'subsample': 0.942434577307351, 'min_split_gain': 0.05705147926792379, 'scale_pos_weight': 1.333874892668437}

In [28]:
# Unfitted XGBoost estimator built from the tuned hyperparameters; it is
# fitted fold by fold inside cross_validate_models, so the direct fit below
# stays disabled.
# NOTE(review): the tuning objective also fixed random_state=42,
# enable_categorical=True and n_jobs=-1; none of them is passed here.
xgb_model_tuned = XGBClassifier(**params_xgb)
# xgb_model_tuned.fit(X_train, y_train)

In [29]:
# Unfitted LightGBM estimator built from the tuned hyperparameters; it is
# fitted fold by fold inside cross_validate_models, so the direct fit below
# stays disabled.
# NOTE(review): the tuning objective also fixed force_row_wise=True,
# random_state=42, verbose=-1 and n_jobs=-1; none of them is passed here.
lgb_model_tuned = lgb.LGBMClassifier(**params_lgb)
# lgb_model_tuned.fit(X_train, y_train)

In [30]:
train_df

Unnamed: 0,tx_amount_median_ratio,tx_amount_to_mean_ration,max_amount_per_customer_last_12h,high_value_tx,max_amount_per_customer_last_8h,outlier_tx,max_amount_per_customer_last_24h,max_amount_per_customer_last_4h,std_amount,tx_time_days,...,tx_amount_hour_mean,median_amount_per_customer_last_4h,total_amount_per_customer_last_1h,mean_amount_per_customer_last_1h,amount_zscore_per_customer_last_1h,ratio_total_transactions_per_customer_last_1h_to_2h,mean_amount_per_customer_last_24h,ratio_mean_amount_per_customer_last_1h_to_24h,tx_amount,tx_fraud
0,2.028060,2.049276,123.59,False,123.59,True,123.59,123.59,27.120684,0,...,67.902143,123.590,123.59,123.59,0.0,1.0,123.590000,1.000000,123.59,0
1,1.269117,1.282394,123.59,False,77.34,False,123.59,77.34,27.120684,0,...,52.021818,77.340,77.34,77.34,0.0,1.0,100.465000,0.769820,77.34,0
2,0.763210,0.771194,123.59,False,77.34,False,123.59,77.34,27.120684,0,...,51.218750,61.925,46.51,46.51,0.0,0.5,82.480000,0.563894,46.51,0
3,0.897932,0.907326,54.72,False,54.72,False,77.34,54.72,27.120684,1,...,73.978000,54.720,54.72,54.72,0.0,1.0,59.523333,0.919303,54.72,0
4,1.038727,1.049593,63.30,False,63.30,False,77.34,63.30,27.120684,1,...,64.634737,63.300,63.30,63.30,0.0,1.0,60.467500,1.046843,63.30,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1403319,1.210540,1.188828,47.55,False,47.55,False,47.55,47.55,18.520773,144,...,42.028571,37.130,47.55,47.55,0.0,1.0,39.456667,1.205120,47.55,0
1403320,1.670316,1.640358,65.61,False,65.61,False,65.61,65.61,18.520773,144,...,41.549714,56.580,65.61,65.61,0.0,1.0,45.995000,1.426459,65.61,0
1403321,0.805244,0.790802,31.63,False,31.63,False,31.63,31.63,18.520773,145,...,39.353077,31.630,31.63,31.63,0.0,1.0,31.630000,1.000000,31.63,0
1403322,0.507128,0.498033,19.92,False,19.92,False,31.63,19.92,18.520773,146,...,33.205714,19.920,19.92,19.92,0.0,1.0,25.775000,0.772842,19.92,0


In [31]:
# Tuned estimators to cross-validate.
models = {"XGBoost": xgb_model_tuned, "LightGBM": lgb_model_tuned}

# 5-fold stratified CV on the reduced training frame, 0.3 decision threshold.
df_cv_results, model_predictions = cross_validate_models(
    models,
    train_df,
    target_col="tx_fraud",
    n_splits=5,
    threshold=0.3,
)


 Treinando e validando: XGBoost para o threshold: 0.3
XGBoost finalizado em 228.87 segundos.

 Treinando e validando: LightGBM para o threshold: 0.3
LightGBM finalizado em 140.67 segundos.


In [32]:
df_cv_results

Unnamed: 0,Modelo,threshold,AUC-ROC (M√©dia),AUC-ROC (Desvio),Precision (M√©dia),Precision (Desvio),Recall (M√©dia),Recall (Desvio),F1-Score (M√©dia),F1-Score (Desvio),Perda Total do Modelo (M√©dia),Perda Total do Modelo (Desvio)
0,XGBoost,0.3,0.723885,0.005926,0.918201,0.013473,0.34689,0.010896,0.503436,0.01196,94039.414,1917.60402
1,LightGBM,0.3,0.722663,0.004795,0.927474,0.012647,0.345763,0.010545,0.503694,0.012857,92662.056,1642.488489


### Comparação de Resultados Final:

| Modelo   | Baseline (Perda Sem Modelo) |       Ciclo 01        |        Ciclo 02       |     Ciclo Atual  |  Redução % vs. Baseline |
|:---------|:----------------------------|:----------------------|:----------------------|:-----------------|------------------------|
| XGBoost  |         `R$ 411.671,78`      |     `R$ 286.230,30`    |     `R$ 138.112,33`    |   `R$ 94.039,41`  |       `77,16%`          |
| LightGBM |         `R$ 411.671,78`      |     `R$ 286.230,30`    |     `R$ 131.316,99`    |   `R$ 92.662,05`  |	    `77,49%`          |

# Conclusão

No fim, conseguimos um resultado bastante satisfatório após o fine-tuning. Vale observar que os valores não mudaram tanto após o fine-tuning. O modelo com menos features aparentemente já tinha um bom comportamento.

Para melhorias futuras, talvez seja melhor pensar em adicionar mais features relevantes e melhorar o fine-tuning, visto que o tempo de espera não foi tão longo. Melhorar o Recall talvez seja uma boa saída.

O nosso modelo campeão foi o LightGBM. Ele irá para produção com os seguintes parâmetros:
{'n_estimators': 372, 'max_depth': 6, 'learning_rate': 0.08547122682859054, 'colsample_bytree': 0.6398669811350725, 'min_child_weight': 15, 'reg_lambda': 7.9992709797509125, 'subsample': 0.942434577307351, 'min_split_gain': 0.05705147926792379, 'scale_pos_weight': 1.333874892668437}
```
n_estimators=372,
max_depth=6,
learning_rate=0.08547122682859054,
colsample_bytree=0.6398669811350725,
min_child_weight=15,  
reg_lambda=7.9992709797509125,
subsample=0.942434577307351,
force_row_wise=True,
min_split_gain=0.05705147926792379,  
scale_pos_weight=1.333874892668437,  
random_state=42,
verbose=-1
```

Este foi o melhor resultado encontrado.

# Métricas após cross-validation

|Modelo   | AUC-ROC (Média)| AUC-ROC (Desvio)| Precision (Média)| Precision (Desvio) | Recall (Média) | Recall (Desvio) | F1-Score (Média) | F1-Score (Desvio)|
|:--------|:---------------|:----------------|:-----------------|:-------------------|:---------------|:----------------|:-----------------|:-----------------|
|LightGBM |    0.667328    |    0.009710     |     0.547015     |      0.035519      |    0.211954    |     0.009472    |     0.304879     |    0.006502      | 


# Próximos Passos

A ideia é subir o código, talvez para a AWS, e depois colocar o modelo em produção em um esquema ainda a definir.

# Salvando o melhor modelo

In [33]:
# Champion model: LightGBM with the tuned hyperparameters.
lgb_model_best = lgb.LGBMClassifier(
    n_estimators=372,
    max_depth=6,
    learning_rate=0.08547122682859054,
    colsample_bytree=0.6398669811350725,
    min_child_weight=15,
    reg_lambda=7.9992709797509125,
    subsample=0.942434577307351,
    force_row_wise=True,
    min_split_gain=0.05705147926792379,
    scale_pos_weight=1.333874892668437,
    random_state=42,
    verbose=-1
)

# Fit on the reduced training frame, i.e. the same selected features the
# champion was cross-validated on. The global X_train still holds the full
# feature set and would give a model that expects different input columns.
lgb_model_best.fit(train_df.drop(columns=["tx_fraud"]), train_df["tx_fraud"])

# Persist the estimator fitted just above. `lgb_model` is the untuned
# baseline and must not be the object written to disk.
with open("../models/lightgbm_model_ciclo03.pkl", "wb") as file:
    pickle.dump(lgb_model_best, file)