In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from omegaconf import OmegaConf
import mlflow

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline

import tensorflow as tf

# Short module-level aliases for objects under tf.keras, so later cells can
# use the bare names. Dense and Sequential are used by train(); the other
# aliases are not referenced in the cells visible here.
Dense = tf.keras.layers.Dense
Input = tf.keras.layers.Input
Sequential = tf.keras.Sequential
MeanSquaredError = tf.keras.losses.MeanSquaredError
BinaryCrossentropy = tf.keras.losses.BinaryCrossentropy
Sigmoid = tf.keras.activations.sigmoid

# Current working directory of the kernel (a directory, despite the name).
# Every path below is built relative to it via "..", so the notebook must be
# run from a directory that is a sibling of src/ and data/.
file_path = os.getcwd()

# Project configuration; this notebook reads conf["tracking_uri"] and
# conf["parameters"] from it.
conf = OmegaConf.load(os.path.join(file_path, "..", "src", "config.yml"))

# Make the experiment named in the config the active MLflow experiment.
# NOTE(review): no mlflow logging call appears in the cells visible here.
mlflow.set_experiment(conf["tracking_uri"]["experiment_name"])

data_path = os.path.join(file_path, "..", "data", "UCI_Credit_Card.csv")

# Raw dataset; train() reads the feature columns and the
# 'default.payment.next.month' target from this frame.
df = pd.read_csv(data_path)

# L2 regularization strengths that train() iterates over (0 = no penalty).
lambdas = [0, 1e-5, 1e-4, 5e-4, 1e-3]

  if not hasattr(np, "object"):
2025/12/28 04:58:01 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/28 04:58:01 INFO mlflow.store.db.utils: Updating database tables
2025/12/28 04:58:01 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/28 04:58:01 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2025/12/28 04:58:01 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/28 04:58:01 INFO alembic.runtime.migration: Will assume non-transactional DDL.


In [2]:
# Column roles in the UCI credit-card frame as used by train().
_TARGET_COLUMN = 'default.payment.next.month'
_BINARY_FEATURES = ['SEX']
_CATEGORICAL_FEATURES = ['EDUCATION', 'MARRIAGE']
_NUMERICAL_FEATURES = [
    'LIMIT_BAL',
    'AGE',
    'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6',
    'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3',
    'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',
    'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3',
    'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
]


def _build_preprocessor():
    """Return an unfitted ColumnTransformer: min-max scaling + one-hot encoding."""
    return ColumnTransformer(
        transformers=[
            ('bin', MinMaxScaler(), _BINARY_FEATURES),
            ('cat', OneHotEncoder(handle_unknown='ignore'), _CATEGORICAL_FEATURES),
            ('num', MinMaxScaler(), _NUMERICAL_FEATURES),
        ],
        # Always hand Keras a dense array, whatever the one-hot density is.
        sparse_threshold=0,
    )


def _build_model(l2_strength, learning_rate):
    """Return a compiled 64-32-1 MLP that outputs logits, with L2 on the hidden kernels."""
    model = Sequential(
        [
            Dense(64, activation='relu', name='layer1',
                  kernel_regularizer=tf.keras.regularizers.l2(l2_strength)),
            Dense(32, activation='relu', name='layer2',
                  kernel_regularizer=tf.keras.regularizers.l2(l2_strength)),
            # Linear output: the sigmoid is folded into the loss (from_logits=True).
            Dense(1, activation='linear', name='layer3'),
        ],
        name='Credit_Default_Model',
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    )
    return model


def _evaluate(model, X, y):
    """Return (accuracy at a 0.5 threshold, ROC-AUC) of ``model`` on ``X``/``y``."""
    logits = model(X, training=False)
    proba = tf.nn.sigmoid(logits).numpy().ravel()
    predicted = (proba >= 0.5).astype(int)
    return accuracy_score(y, predicted), roc_auc_score(y, proba)


def train(df, params, lambda_grid=None, val_size=0.2):
    """Sweep L2 strengths for the credit-default MLP and return the best run.

    The data is split three ways (all stratified on the target): a test set
    of ``params["test_size"]``, and the remainder into train/validation using
    ``val_size``. The preprocessor is fitted on the training part only. One
    model is trained per value in ``lambda_grid``; the winner is the one with
    the highest *validation* ROC-AUC, so the test metrics reported for it are
    not biased by the selection itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the feature columns listed at the top of this cell and
        the 'default.payment.next.month' target column.
    params : mapping
        Read keys: "test_size", "random_state", "learning_rate", "epochs",
        "verbose".
    lambda_grid : iterable of float, optional
        L2 strengths to try. Defaults to the module-level ``lambdas`` list.
    val_size : float or int, optional
        Size of the validation split taken from the non-test data, passed to
        ``train_test_split`` as ``test_size``. Defaults to 0.2.

    Returns
    -------
    dict
        Entry for the selected model with keys "lambda", "accuracy" and
        "roc_auc" (test set), "val_accuracy" and "val_roc_auc" (validation
        set), and "model" (the fitted Keras model).

    Raises
    ------
    ValueError
        If ``lambda_grid`` is empty.

    Notes
    -----
    When ``params["random_state"]`` is not None, the Python, NumPy and
    TensorFlow global seeds are reset to it before each model is built, so
    every lambda starts from the same initial weights and batch order.
    """
    if lambda_grid is None:
        lambda_grid = lambdas
    lambda_grid = list(lambda_grid)
    if not lambda_grid:
        raise ValueError("lambda_grid must contain at least one value")

    seed = params["random_state"]

    # Target and features. Column order in X is irrelevant: the
    # ColumnTransformer selects columns by name.
    y = df[_TARGET_COLUMN]
    X = df[_BINARY_FEATURES + _CATEGORICAL_FEATURES + _NUMERICAL_FEATURES]

    # Hold out the test set first, then carve a validation set out of what
    # remains so that model selection never looks at the test data.
    X_fit, X_test, y_fit, y_test = train_test_split(
        X,
        y,
        test_size=params["test_size"],
        random_state=seed,
        stratify=y,
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_fit,
        y_fit,
        test_size=val_size,
        random_state=seed,
        stratify=y_fit,
    )

    # Fit scaling/encoding on the training part only, then apply everywhere.
    preprocessor = _build_preprocessor()
    X_train = preprocessor.fit_transform(X_train)
    X_val = preprocessor.transform(X_val)
    X_test = preprocessor.transform(X_test)

    results = []

    # Train one model per regularization strength.
    for lambda_ in lambda_grid:
        if seed is not None:
            # Same starting point for every lambda, so score differences are
            # attributable to the regularization and not to the random init.
            tf.keras.utils.set_random_seed(seed)

        model = _build_model(lambda_, params["learning_rate"])

        model.fit(
            X_train,
            y_train,
            epochs=params["epochs"],
            verbose=params["verbose"],
        )

        val_acc, val_roc_auc = _evaluate(model, X_val, y_val)
        acc, roc_auc = _evaluate(model, X_test, y_test)

        results.append({
            "lambda": lambda_,
            "accuracy": acc,
            "roc_auc": roc_auc,
            "val_accuracy": val_acc,
            "val_roc_auc": val_roc_auc,
            "model": model,
        })

        # ':g' keeps small strengths such as 1e-05 distinguishable
        # (':.3f' rendered them all as 0.000).
        print(
            f"λ={lambda_:g} | Val Accuracy={val_acc:.4f} | "
            f"Val ROC-AUC={val_roc_auc:.4f}"
        )

    # Select on validation ROC-AUC; test metrics are reported, not optimized.
    best_model = max(results, key=lambda entry: entry["val_roc_auc"])

    print("\n🏆 Melhor Modelo:")
    print(f"Lambda: {best_model['lambda']}")
    print(f"Val ROC-AUC: {best_model['val_roc_auc']:.4f}")
    print(f"Accuracy: {best_model['accuracy']:.4f}")
    print(f"ROC-AUC: {best_model['roc_auc']:.4f}")

    return best_model


In [None]:
def main():
    """Train on the module-level ``df`` using ``conf["parameters"]``.

    Returns
    -------
    dict
        Whatever ``train`` returns (the entry for the selected model,
        including the fitted model), so the result is not lost to the caller.
    """
    return train(df, conf["parameters"])


if __name__ == "__main__":
    # Keep the result in the notebook namespace so later cells can inspect
    # the metrics and reuse the fitted model.
    best_result = main()
