In [36]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import dice_ml
import joblib

from dice_ml import Dice

## Setup

In [37]:
# Tabular inputs. Judging by the file name, train_scaled.csv is presumably
# already transformed by the persisted scaler -- TODO confirm.
train_scaled_df = pd.read_csv('./data/train_scaled.csv')
clients_at_risk = pd.read_csv('./data/clients_at_risk.csv')

# Persisted artifacts. joblib.load unpickles arbitrary objects, so these
# files must come from a trusted source.
feature_names = joblib.load('./data/feature_names.joblib')
scaler = joblib.load('./data/credit_scaler.joblib')
model = joblib.load('./data/credit_scoring_model.joblib')

# Split the training frame into target and feature columns.
y_train = train_scaled_df['SeriousDlqin2yrs']
X_train_scaled_df = train_scaled_df.drop(columns=['SeriousDlqin2yrs'])

## Experimento com DICE

In [38]:
def translate_dice_deltas(exp_results, query_instance, scaler, feature_names,
                          outcome_name='SeriousDlqin2yrs'):
    """
    Show the exact signed change (+/-) each counterfactual needs, in unscaled units.

    Parameters
    ----------
    exp_results : object
        Counterfactual result exposing ``cf_examples_list``; only the first
        entry's ``final_cfs_df`` is read.
    query_instance : pandas.DataFrame
        One-row frame with the scaled query; must contain every name in
        ``feature_names``.
    scaler : object
        Object exposing ``inverse_transform``; it is given the columns in
        ``feature_names`` order.
    feature_names : sequence of str
        Feature columns, in the order the scaler expects.
    outcome_name : str, default 'SeriousDlqin2yrs'
        Name of the predicted-outcome column inside ``final_cfs_df``.

    Returns
    -------
    pandas.DataFrame
        One row per counterfactual. Each feature cell is ``"-"`` (no change),
        ``"+x.xx"`` (increase) or ``"-x.xx"`` (decrease); the extra
        ``'Inadimplente'`` column carries the counterfactual's outcome value.
        An empty frame with the same columns is returned when no
        counterfactual is available.
    """
    feature_names = list(feature_names)

    # 1. Pull the counterfactuals; bail out cleanly when there are none.
    cf_examples = exp_results.cf_examples_list
    cf_df = cf_examples[0].final_cfs_df if len(cf_examples) > 0 else None
    if cf_df is None or len(cf_df) == 0:
        return pd.DataFrame(columns=feature_names + ['Inadimplente'])

    def _format_delta(delta):
        # "-" marks an unchanged feature; negative floats already carry their sign.
        if np.isclose(delta, 0, atol=1e-5):
            return "-"
        if delta > 0:
            return f"+{delta:.2f}"
        return f"{delta:.2f}"

    # 2. Unscale both sides. Columns are selected by name so that the order
    #    handed to the scaler never depends on how the frames were assembled.
    cfs_raw = np.asarray(scaler.inverse_transform(cf_df[feature_names]), dtype=float)
    query_raw = np.asarray(scaler.inverse_transform(query_instance[feature_names]), dtype=float)

    # 3. Deltas of every counterfactual row against the single query row.
    deltas = cfs_raw - query_raw[0]
    df_diff = pd.DataFrame(
        {
            name: [_format_delta(delta) for delta in deltas[:, col_idx]]
            for col_idx, name in enumerate(feature_names)
        },
        columns=feature_names,
        dtype=object,
    )

    # Append the outcome of each counterfactual.
    df_diff['Inadimplente'] = cf_df[outcome_name].values

    return df_diff

In [39]:
# --- PASSO 1: Preparação dos Dados Escalonados ---
dice_data = dice_ml.Data(
    dataframe=train_scaled_df, 
    continuous_features=feature_names,
    outcome_name='SeriousDlqin2yrs'
)

# --- PASSO 2: Configuração do Modelo ---
dice_model = dice_ml.Model(model=model, backend="sklearn")

# --- PASSO 3: Inicialização do Explicador ---
dice_exp = dice_ml.Dice(dice_data, dice_model, method="random")

In [40]:
# Features the counterfactual search is not allowed to touch.
to_remove = ['age', 'NumberOfDependents', 'NumberOfTimes90DaysLate']

# Everything else, kept in the original feature order.
features_to_vary = list(filter(lambda name: name not in to_remove, feature_names))

print(features_to_vary)

['RevolvingUtilizationOfUnsecuredLines', 'NumberOfTime30-59DaysPastDueNotWorse', 'DebtRatio', 'MonthlyIncome', 'NumberOfOpenCreditLinesAndLoans', 'NumberRealEstateLoansOrLines', 'NumberOfTime60-89DaysPastDueNotWorse']


## Experimento 1 - Sem Restrições

In [41]:
# Query for Experiment 1: the first at-risk client, restricted to the model's
# feature columns and cast to the dtypes of the training features.
query_instance = pd.DataFrame(
    clients_at_risk.iloc[0:1], 
    columns=feature_names
).astype(X_train_scaled_df.dtypes.to_dict())

# Unconstrained run: every feature may vary and no permitted_range is given.
# The explainer samples at random, so the seed is fixed to make the table
# below reproducible on Restart & Run All.
exp_results = dice_exp.generate_counterfactuals(
    query_instance, 
    total_CFs=5,
    desired_class=0,
    features_to_vary=feature_names,
    random_seed=42
)

100%|██████████| 1/1 [00:00<00:00,  3.76it/s]


In [42]:
def color_deltas(val):
    """Return the CSS colour rule for one formatted delta cell.

    Strings containing '+' are green; strings containing '-' (other than the
    bare "-" placeholder) are red; anything else, including non-strings, gets
    no styling.
    """
    if not isinstance(val, str):
        return ''
    if '+' in val:
        return 'color: green'
    if val != '-' and '-' in val:
        return 'color: red'
    return ''

# Turn the Experiment 1 counterfactuals into signed deltas.
df_final = translate_dice_deltas(
    exp_results=exp_results,
    query_instance=query_instance,
    scaler=scaler,
    feature_names=feature_names,
)

# Last expression of the cell: the colour-coded table is the cell output.
df_final.style.map(color_deltas)

Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-,-,-,+25.97,-,-,-,-,0
1,-0.15,-,-,-0.24,-,-,-,-,-,-,0
2,-,-,+9.80,-,+2480834.20,-,-,-,-,-,0
3,-,-,-,+0.06,-,-,-,+7.70,-,-,0
4,+0.15,-,+98.00,-,-,-,-,-,-,-,0


## Experimento em Lote com Restrições Dinâmicas

In [43]:
def generate_batch_with_dynamic_range(exp, query_instances, scaler, feature_names, features_to_vary, num_cfs = 2):
    """
    Generate counterfactuals row by row, with per-client permitted ranges.

    For each query row the following limits are expressed in raw (unscaled)
    units and then mapped through ``scaler.transform``, so every bound handed
    to the explainer is in the same space as the query itself:

    * ``MonthlyIncome``: from the current value up to +10%.
    * ``age``: from the current value up to a raw age of 100.
    * ``NumberOfTime30-59DaysPastDueNotWorse`` and
      ``NumberOfTime60-89DaysPastDueNotWorse``: from a raw count of 0 up to
      the current value.

    Parameters
    ----------
    exp : object
        Explainer exposing ``generate_counterfactuals``.
    query_instances : pandas.DataFrame
        Scaled query rows containing every name in ``feature_names``.
    scaler : object
        Object exposing ``transform`` / ``inverse_transform`` over the columns
        in ``feature_names`` order. Assumed to be monotonically increasing per
        feature -- TODO confirm for the persisted scaler.
    feature_names : sequence of str
        Feature columns in scaler order.
    features_to_vary : list of str
        Forwarded unchanged to the explainer.
    num_cfs : int, default 2
        Counterfactuals requested per row.

    Returns
    -------
    list
        One explainer result per query row, in row order (empty for an empty
        input frame).
    """
    feature_names = list(feature_names)
    income_idx = feature_names.index('MonthlyIncome')
    age_idx = feature_names.index('age')
    late_30_59_idx = feature_names.index('NumberOfTime30-59DaysPastDueNotWorse')
    late_60_89_idx = feature_names.index('NumberOfTime60-89DaysPastDueNotWorse')

    def _ordered(bound_a, bound_b):
        # Guards against an inverted range (float round-trip noise, values
        # already past the limit, negative incomes).
        return [float(min(bound_a, bound_b)), float(max(bound_a, bound_b))]

    all_explanations = []
    for i in range(len(query_instances)):
        instance = query_instances.iloc[i:i+1]

        # 1. Current values in raw units.
        query_real = np.asarray(scaler.inverse_transform(instance[feature_names]), dtype=float)

        # 2. Upper limits defined in raw units, then mapped to model scale.
        upper_real = query_real.copy()
        upper_real[0, income_idx] = query_real[0, income_idx] * 1.1
        upper_real[0, age_idx] = 100.0
        upper_scaled = np.asarray(scaler.transform(upper_real), dtype=float)[0]

        # 3. Lower limits: a raw count of zero is generally NOT zero once
        #    scaled, so it has to go through the scaler as well.
        lower_real = query_real.copy()
        lower_real[0, late_30_59_idx] = 0.0
        lower_real[0, late_60_89_idx] = 0.0
        lower_scaled = np.asarray(scaler.transform(lower_real), dtype=float)[0]

        income_now = instance['MonthlyIncome'].values[0]
        age_now = instance['age'].values[0]
        val_30_59 = instance['NumberOfTime30-59DaysPastDueNotWorse'].values[0]
        val_60_89 = instance['NumberOfTime60-89DaysPastDueNotWorse'].values[0]

        # 4. Generate the counterfactuals under this client's own limits.
        explanation = exp.generate_counterfactuals(
            instance,
            total_CFs=num_cfs,
            desired_class=0,
            features_to_vary=features_to_vary,
            permitted_range={
                'MonthlyIncome': _ordered(income_now, upper_scaled[income_idx]),
                'age': _ordered(age_now, upper_scaled[age_idx]),
                'NumberOfTime30-59DaysPastDueNotWorse': _ordered(lower_scaled[late_30_59_idx], val_30_59),
                'NumberOfTime60-89DaysPastDueNotWorse': _ordered(lower_scaled[late_60_89_idx], val_60_89)
            }
        )
        all_explanations.append(explanation)

    return all_explanations

In [44]:
# First five at-risk clients, restricted to the model's feature columns and
# cast to the dtypes of the training features.
query_instances = (
    pd.DataFrame(clients_at_risk.iloc[:5], columns=feature_names)
    .astype(X_train_scaled_df.dtypes.to_dict())
)

In [45]:
# Batch run with per-client permitted ranges (default number of
# counterfactuals per client).
exp_results = generate_batch_with_dynamic_range(
    exp=dice_exp,
    query_instances=query_instances,
    scaler=scaler,
    feature_names=feature_names,
    features_to_vary=features_to_vary,
)

100%|██████████| 1/1 [00:00<00:00,  4.17it/s]
100%|██████████| 1/1 [00:00<00:00,  4.14it/s]
100%|██████████| 1/1 [00:00<00:00,  4.34it/s]
100%|██████████| 1/1 [00:00<00:00,  4.15it/s]
100%|██████████| 1/1 [00:00<00:00,  3.53it/s]


In [46]:
# Show one colour-coded delta table per client from the batch run.
for i in range(len(query_instances)):
    result, query_instance = exp_results[i], query_instances.iloc[[i]]

    df_final = translate_dice_deltas(
        exp_results=result,
        query_instance=query_instance,
        scaler=scaler,
        feature_names=feature_names,
    )
    styled_df = df_final.style.map(color_deltas)

    print(f"\nExplicação para o Cliente {i}")
    display(styled_df)
    


Explicação para o Cliente 0


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-,-,-,18.4,-,-,-,-,0
1,-,-,-,-,-,36.39,-,-,-,-,0



Explicação para o Cliente 1


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-,+0.76,24.42,-,-,-,-,-,0
1,-,-,+9.80,-,183.56,-,-,-,-,-,0



Explicação para o Cliente 2


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-,-,-,+10.84,-,-,-,-,0
1,-,-,+8.80,-,+1959.48,-,-,-,-,-,0



Explicação para o Cliente 3


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-0.8,-,-,-,-,+49.65,-,-,-,-,0
1,-0.5,-,-,-,-,-,-,+17.40,-,-,0



Explicação para o Cliente 4


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-,-,-,+39.64,-,+28.00,-,-,0
1,-0.42,-,-,-,-,-,-,-,-,-,0


## DiCE Genético: Sem e Com Restrições

In [64]:
# Second explainer over the same data/model wrappers, using the genetic method.
# NOTE(review): the variable is named "wachter" but the method requested here
# is 'genetic' -- confirm the intended naming.
dice_wachter = Dice(dice_data, dice_model, method='genetic')

In [65]:
def generate_wachter_cf(
    dice_exp,
    query_instance,
    scaler,
    feature_names,
    features_to_vary,
    total_CFs=3,
    desired_class=0,
    use_constraints=False
):
    """
    Generate counterfactuals for one query row, optionally with permitted ranges.

    When ``use_constraints`` is true, the limits below are defined in raw
    (unscaled) units and mapped through ``scaler.transform``, so every bound
    passed to the explainer is in the same space as the query:

    * ``MonthlyIncome``: from the current value up to +10%.
    * ``age``: from the current value up to a raw age of 100.
    * ``NumberOfTime30-59DaysPastDueNotWorse`` and
      ``NumberOfTime60-89DaysPastDueNotWorse``: from a raw count of 0 up to
      the current value.

    Parameters
    ----------
    dice_exp : object
        Explainer exposing ``generate_counterfactuals``.
    query_instance : pandas.DataFrame
        One-row scaled query containing every name in ``feature_names``.
    scaler : object
        Object exposing ``transform`` / ``inverse_transform`` over the columns
        in ``feature_names`` order. Assumed to be monotonically increasing per
        feature -- TODO confirm for the persisted scaler.
    feature_names : sequence of str
        Feature columns in scaler order.
    features_to_vary : list of str
        Forwarded unchanged to the explainer.
    total_CFs : int, default 3
        Number of counterfactuals requested.
    desired_class : int, default 0
        Target class forwarded to the explainer.
    use_constraints : bool, default False
        Whether to build and pass ``permitted_range``; ``None`` is passed
        otherwise.

    Returns
    -------
    object
        Whatever ``dice_exp.generate_counterfactuals`` returns.
    """
    permitted_range = None

    if use_constraints:
        names = list(feature_names)
        income_idx = names.index('MonthlyIncome')
        age_idx = names.index('age')
        late_30_59_idx = names.index('NumberOfTime30-59DaysPastDueNotWorse')
        late_60_89_idx = names.index('NumberOfTime60-89DaysPastDueNotWorse')

        def _ordered(bound_a, bound_b):
            # Guards against an inverted range (float round-trip noise,
            # values already past the limit, negative incomes).
            return [float(min(bound_a, bound_b)), float(max(bound_a, bound_b))]

        # --- current values in raw units ---
        query_real = np.asarray(scaler.inverse_transform(query_instance[names]), dtype=float)

        # --- upper limits in raw units, mapped back to model scale ---
        upper_real = query_real.copy()
        upper_real[0, income_idx] = query_real[0, income_idx] * 1.1
        upper_real[0, age_idx] = 100.0
        upper_scaled = np.asarray(scaler.transform(upper_real), dtype=float)[0]

        # --- lower limits: a raw count of zero is generally NOT zero once
        #     scaled, so it has to go through the scaler as well ---
        lower_real = query_real.copy()
        lower_real[0, late_30_59_idx] = 0.0
        lower_real[0, late_60_89_idx] = 0.0
        lower_scaled = np.asarray(scaler.transform(lower_real), dtype=float)[0]

        val_30_59 = query_instance['NumberOfTime30-59DaysPastDueNotWorse'].values[0]
        val_60_89 = query_instance['NumberOfTime60-89DaysPastDueNotWorse'].values[0]

        permitted_range = {
            'MonthlyIncome': _ordered(
                query_instance['MonthlyIncome'].values[0],
                upper_scaled[income_idx]
            ),
            'age': _ordered(
                query_instance['age'].values[0],
                upper_scaled[age_idx]
            ),
            'NumberOfTime30-59DaysPastDueNotWorse': _ordered(lower_scaled[late_30_59_idx], val_30_59),
            'NumberOfTime60-89DaysPastDueNotWorse': _ordered(lower_scaled[late_60_89_idx], val_60_89)
        }

    cf = dice_exp.generate_counterfactuals(
        query_instance,
        total_CFs=total_CFs,
        desired_class=desired_class,
        features_to_vary=features_to_vary,
        permitted_range=permitted_range,
        proximity_weight=0.5,
        diversity_weight=0.1
    )

    return cf


In [66]:
# One unconstrained run per client, collected in client order.
wachter_no_constraints = []

for i in range(len(query_instances)):
    query_instance = query_instances.iloc[[i]]

    cf = generate_wachter_cf(
        dice_exp=dice_wachter,
        query_instance=query_instance,
        scaler=scaler,
        feature_names=feature_names,
        features_to_vary=features_to_vary,
        total_CFs=3,
        desired_class=0,
        use_constraints=False,
    )

    wachter_no_constraints.append(cf)


100%|██████████| 1/1 [00:01<00:00,  1.13s/it]
100%|██████████| 1/1 [00:01<00:00,  1.12s/it]
100%|██████████| 1/1 [00:01<00:00,  1.09s/it]
100%|██████████| 1/1 [00:01<00:00,  1.09s/it]
100%|██████████| 1/1 [00:01<00:00,  1.10s/it]


In [67]:
# One constrained run per client (permitted ranges enabled), in client order.
wachter_with_constraints = []

for i in range(len(query_instances)):
    query_instance = query_instances.iloc[[i]]

    cf = generate_wachter_cf(
        dice_exp=dice_wachter,
        query_instance=query_instance,
        scaler=scaler,
        feature_names=feature_names,
        features_to_vary=features_to_vary,
        total_CFs=3,
        desired_class=0,
        use_constraints=True,
    )

    wachter_with_constraints.append(cf)


100%|██████████| 1/1 [00:01<00:00,  1.08s/it]
100%|██████████| 1/1 [00:01<00:00,  1.07s/it]
100%|██████████| 1/1 [00:01<00:00,  1.07s/it]
100%|██████████| 1/1 [00:01<00:00,  1.07s/it]
100%|██████████| 1/1 [00:01<00:00,  1.09s/it]


In [69]:
# Side-by-side comparison, per client, of the unconstrained and constrained runs.
for i in range(len(query_instances)):

    query_instance = query_instances.iloc[i:i+1]

    print("="*100)
    print(f"CLIENTE {i}")
    print("="*100)

    # --- WITHOUT CONSTRAINTS ---
    print("\nWachter SEM restrições")
    cf = wachter_no_constraints[i]

    # A result object can exist while holding no counterfactual rows, so the
    # check looks at the counterfactual frame itself, not at the list length.
    cfs_found = cf.cf_examples_list[0].final_cfs_df if len(cf.cf_examples_list) > 0 else None
    if cfs_found is not None and len(cfs_found) > 0:
        df_no = translate_dice_deltas(
            cf,
            query_instance,
            scaler,
            feature_names
        )
        display(df_no.style.map(color_deltas))
    else:
        print("Nenhum contrafactual encontrado")

    # --- WITH CONSTRAINTS ---
    print("\nWachter COM restrições")
    cf = wachter_with_constraints[i]

    cfs_found = cf.cf_examples_list[0].final_cfs_df if len(cf.cf_examples_list) > 0 else None
    if cfs_found is not None and len(cfs_found) > 0:
        df_yes = translate_dice_deltas(
            cf,
            query_instance,
            scaler,
            feature_names
        )
        display(df_yes.style.map(color_deltas))
    else:
        print("Nenhum contrafactual encontrado")


CLIENTE 0

Wachter SEM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-0.05,-,-,0.06,836.0,-,-,-1.0,-,-,0
1,-0.05,-,-,-0.04,216.0,-,-,1.9,-,-,0
2,-0.05,-,-,-0.04,-742.0,-4.00,-,-1.0,-,-,0



Wachter COM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-0.05,-,-,0.06,84.84,-,-,-1.0,-,-,0
1,-0.05,-,-,-0.04,346.0,-,-,-1.0,-,-,0
2,0.05,-,-,0.06,296.55,-2.00,-,-1.0,-,-,0


CLIENTE 1

Wachter SEM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,0.0,-,-,-0.04,150.0,-,-,-,-,-,0
1,0.0,-,-,-0.04,-808.0,-,-,-,-,-,0
2,-0.7,-,-,-0.04,-408.0,-,-,-,-,-,0



Wachter COM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,0.0,-,-,0.06,361.86,-,-,-,-,-,0
1,0.0,-,-,-0.04,150.0,-,-,-,-,-,0
2,0.0,-,-,-0.04,352.96,-,-,-,-,-,0


CLIENTE 2

Wachter SEM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,0.03,-,-1.0,-0.09,-10980.0,-,-,-1.0,-,-,0
1,0.03,-,-1.0,0.01,-18930.0,-,-,-1.0,-,-,0
2,0.03,-,-1.0,0.01,-19440.0,-,-,-1.0,-,-,0



Wachter COM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,0.03,-,-1.0,0.01,0.0,-,-,-1.0,-,-,0
1,0.03,-,-1.0,-0.09,0.0,-,-,-1.0,-,-,0
2,0.03,-,-1.0,-0.09,0.0,-,-,-1.0,-,-,0


CLIENTE 3

Wachter SEM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-3.0,0.05,1750.0,-,-2.0,-,-1.0,1.0,0
1,-,-,-3.0,0.05,-3050.0,-2.00,-2.0,-,-1.0,1.0,0
2,-,-,-3.0,0.05,-3570.0,-2.00,-2.0,-,-1.0,1.0,0



Wachter COM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-3.0,0.05,568.34,-2.00,-2.0,-,-1.0,1.0,0
1,-,-,-3.0,-0.05,-0.0,-,-2.0,-,-1.0,1.0,0
2,-,-,-3.0,0.05,49.47,-,-2.0,-,-1.0,1.0,0


CLIENTE 4

Wachter SEM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-0.02,-,-2.0,0.07,250.0,-,-1.0,-1.0,-,1.0,0
1,-0.02,-,-2.0,-0.03,-350.0,-,-1.0,-1.0,-,1.0,0
2,-0.02,-,-2.0,0.07,50.0,-,-1.0,-1.0,-,1.0,0



Wachter COM restrições


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-0.02,-,-2.0,-0.03,0.0,-,-1.0,-1.0,-,1.0,0
1,-0.02,-,-2.0,-0.03,0.0,-,-1.0,-1.0,-,1.0,0
2,-0.02,-,-2.0,0.07,0.0,-,-1.0,-1.0,-,1.0,0
