In [65]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import dice_ml
import joblib

## Setup

In [66]:
model = joblib.load('./data/credit_scoring_model.joblib')
scaler = joblib.load('./data/credit_scaler.joblib')
feature_names = joblib.load('./data/feature_names.joblib')

clients_at_risk = pd.read_csv('./data/clients_at_risk.csv')
train_scaled_df = pd.read_csv('./data/train_scaled.csv')

X_train_scaled_df = train_scaled_df.drop('SeriousDlqin2yrs', axis=1)
y_train = train_scaled_df['SeriousDlqin2yrs']

## Experimento com DICE

In [67]:
def translate_dice_deltas(exp_results, query_instance, scaler, feature_names):
    """
    Exibe a diferença exata (+/-) necessária para atingir o contrafatual.
    """

    # 1. Extrair e desescalonar os contrafatuais
    cf_df = exp_results.cf_examples_list[0].final_cfs_df
    features_scaled = cf_df.drop('SeriousDlqin2yrs', axis=1)
    features_raw = scaler.inverse_transform(features_scaled)
    df_cfs = pd.DataFrame(features_raw, columns=feature_names)
    
    # 2. Desescalonar a query_instance original
    query_raw = scaler.inverse_transform(query_instance)
    df_query = pd.DataFrame(query_raw, columns=feature_names)
    
    # 3. Criar DataFrame de Deltas (Diferenças)
    df_diff = df_cfs.copy().astype(object)
    
    for col in feature_names:
        original_val = df_query[col].values[0]
        cf_val = df_cfs[col].values
        
        # Calcula a diferença
        diff = cf_val - original_val
        
        # Aplica a formatação baseada na mudança
        formatted_col = []
        for d in diff:
            if np.isclose(d, 0, atol=1e-5):
                formatted_col.append("-") # Sem mudança
            elif d > 0:
                formatted_col.append(f"+{d:.2f}") # Aumento
            else:
                formatted_col.append(f"{d:.2f}") # Diminuição (o sinal de - já vem no float)
        
        df_diff[col] = formatted_col
        
    # Adicionar o status de aprovação
    df_diff['Inadimplente'] = cf_df['SeriousDlqin2yrs'].values
    
    return df_diff

In [68]:
# --- PASSO 1: Preparação dos Dados Escalonados ---
dice_data = dice_ml.Data(
    dataframe=train_scaled_df, 
    continuous_features=feature_names,
    outcome_name='SeriousDlqin2yrs'
)

# --- PASSO 2: Configuração do Modelo ---
dice_model = dice_ml.Model(model=model, backend="sklearn")

# --- PASSO 3: Inicialização do Explicador ---
dice_exp = dice_ml.Dice(dice_data, dice_model, method="random")

In [69]:
to_remove = ['age', 'NumberOfDependents', 'NumberOfTimes90DaysLate']
features_to_vary = [f for f in feature_names if f not in to_remove]

print(features_to_vary)

['RevolvingUtilizationOfUnsecuredLines', 'NumberOfTime30-59DaysPastDueNotWorse', 'DebtRatio', 'MonthlyIncome', 'NumberOfOpenCreditLinesAndLoans', 'NumberRealEstateLoansOrLines', 'NumberOfTime60-89DaysPastDueNotWorse']


## Experimento 1 - Sem Restrições

In [70]:
query_instance = pd.DataFrame(
    clients_at_risk.iloc[0:1], 
    columns=feature_names
).astype(X_train_scaled_df.dtypes.to_dict())

# Gerar os contrafatuais com as restrições de "Não Aumentar"
exp_results = dice_exp.generate_counterfactuals(
    query_instance, 
    total_CFs=5,
    desired_class=0,
    features_to_vary=feature_names
)

100%|██████████| 1/1 [00:00<00:00,  3.76it/s]


In [71]:
def color_deltas(val):
    if isinstance(val, str):
        if '+' in val: return 'color: green'
        if '-' in val and val != '-': return 'color: red'
    return ''

df_final = translate_dice_deltas(exp_results, query_instance, scaler, feature_names)

df_final.style.map(color_deltas)

Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,+52.83,-,+0.16,-,-,-,-,-,-,0
1,-,+20.02,-,-,-,-,-,+10.60,-,-,0
2,-0.15,-,-,-,-,-,+29.40,-,-,-,0
3,-,-,-,-0.94,-,-,-,-,+107.80,-,0
4,-,-,-,-,+782918.23,-,-,-,-,-,0


## Experimento em Lote com Restrições Dinâmicas

In [72]:
def generate_batch_with_dynamic_range(exp, query_instances, scaler, feature_names, features_to_vary, num_cfs = 2):
    all_explanations = []
    income_idx = feature_names.index('MonthlyIncome')
    
    # Progresso visual para acompanhar o processamento
    for i in range(len(query_instances)):
        instance = query_instances.iloc[i:i+1]
        
        # 1. Calcula o limite de 10% para ESTA instância específica
        query_real = scaler.inverse_transform(instance)
        current_income = query_real[0][income_idx]
        max_income_real = current_income * 1.1
        
        # 2. Converte para a escala do modelo
        dummy_point = query_real.copy()
        dummy_point[0][income_idx] = max_income_real
        max_income_scaled = scaler.transform(dummy_point)[0][income_idx]

        val_30_59 = instance['NumberOfTime30-59DaysPastDueNotWorse'].values[0]
        val_60_89 = instance['NumberOfTime60-89DaysPastDueNotWorse'].values[0]
        
        # 3. Gera o contrafatual com a restrição personalizada
        dice_exp = exp.generate_counterfactuals(
            instance,
            total_CFs=num_cfs,
            desired_class=0,
            features_to_vary=features_to_vary,
            permitted_range={
                'MonthlyIncome': [instance['MonthlyIncome'].values[0], max_income_scaled],
                'age': [float(instance['age'].values[0]), 100.0],
                'NumberOfTime30-59DaysPastDueNotWorse': [0, val_30_59],
                'NumberOfTime60-89DaysPastDueNotWorse': [0, val_60_89]
            }
        )
        all_explanations.append(dice_exp)
    
    return all_explanations

In [73]:
query_instances = pd.DataFrame(
    clients_at_risk.iloc[0:5], 
    columns=feature_names
).astype(X_train_scaled_df.dtypes.to_dict())

In [74]:
exp_results = generate_batch_with_dynamic_range(dice_exp, query_instances,scaler, 
                                                feature_names, features_to_vary)

100%|██████████| 1/1 [00:00<00:00,  4.42it/s]
100%|██████████| 1/1 [00:00<00:00,  4.33it/s]
100%|██████████| 1/1 [00:00<00:00,  4.36it/s]
100%|██████████| 1/1 [00:00<00:00,  4.49it/s]
100%|██████████| 1/1 [00:00<00:00,  4.43it/s]


In [75]:
for i in range(len(query_instances)):
    
    result = exp_results[i] 
    query_instance = query_instances.iloc[i:i+1]
    
    df_final = translate_dice_deltas(result, query_instance, scaler, feature_names)
    styled_df = df_final.style.map(color_deltas)
    
    print(f"\nExplicação para o Cliente {i}")
    display(styled_df)
    


Explicação para o Cliente 0


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,+0.15,-,-,-,-,-8.62,-,-,-,-,0
1,-,-,-,-,+75.97,-,-,-,-,-,0



Explicação para o Cliente 1


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-,+0.76,-,-,-,-,+9.80,-,0
1,-,-,+9.80,-,-,+24.22,-,-,-,-,0



Explicação para o Cliente 2


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-0.47,-,-,-,-,-,-,+25.10,-,-,0
1,-,-,-,+0.01,-,+6.43,-,-,-,-,0



Explicação para o Cliente 3


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-3.00,-,-,18.33,-,-,-,-,0
1,-0.10,-,-,-,-,26.41,-,-,-,-,0



Explicação para o Cliente 4


Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,Inadimplente
0,-,-,-,-,-,+31.27,-,-,-,-,0
1,-,-,-,-,-,-,-,+1.90,+9.80,-,0
