***Équilibrage des données***


***Pour l'option 1***

In [None]:
# --- Target encoding -------------------------------------------------------
# Work on a copy of the cleaned data and add an integer-coded version of the
# 'Risk' label. The fitted encoder is kept so codes can be decoded later.
label_encoder1 = LabelEncoder()
df_balanced1 = data_clean.copy()
df_balanced1['Risk_encoded'] = label_encoder1.fit_transform(df_balanced1['Risk'])

# Explanatory variables fed to the model.
feature_cols = [
    'External_Debt_GDP',
    'Debt_Service_Exports',
    'GDP_Growth',
    'Current_Account',
    'Exchange_Rate',
    'Corruption_Governance',
    'Net_ODA_received',
]

# --- Temporal train/test split ---------------------------------------------
# Rows with Year <= 2021 form the training set, rows with Year > 2021 the test set.
train_mask1 = df_balanced1['Year'].le(2021)
test_mask1 = df_balanced1['Year'].gt(2021)

X_train_raw1 = df_balanced1.loc[train_mask1, feature_cols]
X_test_raw1 = df_balanced1.loc[test_mask1, feature_cols]

y_train1 = df_balanced1.loc[train_mask1, 'Risk_encoded']
y_test1 = df_balanced1.loc[test_mask1, 'Risk_encoded']

# --- Standardisation -------------------------------------------------------
# The scaler is fitted on the training rows only and then applied to both
# sets, so no test-set statistics leak into the training features.
scaler1 = StandardScaler().fit(X_train_raw1)

# Wrap the scaled arrays back into DataFrames that keep the feature names and
# the original row index of each split.
X_train_scaled1 = pd.DataFrame(
    scaler1.transform(X_train_raw1),
    columns=feature_cols,
    index=X_train_raw1.index,
)
X_test_scaled1 = pd.DataFrame(
    scaler1.transform(X_test_raw1),
    columns=feature_cols,
    index=X_test_raw1.index,
)


# --- Oversampling targets --------------------------------------------------
# Every class with fewer training samples than half the majority class is
# raised to that level (minority:majority ratio of 1:2); the other classes
# are not oversampled.
class_counts1 = Counter(y_train1)
max_count1 = max(class_counts1.values())
target_minority1 = max_count1 // 2  # Ratio 1:2

sampling_strategy_dict1 = {
    class_label: target_minority1
    for class_label, count in class_counts1.items()
    if count < target_minority1
}

print(f"Distribution originale Train : {class_counts1}")
print(f"Cible pour les minoritaires : {target_minority1}")

# Encoded label of the 'DISTRESS' class (-1 when the class is absent) and its
# training count; both names are kept in case later cells read them.
distress_code = label_encoder1.transform(['DISTRESS'])[0] if 'DISTRESS' in label_encoder1.classes_ else -1
n_distress = class_counts1.get(distress_code, 0)

# --- Neighbourhood size for SMOTE ------------------------------------------
# SMOTE interpolates between a sample and its k nearest neighbours *within
# the same class*, so k must stay strictly below the size of the smallest
# class that is actually oversampled (not only DISTRESS).
if sampling_strategy_dict1:
    smallest_oversampled = min(class_counts1[label] for label in sampling_strategy_dict1)
    if smallest_oversampled < 2:
        raise ValueError(
            "SMOTE needs at least 2 training samples in every class to oversample; "
            f"smallest class has {smallest_oversampled}."
        )
    k = min(5, smallest_oversampled - 1)
else:
    # Nothing to oversample: the value is irrelevant, keep the SMOTE default.
    k = 5

# --- SMOTE + ENN -----------------------------------------------------------
# The sampling strategy must be set on the SMOTE object itself: when a custom
# `smote` estimator is supplied, SMOTEENN uses it as-is and does not forward
# its own `sampling_strategy`, so SMOTE would otherwise fall back to 'auto'
# and inflate every non-majority class up to the majority count.
smote_enn1 = SMOTEENN(
    sampling_strategy=sampling_strategy_dict1,
    smote=SMOTE(
        sampling_strategy=sampling_strategy_dict1,
        k_neighbors=k,
        random_state=42,
    ),
    # ENN then removes samples from all classes, not only the majority.
    enn=EditedNearestNeighbours(sampling_strategy='all'),
    random_state=42
)

print("\nApplication de SMOTEENN en cours...")
X_train_resampled1, y_train_resampled1 = smote_enn1.fit_resample(X_train_scaled1, y_train1)

print(f"Nouvelle distribution Train : {Counter(y_train_resampled1)}")


from sklearn.utils.class_weight import compute_class_weight

# --- Residual class weights ------------------------------------------------
# 'balanced' weights computed on the resampled training labels, to be passed
# to the model to compensate for the imbalance left after resampling.
classes_present1 = np.unique(y_train_resampled1)
weights1 = compute_class_weight(
    class_weight='balanced',
    classes=classes_present1,
    y=y_train_resampled1,
)
class_weight_dict1 = {label: weight for label, weight in zip(classes_present1, weights1)}

print("\nPoids résiduels (à passer au modèle) :")
for cls, w in zip(classes_present1, weights1):
    # Decode the integer code back to its original risk label for display.
    class_name = label_encoder1.inverse_transform([cls])[0]
    print(f"Classe {class_name} : {w:.2f}")


# --- Final datasets handed to the modelling cells --------------------------
# Training data is the resampled set; test data is only scaled, never resampled.
X_train_final = pd.DataFrame(X_train_resampled1, columns=feature_cols)
X_test_final = pd.DataFrame(X_test_scaled1, columns=feature_cols)
y_train_final = y_train_resampled1
y_test_final = y_test1

Distribution originale Train : Counter({3: 198, 2: 128, 1: 122, 0: 35})
Cible pour les minoritaires : 99

Application de SMOTEENN en cours...
Nouvelle distribution Train : Counter({0: 184, 2: 141, 1: 129, 3: 69})

Poids résiduels (à passer au modèle) :
Classe DISTRESS : 0.71
Classe HIGH : 1.01
Classe LOW : 0.93
Classe MODERATE : 1.89
