<a href="https://colab.research.google.com/github/PranavShashidhara/Country-Default-Prediction/blob/main/Debt_default_and_Time_series_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import lightgbm as lgb
from imblearn.over_sampling import ADASYN
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import make_scorer, precision_score, recall_score, classification_report, confusion_matrix
import pandas as pd
import numpy as np
from sklearn.ensemble import VotingClassifier

data_df = pd.read_csv('World_Bank_Cleared.csv')

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [2]:
# Prepare data
X = data_df.drop('DEFAULT', axis=1)
y = data_df['DEFAULT']

# Apply ADASYN to balance class distribution
adasyn = ADASYN(sampling_strategy=0.1, random_state=42)
X_resampled, y_resampled = adasyn.fit_resample(X, y)

# Convert 'Year' column to numeric before fitting the model
X_resampled['Year'] = pd.to_numeric(X_resampled['Year'], errors='coerce')
X = X.dropna()  # Remove any rows with missing values before splitting

# Replace special characters and spaces in column names
X_resampled.columns = X_resampled.columns.str.replace('[^a-zA-Z0-9_]', '', regex=True)

# Initialize LightGBM model for recall (Model 0)
model_recall = lgb.LGBMClassifier(
    boosting_type='gbdt',
    objective='binary',
    metric='recall',
    class_weight='balanced',
    num_leaves=30,
    learning_rate=0.01,
    min_split_gain=0.01,
    min_child_samples=30,
    n_estimators=50,
    lambda_l2=1,
    max_depth=6,
    random_state=42
)

# Initialize LightGBM model for precision (Model 1)
model_precision = lgb.LGBMClassifier(
    boosting_type='gbdt',
    objective='binary',
    metric='precision',
    class_weight='balanced',
    num_leaves=30,
    learning_rate=0.01,
    min_split_gain=0.01,
    min_child_samples=30,
    n_estimators=50,
    lambda_l2=1,
    max_depth=6,
    random_state=42
)

# Define custom scoring functions for precision and recall
def custom_recall(y_true, y_pred):
    return recall_score(y_true, y_pred)

def custom_precision(y_true, y_pred):
    return precision_score(y_true, y_pred)

# Stratified K-Folds cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Store the predictions from both models for combining them later
recall_preds = []
precision_preds = []
voting_clf = VotingClassifier(
    estimators=[('recall', model_recall), ('precision', model_precision)],
    voting='hard'
)

# Train the voting classifier on the resampled data
voting_clf.fit(X_resampled, y_resampled)

# Make predictions using the voting classifier
y_pred_voting = voting_clf.predict(X_resampled)

# Evaluate the performance of the voting classifier
print(classification_report(y_resampled, y_pred_voting))
print(confusion_matrix(y_resampled, y_pred_voting))


[LightGBM] [Info] Number of positive: 1711, number of negative: 16827
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004413 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5721
[LightGBM] [Info] Number of data points in the train set: 18538, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 1711, number of negative: 16827
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032773 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5721
[LightGBM] [Info] Number of data points in the train set: 18538, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[Lig