Load packages

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report

Load dataset

In [4]:
# Path to the cleaned credit-risk CSV, resolved relative to the working directory.
DATA_PATH = "../credit_risk_dataset_cleaned.csv"
df = pd.read_csv(DATA_PATH)

Split the dataset into train and test subsets (70/30). The random state was not specified, so 42 is used here, as it has been the most common choice in my experience.

In [10]:
# One-hot encode the frame; drop_first=True drops the first level of each encoded
# column, leaving "cb_person_default_on_file_Y" as the single indicator column
# that is used as the prediction target.
target_col = "cb_person_default_on_file_Y"
df_split = pd.get_dummies(df, drop_first=True)
y = df_split[target_col]
X = df_split.drop(columns=[target_col])

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

AdaBoost model

In [11]:
# Train AdaBoost with default hyperparameters. fit() returns the estimator itself,
# so construction and training are chained; the seed fixes the model's randomness.
model = AdaBoostClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for both subsets so train and test metrics can be compared.
train_pred, test_pred = (model.predict(features) for features in (X_train, X_test))



Outputting required metrics

In [39]:
# Class names are passed explicitly so the report keys are always "False"/"True".
# Without this, the keys are str() of whatever label values get_dummies produced
# (bool in pandas >= 2.0, 0/1 integers before), and the metrics_*['False'] /
# metrics_*['True'] lookups in the cells below would raise KeyError on older pandas.
class_names = ["False", "True"]  # follows sorted label order: negative class first

# output_dict=True returns nested dicts (per-class metrics, accuracy, averages)
# instead of a pre-formatted text table.
metrics_train = classification_report(
    y_train, train_pred, target_names=class_names, output_dict=True
)
metrics_test = classification_report(
    y_test, test_pred, target_names=class_names, output_dict=True
)

In [40]:
# Dump the complete training report dict as-is (per-class metrics plus accuracy and averages).
print(metrics_train)

{'False': {'precision': 0.892431950490376, 'recall': 0.9118482396441777, 'f1-score': 0.9020356234096693, 'support': 18661.0}, 'True': {'precision': 0.5411436541143654, 'recall': 0.48609371084941116, 'f1-score': 0.5121436114044351, 'support': 3991.0}, 'accuracy': 0.8368355995055624, 'macro avg': {'precision': 0.7167878023023707, 'recall': 0.6989709752467944, 'f1-score': 0.7070896174070522, 'support': 22652.0}, 'weighted avg': {'precision': 0.8305393321415919, 'recall': 0.8368355995055624, 'f1-score': 0.8333415116353055, 'support': 22652.0}}


Train subset

In [41]:
# Overall accuracy first, then precision / recall / F1 for each class
# (suffix _0 is the "False" class, _1 is the "True" class), to three decimals.
print(f"accuracy: {metrics_train['accuracy']:.3f}")
for report_key, label in (("precision", "precision"), ("recall", "recall"), ("f1-score", "f1")):
    for class_idx, class_key in enumerate(("False", "True")):
        print(f"{label}_{class_idx}: {metrics_train[class_key][report_key]:.3f}")

accuracy: 0.837
precision_0: 0.892
precision_1: 0.541
recall_0: 0.912
recall_1: 0.486
f1_0: 0.902
f1_1: 0.512


Test subset

In [42]:
# Overall accuracy first, then precision / recall / F1 for each class
# (suffix _0 is the "False" class, _1 is the "True" class), to three decimals.
print(f"accuracy: {metrics_test['accuracy']:.3f}")
for report_key, label in (("precision", "precision"), ("recall", "recall"), ("f1-score", "f1")):
    for class_idx, class_key in enumerate(("False", "True")):
        print(f"{label}_{class_idx}: {metrics_test[class_key][report_key]:.3f}")

accuracy: 0.827
precision_0: 0.888
precision_1: 0.516
recall_0: 0.904
recall_1: 0.475
f1_0: 0.896
f1_1: 0.495


The results reported in the research paper and those from our attempt differ greatly. This is most likely due to the lack of precise information about how the dataset was prepared and which random state was used for the data split.