### Loading libraries and the dataset 

In [1]:
# Loading libraries 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from scipy.stats import randint 

# Reading the data 
file_path = '../credit_risk_dataset_cleaned.csv'
df = pd.read_csv(file_path)

### Preparation of modeling dataset 

In [2]:
# Assigning features and target columns 
X = df.drop('loan_status', axis=1) 
y = df['loan_status'] 

# Dividing features into categorical and numerical 
categorical_features = X.select_dtypes(include=['object']).columns.tolist() 
numerical_features = X.select_dtypes(exclude=['object']).columns.tolist() 

# One-hot encoding 
X_encoded = pd.get_dummies(X, drop_first=True) 

# Train-test split 
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3, random_state=37) 

### Training the model 

In [3]:
# Random forest definition 
rf = RandomForestClassifier(random_state=37)

# Hyperparameters space 
param_dist = {
    'n_estimators': randint(100, 500),
    'max_depth': randint(0, 50),
    'min_samples_split': randint(2, 50),
    'min_samples_leaf': randint(2, 50),
    'bootstrap': [True, False]
}

# RandomizedSearchCV 
random_search = RandomizedSearchCV(
    rf,
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    scoring='f1',
    random_state=37,
    n_jobs=-1
)

# Fitting the model 
random_search.fit(X_train, y_train)

# Final model 
model = random_search.best_estimator_

# Printing best model specification 
print(random_search.best_params_)


15 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/Library/Frameworks/Python.framework/

{'bootstrap': False, 'max_depth': 16, 'min_samples_leaf': 2, 'min_samples_split': 26, 'n_estimators': 378}


### Calculating and displaying obtained results on train set 

In [4]:
# Prediction of train set 
y_pred = model.predict(X_train)

# Calculation of accuracy measures 
accuracy = accuracy_score(y_train, y_pred)
precision_0 = precision_score(y_train, y_pred, pos_label=0)
precision_1 = precision_score(y_train, y_pred, pos_label=1)
recall_0 = recall_score(y_train, y_pred, pos_label=0)
recall_1 = recall_score(y_train, y_pred, pos_label=1)
f1_0 = f1_score(y_train, y_pred, pos_label=0)
f1_1 = f1_score(y_train, y_pred, pos_label=1)

# Displaying the results 
print(f"accuracy   : {accuracy:.4f}")
print(f"precision_0: {precision_0:.4f}")
print(f"precision_1: {precision_1:.4f}")
print(f"recall_0   : {recall_0:.4f}")
print(f"recall_1   : {recall_1:.4f}")
print(f"f1_0       : {f1_0:.4f}")
print(f"f1_1       : {f1_1:.4f}")

accuracy   : 0.9435
precision_0: 0.9346
precision_1: 0.9892
recall_0   : 0.9977
recall_1   : 0.7464
f1_0       : 0.9651
f1_1       : 0.8508


### Calculating and displaying obtained results on test set 

In [5]:
# Prediction of test set 
y_pred = model.predict(X_test) 

# Calculation of accuracy measures 
accuracy = accuracy_score(y_test, y_pred) 
precision_0 = precision_score(y_test, y_pred, pos_label=0) 
precision_1 = precision_score(y_test, y_pred, pos_label=1) 
recall_0 = recall_score(y_test, y_pred, pos_label=0) 
recall_1 = recall_score(y_test, y_pred, pos_label=1) 
f1_0 = f1_score(y_test, y_pred, pos_label=0) 
f1_1 = f1_score(y_test, y_pred, pos_label=1) 

# Displaying the results 
print(f"accuracy   : {accuracy:.4f}") 
print(f"precision_0: {precision_0:.4f}") 
print(f"precision_1: {precision_1:.4f}") 
print(f"recall_0   : {recall_0:.4f}") 
print(f"recall_1   : {recall_1:.4f}") 
print(f"f1_0       : {f1_0:.4f}") 
print(f"f1_1       : {f1_1:.4f}") 

accuracy   : 0.9270
precision_0: 0.9186
precision_1: 0.9702
recall_0   : 0.9937
recall_1   : 0.6983
f1_0       : 0.9547
f1_1       : 0.8121
