# Data Loading
Nothing much here: we fetch the dataset (id 572) with `ucimlrepo` and store its features in `X` and its targets in `y`.

In [6]:
from ucimlrepo import fetch_ucirepo 
import numpy as np
  
# Fetch dataset id 572 through ucimlrepo (requires network access).
taiwanese_bankruptcy_prediction = fetch_ucirepo(id=572)

# Pull the features and the targets out of the fetched payload.
bankruptcy_data = taiwanese_bankruptcy_prediction.data
X = bankruptcy_data.features
y = bankruptcy_data.targets

# SKLearn Imports and Data Preprocessing
Import the necessary scikit-learn packages and preprocess the data: an 80/20 train/test split, followed by standard scaling fitted on the training set.

In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=69,
)

# Learn the scaling statistics from the training features only, then apply
# the same transformation to both splits so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Default Decision Tree Classifier
A paper we read on bankruptcy prediction used Random Forest and XGBoost. While building this notebook, Decision Tree, Random Forest, and XGBoost classifiers were all tested with their default settings. The default XGBoost and Decision Tree returned similar scores (F1 of 0.4242 vs. 0.4400), so XGBoost was chosen as the first path of exploration, in the hope that it offers more room for optimization. These were the scores of each default algorithm:
- Decision Tree:
    - Accuracy: 0.9589
    - Precision: 0.3860
    - Recall: 0.5116
    - F1: 0.4400
- Random Forest:
    - Accuracy: 0.9692
    - Precision: 0.5294
    - Recall: 0.2093
    - F1: 0.3000
- XGBoost:
    - Accuracy: 0.9721
    - Precision: 0.6087
    - Recall: 0.3256
    - F1: 0.4242

In [8]:
# Baseline: an untuned decision tree with a fixed seed for repeatability.
dt_model = DecisionTreeClassifier(random_state=69).fit(X_train_scaled, y_train)
dt_y_pred = dt_model.predict(X_test_scaled)

# Score the held-out predictions with each metric, in the order reported below.
dt_accuracy, dt_precision, dt_recall, dt_f1 = (
    score_fn(y_test, dt_y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# One print call, one metric per line.
print(
    f"Decision Tree Accuracy: {dt_accuracy:.4f}",
    f"Decision Tree Precision: {dt_precision:.4f}",
    f"Decision Tree Recall: {dt_recall:.4f}",
    f"Decision Tree F1 Score: {dt_f1:.4f}",
    sep="\n",
)

Decision Tree Accuracy: 0.9589
Decision Tree Precision: 0.3860
Decision Tree Recall: 0.5116
Decision Tree F1 Score: 0.4400


In [9]:
# Baseline: a 100-tree random forest with a fixed seed for repeatability.
rf_model = RandomForestClassifier(n_estimators=100, random_state=69)

# Flatten the labels to 1-D before fitting. y_train comes from the dataset's
# targets table and is passed around as a single column; handing that to
# RandomForestClassifier.fit makes scikit-learn emit a DataConversionWarning
# ("A column-vector y was passed when a 1d array was expected").
rf_model.fit(X_train_scaled, np.ravel(y_train))

rf_y_pred = rf_model.predict(X_test_scaled)

# Evaluate on the held-out split.
rf_accuracy = accuracy_score(y_test, rf_y_pred)
rf_precision = precision_score(y_test, rf_y_pred)
rf_recall = recall_score(y_test, rf_y_pred)
rf_f1 = f1_score(y_test, rf_y_pred)

print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
print(f"Random Forest Precision: {rf_precision:.4f}")
print(f"Random Forest Recall: {rf_recall:.4f}")
print(f"Random Forest F1 Score: {rf_f1:.4f}")

  return fit_method(estimator, *args, **kwargs)


Random Forest Accuracy: 0.9692
Random Forest Precision: 0.5294
Random Forest Recall: 0.2093
Random Forest F1 Score: 0.3000


In [10]:
import xgboost as xgb

# Baseline: XGBoost with default hyperparameters, a binary logistic objective
# and a fixed seed for repeatability.
xgb_model = xgb.XGBClassifier(
    objective='binary:logistic',
    random_state=69,
).fit(X_train_scaled, y_train)
xgb_y_pred = xgb_model.predict(X_test_scaled)

# Score the held-out predictions with each metric, in the order reported below.
xgb_accuracy, xgb_precision, xgb_recall, xgb_f1 = (
    score_fn(y_test, xgb_y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# One print call, one metric per line.
print(
    f"XGBoost Accuracy: {xgb_accuracy:.4f}",
    f"XGBoost Precision: {xgb_precision:.4f}",
    f"XGBoost Recall: {xgb_recall:.4f}",
    f"XGBoost F1 Score: {xgb_f1:.4f}",
    sep="\n",
)

XGBoost Accuracy: 0.9721
XGBoost Precision: 0.6087
XGBoost Recall: 0.3256
XGBoost F1 Score: 0.4242
