## Subgroup 0
Dhruv Prasanna

In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier

In [2]:
df = pd.read_csv('cluster0.csv')
X = df.drop(columns=['Bankrupt?', 'Index']).to_numpy()
y = df['Bankrupt?'].to_numpy()

### Preprocessing

In [3]:
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.pipeline import Pipeline
from joblib import dump

preproc_pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('quantile', QuantileTransformer(output_distribution='normal')),
    ('selector', SelectPercentile(score_func=f_classif, percentile=10))
])
X_selected = preproc_pipe.fit_transform(X, y)
dump(preproc_pipe, './artifacts/preprocessing_pipeline_subgroup0.joblib')

['./artifacts/preprocessing_pipeline_subgroup0.joblib']

### Training

In [4]:
random_state = 67
model = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=100, random_state=random_state)),
        ('gb', GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=random_state)),
        ('mlp', MLPClassifier(hidden_layer_sizes=(50,50), max_iter=500, random_state=random_state)),
        ('knn', KNeighborsClassifier(n_neighbors=3))
    ],
    final_estimator=RandomForestClassifier(n_estimators=100, random_state=random_state)
)

In [5]:
from sklearn.metrics import confusion_matrix
def accuracy(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    tt, tf, ft, ff = cm.ravel()
    return tt/(tf+tt)

In [6]:
# Fit on full dataset for training accuracy comparison (Note StackingClassifier uses cross-validation internally for the final estimator)
model.fit(X_selected, y)
tr_out = model.predict(X_selected)

In [7]:
acc = accuracy(y, tr_out)
print(f"Training Accuracy: {acc*100:.2f}%")

Training Accuracy: 99.97%
