# In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline


# Load the labelled data set from disk.
df = pd.read_csv('train_data_with_groups.csv')


# Separate the 'Bankrupt?' label column from the remaining feature columns.
y = df['Bankrupt?']
X = df.drop(columns=['Bankrupt?'])

# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Stacking classifier.
#
# LogisticRegression and SVC are both sensitive to feature scale, so each is
# wrapped in a pipeline that standardizes its inputs first. Because the scaler
# lives inside the base estimator, it is re-fitted on the training portion of
# every internal CV fold and never sees the held-out fold. RandomForest is
# insensitive to monotonic feature rescaling and is left on the raw features.
stacking_clf = StackingClassifier(
    estimators=[
        (
            'lr',
            make_pipeline(
                StandardScaler(),
                # A higher iteration cap than the default avoids stopping the
                # solver before it has converged.
                LogisticRegression(random_state=42, max_iter=1000),
            ),
        ),
        ('rf', RandomForestClassifier(random_state=42)),
        (
            'svc',
            make_pipeline(
                StandardScaler(),
                # probability=True exposes predict_proba for the stacker.
                SVC(
                    random_state=42,
                    kernel='poly',
                    degree=3,
                    coef0=1,
                    gamma='scale',
                    probability=True,
                ),
            ),
        ),
    ],
    # The meta-learner is trained on the cross-validated predictions of the
    # base estimators above (10 folds).
    final_estimator=RandomForestClassifier(random_state=43),
    cv=10
)

# Train the stacking classifier on the training split.
stacking_clf.fit(x_train, y_train)

# Predict the 'Bankrupt?' label for every row of the held-out test split.
y_pred = stacking_clf.predict(x_test)


# Start from the test rows' original index labels, then attach the predicted
# label as a second column.
predictions_df = pd.DataFrame({'Index': x_test.index}).assign(
    **{'Bankrupt?': y_pred}
)

# Write the two columns to disk; the frame's own positional index is omitted.
predictions_df.to_csv('submission.csv', index=False)