In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb

# Load the pre-processed train/test tables.
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs where this exact directory layout exists. Consider a configurable
# data directory if the notebook is shared.
train = pd.read_csv("C:\\Users\\joshn\\Documents\\Coding\\Spaceship Titanic\\Stacking\\Process\\CombinedTrain.csv")
test = pd.read_csv("C:\\Users\\joshn\\Documents\\Coding\\Spaceship Titanic\\Stacking\\Process\\CombinedTest.csv")

# Separate ID before training (identifiers are not used as model features)
train_passenger_ids = train['PassengerId']
test_passenger_ids = test['PassengerId']

# Prepare data
X_train = train.drop(['Transported', 'PassengerId'], axis=1)
y_train = train['Transported'].astype('int')  # Convert True/False to 1/0
X_test = test.drop('PassengerId', axis=1)

# Fail fast on a train/test schema mismatch. Without this check a mismatch
# would only surface when predicting on X_test, i.e. after the models have
# already been fitted on X_train.
missing_in_test = X_train.columns.difference(X_test.columns)
extra_in_test = X_test.columns.difference(X_train.columns)
if len(missing_in_test) or len(extra_in_test):
    raise ValueError(
        "Train/test feature columns do not match. "
        f"Missing in test: {list(missing_in_test)}; "
        f"unexpected in test: {list(extra_in_test)}"
    )

# Both frames hold the same feature set at this point; put the test columns
# in the training order so the two matrices line up column-for-column.
X_test = X_test[X_train.columns]

# Create the Logistic Regression model with best parameters.
# 'l1_ratio' is deliberately not passed: scikit-learn documents it as only
# used when penalty='elasticnet', so alongside penalty='l1' the previously
# supplied value (0.75) had no effect on the fitted model. Dropping it keeps
# the same pure-L1 model without the dead setting.
best_params_lr = {'solver': 'saga', 'penalty': 'l1', 'max_iter': 300, 'C': 0.1}
lr_model = LogisticRegression(**best_params_lr, random_state=1)

# Create the Random Forest model with best parameters
best_params_rf = {'n_estimators': 250, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 10, 'criterion': 'gini', 'class_weight': None, 'bootstrap': True}
rf_model = RandomForestClassifier(**best_params_rf, random_state=1)

# Create the Support Vector Machine model - untuned.
# probability=True is set so the classifier exposes predict_proba.
svm_model = SVC(probability=True, random_state=1)

# Create the XGBoost model with best parameters.
# (The dict keeps its original name 'best_params_cv' so any other cell that
# refers to it continues to work.)
best_params_cv = {'subsample': 0.7, 'reg_lambda': 1.1, 'reg_alpha': 0.1, 'n_estimators': 300, 'min_child_weight': 5, 'max_depth': 3, 'learning_rate': 0.05, 'gamma': 0.2, 'colsample_bytree': 0.8}
xgb_model = xgb.XGBClassifier(**best_params_cv, random_state=1)


In [2]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import xgboost as xgb

# First-level estimators of the stack, given as (name, estimator) pairs.
base_models = [
    ('lr', lr_model),
    ('rf', rf_model),
    ('xgb', xgb_model),
    ('svc', svm_model),
]

# Second-level estimator: a logistic regression with default settings that
# combines the outputs of the first-level estimators.
meta_learner = LogisticRegression()

# Assemble the stack and train it on the full training set in one step
# (fit returns the fitted estimator, so the result can be bound directly).
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_learner,
).fit(X_train, y_train)

# Predicted class labels for the test features.
stacked_predictions = stacking_clf.predict(X_test)

# Assemble the submission table: one row per test passenger, with the
# predicted label stored as a boolean column.
test_passenger_ids = test['PassengerId']
test_predictions = pd.DataFrame(
    {
        'PassengerId': test_passenger_ids,
        'Transported': stacked_predictions,
    }
).astype({'Transported': bool})

# Write the submission file (no index column).
test_predictions.to_csv(
    "C:\\Users\\joshn\\Documents\\Coding\\Spaceship Titanic\\Stacking\\Test\\Submission - Stacking.csv",
    index=False,
)

