In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Titanic passenger table from the Stanford CS109 class archive; downloaded on every run
url = "https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv"
data = pd.read_csv(filepath_or_buffer=url)

# Preprocessing function (SAFE)
def preprocess_data(data):
    """Return a cleaned copy of ``data``; the caller's frame is not modified.

    Each step is skipped when the column it targets is absent:

    * missing ``Age`` entries are replaced with the mean of the column,
    * ``Sex`` is replaced by the integer codes of a freshly fitted
      ``LabelEncoder``,
    * ``Name``, ``Ticket`` and ``Cabin`` are removed.
    """
    frame = data.copy()
    present = frame.columns

    # Mean-impute Age
    if 'Age' in present:
        mean_age = frame['Age'].mean()
        frame['Age'] = frame['Age'].fillna(mean_age)

    # Turn Sex labels into integer codes
    if 'Sex' in present:
        sex_encoder = LabelEncoder()
        frame['Sex'] = sex_encoder.fit_transform(frame['Sex'])

    # Columns listed here are discarded when present; absent ones are ignored
    unwanted = ['Name', 'Ticket', 'Cabin']
    return frame.drop(columns=unwanted, errors='ignore')

# Clean the raw table with preprocess_data
data = preprocess_data(data)

# 'Survived' is the target; every remaining column is used as a feature
y = data['Survived']
X = data.drop(columns=['Survived'])

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise features: the scaler is fitted on the training split only,
# then the same transform is applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# First-level learners
random_forest = RandomForestClassifier(n_estimators=200, random_state=42)
gradient_boosting = GradientBoostingClassifier(n_estimators=150, learning_rate=0.05, random_state=42)

# Second-level learner passed to the stack as its final estimator
meta_model = LogisticRegression(max_iter=300, random_state=42)

# Assemble the stacked ensemble
stacking_clf = StackingClassifier(
    estimators=[('rf', random_forest), ('gb', gradient_boosting)],
    final_estimator=meta_model,
)

# Fit on the training split, predict the held-out split, report accuracy
stacking_clf.fit(X_train, y_train)
y_pred = stacking_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Stacking Ensemble Accuracy: {accuracy:.2f}")


Stacking Ensemble Accuracy: 0.82
