In [7]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Load the Titanic dataset
df = pd.read_csv('Titanic-Dataset.csv')

# 1. Prepare Data
# Select features and target
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
target = 'Survived'

# Create a copy to avoid SettingWithCopyWarning
X = df[features].copy()
y = df[target]

# Options shared by both split calls below; keeping them in one place
# guarantees the preliminary split and the final split select the same rows.
split_options = {'test_size': 0.2, 'random_state': 42}

# 2. Handle Missing Values and Encode Categorical Features
# Learn the imputation values from the training rows only, so that no
# statistic computed on the held-out test rows leaks into preprocessing.
# train_test_split chooses rows from the sample count and random_state alone,
# so this preliminary call picks the same training rows as the split in step 3.
train_rows, _ = train_test_split(X, **split_options)
age_fill = train_rows['Age'].median()
embarked_fill = train_rows['Embarked'].mode()[0]

# Fill missing Age values with the training median (without inplace=True)
X['Age'] = X['Age'].fillna(age_fill)

# Fill missing Embarked values (if any) with the most frequent training value
X['Embarked'] = X['Embarked'].fillna(embarked_fill)

# One-hot encode categorical features (done on the full frame so the train
# and test matrices share exactly the same columns)
X = pd.get_dummies(X, columns=['Pclass', 'Sex', 'Embarked'], drop_first=True)

# 3. Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print("-------------------------------------------------Bagging------------------------------------------------------------------------------------")

# Bagging: build a seeded 100-tree random forest and fit it in one expression
# (fit() hands back the fitted estimator itself).
model1 = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the forest on the held-out split
y_pred = model1.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))


print("-------------------------------------------------Boosting------------------------------------------------------------------------------------")

# Boosting: seeded XGBoost classifier with 100 rounds and a 0.1 learning rate,
# fitted in the same expression that constructs it.
model2 = XGBClassifier(
    random_state=42,
    learning_rate=0.1,
    n_estimators=100,
).fit(X_train, y_train)

# Score the booster on the held-out split
y_pred = model2.predict(X_test)
print("Boosting (XGBoost) Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

print("-------------------------------------------------Stacking------------------------------------------------------------------------------------")

# Base learners whose predictions are combined by the meta-model.
# NOTE(review): KNN is distance-based and this script applies no feature
# scaling before fitting it — consider standardising the inputs.
base_models = [
    ('knn', KNeighborsClassifier(n_neighbors=3)),
    # Seeded so tie-breaking between equally good splits is repeatable,
    # matching the random_state=42 used for model1 and model2.
    ('dt', DecisionTreeClassifier(max_depth=3, random_state=42))
]
# probability=True adds a randomised internal calibration step to SVC;
# seeding it keeps the fitted meta-model identical from run to run.
meta_model = SVC(kernel='linear', probability=True, random_state=42)
model3 = StackingClassifier(estimators=base_models, final_estimator=meta_model)
model3.fit(X_train, y_train)

# Predict and evaluate
y_pred = model3.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

-------------------------------------------------Bagging------------------------------------------------------------------------------------
Accuracy: 0.7988826815642458
-------------------------------------------------Boosting------------------------------------------------------------------------------------
Boosting (XGBoost) Accuracy: 0.8268156424581006
-------------------------------------------------Stacking------------------------------------------------------------------------------------
Accuracy: 0.7988826815642458
