In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

In [2]:
# Load the Titanic dataset
df = pd.read_csv("./Dataset/Titanic.xls")
# Preprocess the dataset

# Drop irrelevant columns
df = df.drop(columns=['Sex','Name', 'Ticket', 'Cabin', 'Embarked'])

# Fill missing values (if any)
imputer = SimpleImputer(strategy='mean')
df['Age'] = imputer.fit_transform(df[['Age']])
df['Fare'] = imputer.fit_transform(df[['Fare']])

# Encode categorical features (e.g., Sex)
label_encoder = LabelEncoder()
# df['Sex'] = label_encoder.fit_transform(df['Sex'])

# Split data into features and target
X = df.drop(columns=['Survived'])
y = df['Survived']

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
X_train.head()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare
336,1228,2,32.0,0,0,13.0
31,923,2,24.0,2,0,31.5
84,976,2,30.27259,0,0,10.7083
287,1179,1,24.0,1,0,82.2667
317,1209,2,19.0,0,0,10.5


In [4]:
# Initialize the Naive Bayes classifier
nb_model = GaussianNB()

# Initialize the BaggingClassifier using the Naive Bayes model
bagging_model = BaggingClassifier(estimator=nb_model, n_estimators=50, random_state=42)

# Fit the model to the training data
bagging_model.fit(X_train, y_train)

# Predict on the test set
y_pred = bagging_model.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 61.90%
