In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Read the passenger records and keep only the target column ('Survived')
# plus the six columns used as model features.
selected_columns = ['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
df = pd.read_csv("Titanic-Dataset.csv")[selected_columns]

# Split first, then preprocess. Every quantity learned from the data (median,
# mode, category codes) is fitted on the training rows only and re-applied to
# the held-out rows, so the test set cannot influence the preprocessing.
X = df.drop('Survived', axis=1)  # feature columns
y = df['Survived']               # target labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # 20% of rows held out for testing

# Work on explicit copies so the column assignments below modify these two
# frames only and never touch `df` / `X`.
X_train = X_train.copy()
X_test = X_test.copy()

# Numeric gaps: fill with the median computed from the training rows.
imputer = SimpleImputer(strategy='median')
X_train[['Age', 'Fare']] = imputer.fit_transform(X_train[['Age', 'Fare']])
X_test[['Age', 'Fare']] = imputer.transform(X_test[['Age', 'Fare']])

# Categorical gaps: fill with the most frequent training value. The result is
# assigned back to the column instead of using fillna(inplace=True) on a
# column selection, which acts on a temporary object under pandas
# Copy-on-Write and would leave the NaNs in place.
embarked_mode = X_train['Embarked'].mode()[0]
X_train['Embarked'] = X_train['Embarked'].fillna(embarked_mode)
X_test['Embarked'] = X_test['Embarked'].fillna(embarked_mode)

# Convert the Embarked categories to integer codes using a mapping learned
# from the training rows. NOTE: transform() raises ValueError if the test rows
# contain a category that never appears in the training rows.
embarked_encoder = LabelEncoder().fit(X_train['Embarked'])
X_train['Embarked'] = embarked_encoder.transform(X_train['Embarked'])
X_test['Embarked'] = embarked_encoder.transform(X_test['Embarked'])

# Train a Gaussian Naive Bayes model on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
classifier = GaussianNB().fit(X_train, y_train)

# Predicted labels for the held-out rows
y_pred = classifier.predict(X_test)

# Score the predictions against the true test labels
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the confusion matrix first, then the overall accuracy
print("Confusion Matrix:\n", cm)
print("Accuracy:", accuracy)

Confusion Matrix:
 [[86 19]
 [37 37]]
Accuracy: 0.6871508379888268
