In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Fetch the Titanic passenger table straight from GitHub.
# Reading a remote URL means this cell needs network access on every re-run.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)

# Preview the first rows of the target column plus the six feature columns.
preview_columns = ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
print(df.loc[:, preview_columns].head())


In [None]:
# Keep only rows whose survival outcome is known. Rebinding `df` instead of
# dropping in place keeps this cell idempotent when it is re-run.
df = df.dropna(subset=['Survived'])

# Select features & target. The .copy() calls make X and y independent of
# `df`, so the edits below never write back into the raw frame.
X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
y = df['Survived'].copy()

# Encode categorical 'Sex' to numeric. Any label other than 'female'/'male'
# is mapped to NaN by Series.map.
X['Sex'] = X['Sex'].map({'female': 0, 'male': 1})

# Fill missing age values with the median age. The result is assigned back to
# the column on purpose: `X['Age'].fillna(..., inplace=True)` operates on a
# temporary Series, and under pandas Copy-on-Write it leaves X unchanged.
X['Age'] = X['Age'].fillna(X['Age'].median())

# Fail fast if any feature still has missing values, so the problem is
# reported here by column name rather than later during model fitting.
missing_counts = X.isna().sum()
assert not missing_counts.any(), (
    f"Features still contain missing values:\n{missing_counts[missing_counts > 0]}"
)


In [None]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Train a Gaussian Naive Bayes classifier on the training split.
# fit() returns the estimator itself, so construction and training are chained.
nb_model = GaussianNB().fit(X_train, y_train)

# Bare last expression: the notebook displays the fitted estimator as cell output.
nb_model


In [None]:
# Predict labels for the held-out test rows.
y_pred = nb_model.predict(X_test)

# Score the predictions: a text classification report and the overall accuracy.
report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", report)


In [None]:
# Take the first test row as a one-row DataFrame. The 0:1 slice (rather than
# a scalar index) keeps the input two-dimensional with its column names,
# which is the shape predict() was given during evaluation.
sample = X_test.iloc[0:1]
prediction = nb_model.predict(sample)

# Show the passenger's feature values and the predicted outcome (1 = survived).
print(f"\nSample Passenger: {sample.iloc[0].to_dict()}")
print(f"Predicted Survival: {'Survived' if prediction[0] == 1 else 'Did Not Survive'}")