In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Silence every FutureWarning for the rest of the session, whatever its message or
# originating module. Be aware this also hides deprecation notices that can flag
# code in this notebook needing migration.
warnings.simplefilter(action='ignore', category=FutureWarning)


In [12]:
# Pull the 'titanic' example dataset through seaborn's dataset loader; every
# later cell works from this frame.
titanic_data = sns.load_dataset(name='titanic')

In [13]:
# Inspect the raw table before cleaning. Only a cell's final expression is
# rendered automatically and this cell ends with an assignment, so the
# preview and the per-column missing-value counts are printed explicitly.
print(titanic_data.head())
print(titanic_data.isnull().sum())

# Impute missing ages with the column mean. The result is assigned back to the
# column instead of calling fillna(inplace=True) on the `titanic_data['age']`
# selection: that chained in-place form is deprecated and stops updating the
# parent frame under pandas Copy-on-Write.
# NOTE(review): the mean is taken over every row, before the train/test split
# performed later in the notebook, so held-out rows influence the imputed
# value -- consider imputing from the training rows only.
mean_age = titanic_data['age'].mean()
titanic_data['age'] = titanic_data['age'].fillna(mean_age)

# One-hot encode `sex` into `sex_female` / `sex_male`. The guard keeps the cell
# re-runnable: after the first run the `sex` column no longer exists and an
# unguarded get_dummies call would raise KeyError.
if 'sex' in titanic_data.columns:
    titanic_data = pd.get_dummies(titanic_data, columns=['sex'])

In [14]:
# Columns fed to the model as predictors; `survived` is the target.
# The order is kept fixed because it defines the column order of X.
features = [
    'pclass',
    'age',
    'sibsp',
    'parch',
    'fare',
    'sex_female',
    'sex_male',
]
X = titanic_data.loc[:, features]
y = titanic_data.loc[:, 'survived']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Random forest with 100 trees; the fixed seed makes training repeatable.
clf = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
# Kept as the cell's last expression so the fitted estimator is echoed as output.
clf.fit(X=X_train, y=y_train)

In [16]:
# Predict on the held-out rows, compute both summaries, then print them together.
y_pred = clf.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.7988826815642458
Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.86      0.83       105
           1       0.78      0.72      0.75        74

    accuracy                           0.80       179
   macro avg       0.80      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179



In [17]:
# Translate the predicted class labels into text in one vectorised pass:
# 0 becomes "No", anything else becomes "Yes". tolist() converts the NumPy
# string array back into a plain Python list of str.
predictions_binary = np.where(y_pred == 0, "No", "Yes").tolist()

In [18]:
# Single-column table of the text labels. Built from a plain list, so it gets a
# fresh 0..n-1 index rather than the index of X_test.
prediction_df = pd.DataFrame(predictions_binary, columns=['Predicted Survived'])
print(prediction_df.head())

  Predicted Survived
0                 No
1                 No
2                 No
3                Yes
4                 No
