In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Load seaborn's "titanic" example dataset into a DataFrame.
titanic = sns.load_dataset("titanic")

In [5]:
# Keep the target plus the five predictors used below; .copy() detaches the
# subset from `titanic` so later column edits don't touch the original frame.
data = titanic.loc[
    :, ['survived', 'pclass', 'sex', 'age', 'fare', 'embarked']
].copy()

In [6]:
# Fill gaps with a typical value: the median for the numeric age, the most
# frequent port for embarked.
# NOTE(review): both statistics are computed on the full dataset, i.e. before
# the train/test split performed later, so the test rows influence the imputed
# values — consider imputing after the split.
data['age'] = data['age'].fillna(data['age'].median())
data['embarked'] = data['embarked'].fillna(data['embarked'].mode()[0])

# Encode the two text columns as integers. Plain column assignment replaces
# the column, so the result really gets an integer dtype; on pandas >= 2.0
# `data.loc[:, col] = ...` writes into the existing column instead and can
# leave it as object dtype holding Python ints.
# astype('int64') raises if a label was not in the mapping (map() yields NaN
# for it), which is also what happens if this cell is run twice on the same
# `data` — better than silently training on NaN.
data['sex'] = data['sex'].map({'male': 0, 'female': 1}).astype('int64')
data['embarked'] = data['embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype('int64')

In [7]:
# Separate the target column from the predictors.
y = data['survived']
x = data.drop(columns='survived')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Random forest limited to 10 trees; the fixed seed makes training repeatable.
rf = RandomForestClassifier(n_estimators=10, random_state=42)

In [12]:
# Fit the forest on the training split, then predict labels for the held-out
# rows so they can be scored against y_test.
rf.fit(x_train, y_train)
y_pred = rf.predict(x_test)

In [16]:
# Print each evaluation summary for the held-out predictions, one after
# another, under its own heading.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nClassification report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))

Accuracy: 0.7877094972067039

Classification report:
               precision    recall  f1-score   support

           0       0.80      0.85      0.82       105
           1       0.76      0.70      0.73        74

    accuracy                           0.79       179
   macro avg       0.78      0.78      0.78       179
weighted avg       0.79      0.79      0.79       179


Confusion Matrix:
 [[89 16]
 [22 52]]


In [18]:
# Pair each predictor name with the importance reported by the fitted forest,
# largest first. Row labels keep the predictors' original positions in `x`.
feature_importance = pd.DataFrame(
    {
        'Feature': x.columns,
        'Importance': rf.feature_importances_,
    }
).sort_values('Importance', ascending=False)

In [19]:
# Show the importance table (already sorted in descending order) as plain text.
print(feature_importance)

    Feature  Importance
3      fare    0.311066
1       sex    0.311029
2       age    0.267754
0    pclass    0.079160
4  embarked    0.030992


In [21]:
# One hypothetical passenger, values listed in the column order of `x`:
# pclass=3, sex=0 (male), age=28, fare=50, embarked=0 ('S').
passenger_values = [3, 0, 28, 50, 0]
new_passenger = pd.DataFrame([passenger_values], columns=x.columns)

# predict() returns one label per row; label 1 is reported as "Survived".
prediction = rf.predict(new_passenger)
if prediction[0] == 1:
    print("Survived")
else:
    print("Not Survived")

Not Survived
