# TITANIC SURVIVAL PREDICTION USING NAIVE BAYES

## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Loading Dataset

In [2]:
# Read the passenger table from the CSV (path is relative to the working
# directory) and preview the first rows.
df = pd.read_csv(filepath_or_buffer="titanicsurvival.csv")
df.head(n=5)

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.25,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.925,1
3,1,female,35.0,53.1,1
4,3,male,35.0,8.05,0


## Summarising Dataset

In [4]:
# Show the column labels of the loaded frame.
df.columns

Index(['Pclass', 'Sex', 'Age', 'Fare', 'Survived'], dtype='object')

In [17]:
# Summary statistics for the passenger class column.
df.Pclass.describe()

count    891.000000
mean       2.308642
std        0.836071
min        1.000000
25%        2.000000
50%        3.000000
75%        3.000000
max        3.000000
Name: Pclass, dtype: float64

In [20]:
# Summary of the Sex column (count, number of distinct labels, most frequent label).
df.Sex.describe()

count      891
unique       2
top       male
freq       577
Name: Sex, dtype: object

In [22]:
# Encode Sex as an integer feature (male -> 0, female -> 1).
# Guarded so that re-running this cell is a no-op: mapping an already-encoded
# column would turn every value into NaN and make astype(int) raise.
if not pd.api.types.is_numeric_dtype(df['Sex']):
    df['Sex'] = df['Sex'].map({'male' : 0, 'female' : 1}).astype(int)

In [23]:
# Preview the frame again to confirm Sex is now numeric.
df.head(n=5)

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,0,22.0,7.25,0
1,1,1,38.0,71.2833,1
2,3,1,26.0,7.925,1
3,1,1,35.0,53.1,1
4,3,0,35.0,8.05,0


In [24]:
# Summary statistics for Age; a count below the row total signals missing values.
df.Age.describe()

count    714.000000
mean      29.699118
std       14.526497
min        0.420000
25%       20.125000
50%       28.000000
75%       38.000000
max       80.000000
Name: Age, dtype: float64

In [25]:
# Summary statistics for the ticket fare column.
df.Fare.describe()

count    891.000000
mean      32.204208
std       49.693429
min        0.000000
25%        7.910400
50%       14.454200
75%       31.000000
max      512.329200
Name: Fare, dtype: float64

## Handling NULL values (mean imputation for Age)

In [33]:
# Flag, per column, whether at least one value is missing.
df.isnull().any()

Pclass      False
Sex         False
Age          True
Fare        False
Survived    False
dtype: bool

In [38]:
# Impute missing ages with the column mean. Assigning the result back (rather
# than calling fillna(inplace=True) on the df['Age'] selection) updates df
# itself under pandas Copy-on-Write and avoids the chained-assignment warning.
df['Age'] = df['Age'].fillna(df['Age'].mean())

In [42]:
# Confirm no missing values remain in any column. (df.columns.isna() would only
# inspect the column labels, not the data.)
df.isna().any().any()

False

## Splitting into test and train sets

In [43]:
# Feature matrix: every column except the target.
X = df.drop(columns=['Survived'])
X

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,0,22.000000,7.2500
1,1,1,38.000000,71.2833
2,3,1,26.000000,7.9250
3,1,1,35.000000,53.1000
4,3,0,35.000000,8.0500
...,...,...,...,...
886,2,0,27.000000,13.0000
887,1,1,19.000000,30.0000
888,3,1,29.699118,23.4500
889,1,0,26.000000,30.0000


In [45]:
# Target vector: the Survived column.
Y = df['Survived']
Y.head(n=5)

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

In [46]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_Train, X_Test, Y_Train, Y_Test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)

## Model development

In [49]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes classifier on the training split.
model = GaussianNB()
model.fit(X=X_Train, y=Y_Train)

GaussianNB()

## Testing the model on the held-out data

In [52]:
# Predict on the held-out features and line the predictions up against the
# true labels, one row per test passenger.
Y_Pred = model.predict(X_Test)
Y_Test = np.array(Y_Test)
# column_stack turns the two 1-D label vectors into an (n, 2) array.
Pred_Data = np.column_stack((Y_Test, Y_Pred))
df_pred = pd.DataFrame(Pred_Data, columns=['Actual', 'Prediction'])
df_pred.head(n=10)

Unnamed: 0,Actual,Prediction
0,0,0
1,0,0
2,0,0
3,1,1
4,1,1
5,1,0
6,1,1
7,1,1
8,1,1
9,1,1


## Accuracy Metrics

In [53]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Compare held-out labels with the predictions: confusion matrix first, then
# overall accuracy expressed as a percentage.
Confusion_Matrix = confusion_matrix(y_true=Y_Test, y_pred=Y_Pred)
print("Confusion_Matrix :\n", Confusion_Matrix)
print("Accuracy of the Model: {0}%".format(100 * accuracy_score(y_true=Y_Test, y_pred=Y_Pred)))

Confusion_Matrix :
 [[110  29]
 [ 21  63]]
Accuracy of the Model: 77.57847533632287%


## Testing with new Data

In [59]:
# Ask for one passenger's attributes and predict survival with the fitted model.
pclass = int(input("Enter person class: "))
# Age is a fractional feature in the training data (infants are < 1), so parse
# it as float; whole-number input such as "25" is still accepted.
age = float(input("Enter person's Age: "))
sex = int(input("Enter person's Sex(Enter as boolean - Male : 0, Female : 1): "))
fare = float(input("Enter person's Fare: "))
# Build a one-row frame with the same column names and order the model was
# fitted on, so scikit-learn can match features by name instead of warning
# about missing feature names on a bare ndarray.
person = pd.DataFrame([[pclass, sex, age, fare]], columns=['Pclass', 'Sex', 'Age', 'Fare'])
result = model.predict(person)
print(result)
# predict returns an array with one label per input row; test that label
# explicitly rather than relying on the truthiness of the array.
if result[0] == 1:
  print("Person will Survive")
else:
  print("Person won't survive")

Enter person class: 1
Enter person's Age: 25
Enter person's Sex(Enter as boolean - Male : 0, Female : 1): 1
Enter person's Fare: 15.55
[1]
Person will Survive


### _Thank You_