### Project4.0 - Titanic Survival Prediction using NAIVE BAYES

#### Objective: To predict Titanic Survival based on some parameters (Age, Pclass, Sex, and Fare)

#### *Importing Libraries*

In [1]:
import pandas as pd
import numpy as np

#### *Loading Dataset from Local Directory*

In [2]:
dataset=pd.read_csv("titanicsurvival.csv")
dataset

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.2500,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.9250,1
3,1,female,35.0,53.1000,1
4,3,male,35.0,8.0500,0
...,...,...,...,...,...
886,2,male,27.0,13.0000,0
887,1,female,19.0,30.0000,1
888,3,female,,23.4500,0
889,1,male,26.0,30.0000,1


#### *Summarizing Dataset*

In [3]:
print(dataset.shape)
print(dataset.head(5))

(891, 5)
   Pclass     Sex   Age     Fare  Survived
0       3    male  22.0   7.2500         0
1       1  female  38.0  71.2833         1
2       3  female  26.0   7.9250         1
3       1  female  35.0  53.1000         1
4       3    male  35.0   8.0500         0


#### *Mapping Sex-String Data to Binary Value*

This is to change the text data (male and Female) on the Sex column to binary value (0 and 1)

In [4]:
income_set = set(dataset['Sex'])
dataset['Sex'] = dataset['Sex'].map({'female': 1, 'male': 0}).astype(int)
print(dataset.head)

<bound method NDFrame.head of      Pclass  Sex   Age     Fare  Survived
0         3    0  22.0   7.2500         0
1         1    1  38.0  71.2833         1
2         3    1  26.0   7.9250         1
3         1    1  35.0  53.1000         1
4         3    0  35.0   8.0500         0
..      ...  ...   ...      ...       ...
886       2    0  27.0  13.0000         0
887       1    1  19.0  30.0000         1
888       3    1   NaN  23.4500         0
889       1    0  26.0  30.0000         1
890       3    0  32.0   7.7500         0

[891 rows x 5 columns]>


In [11]:
dataset["Survived"].value_counts() #This is to count from the dataset the number of the Survival and otherwise

0    549
1    342
Name: Survived, dtype: int64

#### *Segregating Dataset into X(Input/IndependentVariable) & Y(Output/DependentVariable)*

In [12]:
#X=dataset.iloc[:,:-1]
#X

# But Using the dataset.drop function, we have

X=dataset.drop("Survived", axis="columns")
X

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,0,22.0,7.2500
1,1,1,38.0,71.2833
2,3,1,26.0,7.9250
3,1,1,35.0,53.1000
4,3,0,35.0,8.0500
...,...,...,...,...
886,2,0,27.0,13.0000
887,1,1,19.0,30.0000
888,3,1,,23.4500
889,1,0,26.0,30.0000


In [13]:
#Y=dataset.iloc[:,-1]
#Y

#But we can also use the command below:
Y = dataset.Survived
Y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

#### *Finding & Removing NA values from our Features X*

In [14]:
X.columns[X.isna().any()] #This is to display the column of the dataset that has a missing value

Index(['Age'], dtype='object')

In [15]:
X.Age = X.Age.fillna(X.Age.mean()) #This is to fill the empty columns with the mean of the data in the column

In [16]:
X.columns[X.isna().any()] #This is use to recheck if there is still any missing value in the missing data

Index([], dtype='object')

#### *Splitting Dataset into Train & Test*

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25,random_state =0)

#### *Training the Dataset*

In [18]:
from sklearn.naive_bayes import GaussianNB  #Importing the Machine Learning ALGORITHM
model = GaussianNB()
model.fit(X_train, y_train)

GaussianNB()

#### *Predicting, wheather Person Survived or Not*

In [20]:
pclassNo = int(input("Enter Person's Pclass number: "))
gender = int(input("Enter Person's Gender 1-female 0-male(1 or 0): "))
age = int(input("Enter Person's Age: "))
fare = float(input("Enter Person's Fare: "))
person = [[pclassNo,gender,age,fare]]
result = model.predict(person)
print(result)

if result == 1:
  print("Person might Survive")
else:
  print("Person might not have Survived")

Enter Person's Pclass number: 3
Enter Person's Gender 1-female 0-male(1 or 0): 0
Enter Person's Age: 35
Enter Person's Fare: 52
[0]
Person might not have Survived




#### *Predicting using Test Data*

In [21]:
y_pred = model.predict(X_test)
print(np.column_stack((y_pred,y_test)))

[[0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 1]
 [0 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]

#### *Evaluating the Accuracy of the model*

In [22]:
from sklearn.metrics import accuracy_score
print("Accuracy of the Model: {0}%".format(accuracy_score(y_test, y_pred)*100))

Accuracy of the Model: 77.57847533632287%
