### Import libraries

In [1]:
import pandas as pd
import numpy as np

### Load dataset

In [2]:
# Read the Titanic passenger records from the CSV file into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer='Titanic survied.csv')

### Summarize data

In [3]:
# Quick overview of the loaded frame, one item per line:
# (rows, columns), the first five rows, then the column labels.
print(dataset.shape, dataset.head(), dataset.columns, sep='\n')

(891, 6)
   PassengerId  Survived  Pclass     Sex   Age     Fare
0            1         0       3    male  32.0  21.1710
1            2         1       1  female  38.0  71.2833
2            3         1       3  female  26.0   7.9250
3            4         1       1  female  35.0  53.1000
4            5         0       3    male  35.0   8.0500
Index(['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'Fare'], dtype='object')


### Mapping text data to binary values

In [4]:
# Encode the text 'Sex' column as integers: female -> 0, male -> 1.
sex_codes = {'female': 0, 'male': 1}

# Distinct values currently in the column, captured before any encoding.
# (The name `income_set` is kept unchanged in case other cells refer to it.)
income_set = set(dataset['Sex'])

# Skip the mapping when the column already holds only the integer codes.
# Without this guard, re-running the cell maps 0/1 through the text lookup,
# turns every value into NaN, and astype(int) then fails.
already_encoded = bool(income_set) and income_set <= set(sex_codes.values())
if not already_encoded:
    encoded_sex = dataset['Sex'].map(sex_codes)
    unmapped = encoded_sex.isna()
    if unmapped.any():
        # Labels absent from sex_codes (including missing values) map to NaN;
        # report them explicitly instead of failing inside astype(int).
        unexpected = sorted(set(dataset.loc[unmapped, 'Sex']), key=str)
        raise ValueError(
            "Cannot encode 'Sex': unexpected values {0!r}; expected one of {1!r}".format(
                unexpected, sorted(sex_codes)
            )
        )
    dataset['Sex'] = encoded_sex.astype(int)

print(dataset.head())
print(income_set)

   PassengerId  Survived  Pclass  Sex   Age     Fare
0            1         0       3    1  32.0  21.1710
1            2         1       1    0  38.0  71.2833
2            3         1       3    0  26.0   7.9250
3            4         1       1    0  35.0  53.1000
4            5         0       3    1  35.0   8.0500
{'female', 'male'}


### Segregate the dataset into input features and output target

In [5]:
# Feature matrix: every column except the target ('Survived') and the
# row identifier ('PassengerId').
X = dataset.drop(columns=['Survived', 'PassengerId'])
X

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,1,32.0,21.1710
1,1,0,38.0,71.2833
2,3,0,26.0,7.9250
3,1,0,35.0,53.1000
4,3,1,35.0,8.0500
...,...,...,...,...
886,2,1,27.0,13.0000
887,1,0,19.0,30.0000
888,3,0,,23.4500
889,1,1,26.0,30.0000


In [6]:
# Target vector: the 'Survived' column of the dataset.
y = dataset['Survived']
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

### Finding & filling NA values in our features X

In [7]:
# Labels of the feature columns that contain at least one missing value.
X.columns[X.isnull().any(axis=0)]

Index(['Age'], dtype='object')

In [8]:
# Replace missing ages with the mean of the observed ages.
# NOTE(review): the mean is computed over all rows of X, i.e. before the
# train/test split performed later in this notebook.
X['Age'] = X['Age'].fillna(X['Age'].mean())

### Test again to check for any remaining null values

In [9]:
# Repeat the missing-value check; an empty Index means no column has NaNs left.
X.columns[X.isnull().any(axis=0)]

Index([], dtype='object')

### Splitting data into train and test sets

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state makes the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.25,
)

### Model building

In [12]:
from sklearn.naive_bayes import GaussianNB

# Create a Gaussian Naive Bayes classifier and fit it on the training split.
# fit() returns the estimator itself, so the call can be chained.
model = GaussianNB().fit(X_train, y_train)

### Predicting whether the person survived or not

In [13]:
# Ask for one passenger's features and predict survival with the fitted model.
pclassno = int(input("enter person pclass no:"))
gender = int(input("enter person gender 0-female 1-male:"))
# Age is a float column in the training data, so accept fractional ages
# (e.g. "0.5") instead of rejecting them with int().
age = float(input("enter person age:"))
fare = float(input("enter person fare:"))

# The model was fitted on a DataFrame. Pass a one-row frame with the same
# column labels (in training order) so scikit-learn can match features by
# name and does not warn about missing feature names.
person = pd.DataFrame([[pclassno, gender, age, fare]], columns=X_train.columns)
result = model.predict(person)
print(result)

# predict() returns an array with one entry per input row; test that single
# entry explicitly rather than relying on the truth value of a whole array.
if result[0] == 1:
    print("person might be survived")
else:
    print("person might not be survived")

enter person pclass no:3
enter person gender 0-female 1-male:1
enter person age:35
enter person fare:8.05
[0]
person might not be survived


### Prediction for all test data

In [14]:
# Predict every row of the test split, then print predictions (left column)
# next to the true labels (right column).
y_pred = model.predict(X_test)
pred_vs_actual = np.column_stack([y_pred, y_test])
print(pred_vs_actual)

[[0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 1]
 [0 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]

### Accuracy of the model

In [15]:
from sklearn.metrics import accuracy_score

# accuracy_score takes (y_true, y_pred): ground truth first, predictions second.
# Accuracy itself is symmetric, so the printed value is unchanged, but the
# correct order matters if this call is ever swapped for an asymmetric metric
# such as precision or recall.
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print('accuracy of the model:{0}'.format(accuracy_pct))

accuracy of the model:77.57847533632287
