### Import Prerequisite Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.impute import KNNImputer , SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import warnings
# Suppress every warning for the rest of the session to keep cell output compact.
# NOTE(review): this also hides library diagnostics (e.g. convergence or
# data-shape warnings); consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')

### Import Dataset

In [2]:
# Titanic passenger data, fetched over the network each time this cell runs.
titanic_csv_url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(titanic_csv_url)

In [3]:
# Peek at the first two rows to confirm the CSV parsed and to see the available columns.
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [4]:
# Narrow the frame to three numeric predictors plus the target column.
# Note: this rebinds `df`, so the full table is no longer available afterwards.
selected_columns = ['Age', 'Pclass', 'Fare', 'Survived']
df = df[selected_columns]

In [5]:
# Show the first five rows of the reduced frame.
df.head()

Unnamed: 0,Age,Pclass,Fare,Survived
0,22.0,3,7.25,0
1,38.0,1,71.2833,1
2,26.0,3,7.925,1
3,35.0,1,53.1,1
4,35.0,3,8.05,0


In [6]:
# Count missing values per column to see which features need imputation.
df.isnull().sum()

Age         177
Pclass        0
Fare          0
Survived      0
dtype: int64

### Independent and Dependent data split

In [7]:
# Features: every column except the target. `columns=` already names the axis,
# so the extra `axis=1` argument was redundant.
x = df.drop(columns=['Survived'])
# Target as a 1-D Series. scikit-learn estimators expect y of shape (n_samples,);
# a single-column DataFrame is a column vector and triggers a DataConversionWarning
# on fit (hidden in this notebook by the global warnings filter).
y = df['Survived']

#### Train Test Split

In [8]:
# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=190,
)

### KNNImputer

In [9]:
# Impute from the 3 nearest rows, with closer neighbours weighted more heavily.
# NOTE(review): distances are computed on the raw, unscaled features, so the column
# with the widest range (presumably Fare) may dominate neighbour selection — confirm
# and consider scaling before imputing.
knn = KNNImputer(n_neighbors=3, weights='distance')

In [10]:
# Missing values per feature in the training split, before imputation.
x_train.isnull().sum()

Age       137
Pclass      0
Fare        0
dtype: int64

In [11]:
# Missing values per feature in the validation split, before imputation.
x_val.isnull().sum()

Age       40
Pclass     0
Fare       0
dtype: int64

##### Fill missing values

In [12]:
# Fit the imputer on the training split only, then reuse the fitted imputer on the
# validation split so the fill values are derived from training rows alone.
x_train_trf1 = knn.fit(x_train).transform(x_train)
x_val_trf1 = knn.transform(x_val)

In [13]:
# NaN never compares equal to anything (itself included), so `arr == np.nan` is
# all-False and the previous check came back empty even if values were still missing.
# np.isnan builds the correct mask: an empty result now really means no NaNs remain.
x_train_trf1[np.isnan(x_train_trf1)]

array([], dtype=float64)

In [14]:
# NaN never compares equal to anything (itself included), so `arr == np.nan` is
# all-False and the previous check came back empty even if values were still missing.
# np.isnan builds the correct mask: an empty result now really means no NaNs remain.
x_val_trf1[np.isnan(x_val_trf1)]

array([], dtype=float64)

#### Build ML Model

In [15]:
# Logistic regression with library-default settings; used to compare imputation strategies.
lr = LogisticRegression()

In [16]:
# Train on the KNN-imputed training matrix.
lr.fit(x_train_trf1, y_train)

# Predict on both splits so training and validation accuracy can be compared.
y_train_pred1 = lr.predict(x_train_trf1)
y_val_pred1 = lr.predict(x_val_trf1)

### Training Accuracy

In [17]:
# Fraction of training rows classified correctly (KNN-imputed features).
accuracy_score(y_train , y_train_pred1)

0.7036516853932584

### Validation Accuracy

In [18]:
# Fraction of held-out validation rows classified correctly (KNN-imputed features).
accuracy_score(y_val , y_val_pred1)

0.7653631284916201

### SimpleImputer

In [19]:
# Re-check the raw training split: the KNN step stored its result in x_train_trf1,
# so x_train itself is the input for the second imputation strategy.
x_train.isnull().sum()

Age       137
Pclass      0
Fare        0
dtype: int64

In [20]:
# Re-check the raw validation split: the KNN step stored its result in x_val_trf1,
# so x_val itself is the input for the second imputation strategy.
x_val.isnull().sum()

Age       40
Pclass     0
Fare       0
dtype: int64

In [21]:
# Baseline imputer: replace each missing value with its column mean
# ('mean' is the library default, spelled out here for readers).
sm = SimpleImputer(strategy='mean')

In [22]:
# Learn the fill statistics from the training split only, then apply the same
# fitted imputer to the validation split.
x_train_trf2 = sm.fit(x_train).transform(x_train)
x_val_trf2 = sm.transform(x_val)

### Model

In [23]:
# Fresh estimator for the SimpleImputer features. This rebinds `lr`, so the model
# trained on the KNN-imputed data is no longer reachable after this cell.
lr = LogisticRegression()
lr.fit(x_train_trf2, y_train)

# Predict on both splits so training and validation accuracy can be compared.
y_train_pred2 = lr.predict(x_train_trf2)
y_val_pred2 = lr.predict(x_val_trf2)

#### Training Accuracy

In [24]:
# Fraction of training rows classified correctly (SimpleImputer features).
accuracy_score(y_train , y_train_pred2)

0.699438202247191

##### Validation Accuracy

In [25]:
# Fraction of held-out validation rows classified correctly (SimpleImputer features).
accuracy_score(y_val , y_val_pred2)

0.7541899441340782