In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
# Load the Titanic CSV, keeping only the columns listed in usecols.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook will not
# run elsewhere until it is pointed at a local copy of the file.
df=pd.read_csv("C:\\Users\\dell\\OneDrive\\Desktop\\Datasets\\Titanic.csv",usecols=["PassengerId","Survived","Pclass","Sex","Age","SibSp","Parch","Fare","Embarked"])

In [3]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.25,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.925,S
3,4,1,1,female,35.0,1,0,53.1,S
4,5,0,3,male,35.0,0,0,8.05,S


# Data Cleaning

In [4]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Sex              0
Age            177
SibSp            0
Parch            0
Fare             0
Embarked         2
dtype: int64

In [5]:
df.isna().mean()

PassengerId    0.000000
Survived       0.000000
Pclass         0.000000
Sex            0.000000
Age            0.198653
SibSp          0.000000
Parch          0.000000
Fare           0.000000
Embarked       0.002245
dtype: float64

In [6]:
# We will fill the missing values of age by random sample imputation


In [7]:
def randimp(df,variable,random_state=0):
    """Random-sample imputation: fill the gaps of a column with observed values.

    Adds a column named ``variable + "_random"`` to ``df`` (in place). It is a copy of
    ``df[variable]`` in which every missing entry is replaced by a value drawn at
    random from the non-missing entries of the same column. The source column is
    left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    variable : str
        Name of the column to impute.
    random_state : int, default 0
        Seed passed to ``Series.sample`` so the imputation is reproducible.

    Raises
    ------
    ValueError
        If the column has missing entries but no observed value to draw from.
    """
    target=variable+"_random"
    missing=df[variable].isnull()          # computed once and reused below
    observed=df[variable].dropna()
    n_missing=int(missing.sum())

    df[target]=df[variable]
    if n_missing==0:
        return
    if observed.empty:
        raise ValueError(f"column {variable!r} has no observed values to sample from")

    # Draw without replacement whenever possible (the original behaviour); fall back to
    # sampling with replacement only when there are more gaps than observed values,
    # a case in which sampling without replacement would raise.
    ran=observed.sample(n_missing,random_state=random_state,replace=n_missing>len(observed))
    # Re-label the draws with the positions of the gaps so the assignment aligns on index.
    ran.index=df.index[missing.values]
    df.loc[missing,target]=ran

In [8]:
randimp(df,"Age")

In [9]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Age_random
0,1,0,3,male,22.0,1,0,7.25,S,22.0
1,2,1,1,female,38.0,1,0,71.2833,C,38.0
2,3,1,3,female,26.0,0,0,7.925,S,26.0
3,4,1,1,female,35.0,1,0,53.1,S,35.0
4,5,0,3,male,35.0,0,0,8.05,S,35.0


In [10]:
df=df.drop("Age",axis=1)
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age_random
0,1,0,3,male,1,0,7.25,S,22.0
1,2,1,1,female,1,0,71.2833,C,38.0
2,3,1,3,female,0,0,7.925,S,26.0
3,4,1,1,female,1,0,53.1,S,35.0
4,5,0,3,male,0,0,8.05,S,35.0


In [11]:
df.isnull().sum()

PassengerId    0
Survived       0
Pclass         0
Sex            0
SibSp          0
Parch          0
Fare           0
Embarked       2
Age_random     0
dtype: int64

In [12]:
df["Embarked"].value_counts()

S    644
C    168
Q     77
Name: Embarked, dtype: int64

In [13]:
# Fill the missing Embarked entries with "S", the most frequent port in the counts above.
df["Embarked"]=df["Embarked"].fillna("S")

In [14]:
df.isnull().sum()

PassengerId    0
Survived       0
Pclass         0
Sex            0
SibSp          0
Parch          0
Fare           0
Embarked       0
Age_random     0
dtype: int64

In [15]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age_random
0,1,0,3,male,1,0,7.25,S,22.0
1,2,1,1,female,1,0,71.2833,C,38.0
2,3,1,3,female,0,0,7.925,S,26.0
3,4,1,1,female,1,0,53.1,S,35.0
4,5,0,3,male,0,0,8.05,S,35.0


In [16]:
# Encode Sex as a single 0/1 indicator column. drop_first=True drops the first category,
# so only one dummy column remains. dtype=int pins the result to integers (newer pandas
# returns booleans by default), and .iloc[:, 0] assigns a Series rather than a whole
# DataFrame to the column.
df["Sex"]=pd.get_dummies(df["Sex"],drop_first=True,dtype=int).iloc[:,0]

In [17]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age_random
0,1,0,3,1,1,0,7.25,S,22.0
1,2,1,1,0,1,0,71.2833,C,38.0
2,3,1,3,0,0,0,7.925,S,26.0
3,4,1,1,0,1,0,53.1,S,35.0
4,5,0,3,1,0,0,8.05,S,35.0


In [18]:
# Replace each embarkation letter with an integer code (S -> 2, Q -> 1, C -> 0).
# NOTE(review): these codes place the ports on a numeric scale; confirm that such an
# ordering is acceptable for the distance-based KNN model fitted later.
emp={"S":2,"Q":1,"C":0}
df["Embarked"]=df["Embarked"].map(emp)

In [19]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age_random
0,1,0,3,1,1,0,7.25,2,22.0
1,2,1,1,0,1,0,71.2833,0,38.0
2,3,1,3,0,0,0,7.925,2,26.0
3,4,1,1,0,1,0,53.1,2,35.0
4,5,0,3,1,0,0,8.05,2,35.0


In [20]:
df["Age"]=df["Age_random"].astype(int)


In [21]:
df=df.drop("Age_random",axis=1)

In [22]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [23]:
df.shape

(891, 9)

In [24]:
# Working set for all experiments: rows labelled 0 through 600. .loc slices by label and
# includes the end label, so this is 601 rows (not 600).
org=df.loc[0:600,:].copy()

In [25]:
org.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [26]:
org.shape

(601, 9)

In [27]:
train=org[:401].copy()

In [28]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [29]:
test=org[401:].copy()

In [30]:
test.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
401,402,0,3,1,0,0,8.05,2,26
402,403,0,3,0,1,0,9.825,2,21
403,404,0,3,1,1,0,15.85,2,28
404,405,0,3,0,0,0,8.6625,2,20
405,406,0,2,1,1,0,21.0,2,34


In [31]:
#X_train = train

In [32]:
#from sklearn.model_selection import train_test_split


In [33]:
features=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [34]:
# Model inputs: the selected feature columns of each split.
X_train = train[features]
X_test = test[features]
# Targets are kept as single-column frames (list selector), one per split.
y_train = train[["Survived"]]  # class label
y_test = test[["Survived"]]

In [35]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [36]:
org.shape

(601, 9)

In [37]:
y_train.shape

(401, 1)

In [38]:
y_test.shape

(200, 1)

In [39]:
X_test.shape

(200, 7)

In [40]:
X_train.shape

(401, 7)

In [41]:
from sklearn.neighbors import KNeighborsClassifier

In [42]:
# Fit a 5-nearest-neighbour classifier on the training split, then label the test split.
train_labels = y_train.values.ravel()  # flatten the single-column label frame to 1-D
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)


In [43]:
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

In [44]:
# NOTE(review): r2_score is scikit-learn's regression R^2 score; applied to 0/1 class
# labels it is hard to interpret (the value shown below is negative). Confirm whether
# it is needed alongside accuracy.
score=r2_score(y_test,y_pred)
# Share of test rows whose predicted label matches the true label.
accuracy=accuracy_score(y_test,y_pred)

In [45]:
score

-0.2762841301391359

In [46]:
accuracy

0.695

In [47]:
original_pred=y_pred
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [48]:
original_test=y_test
original_test

Unnamed: 0,Survived
401,0
402,0
403,0
404,0
405,0
...,...
596,1
597,0
598,0
599,1


In [49]:
original_test.index

RangeIndex(start=401, stop=601, step=1)

# Metamorphic Testing

# MR 1.2 Permutation of the attribute


In [50]:
feature=["SibSp","Pclass","Sex","Fare","Parch","Age","Embarked"]

In [51]:
org.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [52]:
# Same splits as the baseline, but with the columns taken in the permuted order.
X_train = train[feature]
X_test = test[feature]
# Targets stay single-column frames.
y_train = train[["Survived"]]  # class label
y_test = test[["Survived"]]

In [53]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [54]:
# Refit the 5-nearest-neighbour classifier on the current training split and predict.
train_labels = y_train.values.ravel()  # 1-D label vector expected by fit
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)


In [55]:
y_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [56]:
y_test

Unnamed: 0,Survived
401,0
402,0
403,0
404,0
405,0
...,...
596,1
597,0
598,0
599,1


In [57]:
new_accuracy = accuracy_score(y_test,y_pred)

In [58]:
new_accuracy

0.695

In [59]:
# After permuting the attribute order in both the training and test sets, the accuracy is identical to the original one (0.695)

In [60]:
y_pred[0]==original_pred[0]

True

In [61]:
# Position-by-position comparison of the baseline predictions with the current ones.
# The tally is sized from the arrays themselves rather than a hard-coded 200, so every
# prediction is counted even if the size of the test split changes.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
ones = int(((baseline == 1) & (current == 1)).sum())   # both runs predict 1
zeros = int(((baseline == 0) & (current == 0)).sum())  # both runs predict 0
other = int(baseline.size - ones - zeros)              # the two runs disagree
print(ones,zeros,other)

58 142 0


In [62]:
# Tally the baseline predictions: entries equal to 1 versus everything else.
ones = sum(1 for label in original_pred if label == 1)
zeros = len(original_pred) - ones
print(ones,zeros)

58 142


# MR 2.1 Addition of uninformative Attribute 

# In this case we consider PassengerId as an uninformative attribute

Add a new column that holds the same number in every row, i.e. a value equally associated with all the classes.

In [63]:
org.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [64]:
orga=org.copy()

In [65]:
orga["uninf"]=3

In [66]:
orga.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,uninf
0,1,0,3,1,1,0,7.25,2,22,3
1,2,1,1,0,1,0,71.2833,0,38,3
2,3,1,3,0,0,0,7.925,2,26,3
3,4,1,1,0,1,0,53.1,2,35,3
4,5,0,3,1,0,0,8.05,2,35,3


In [67]:
feature=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age","uninf"]

In [68]:
train = orga[:401].copy()

In [69]:
test = orga[401:].copy()

In [70]:
# Inputs now include the extra column listed in `feature`.
X_train = train[feature]
X_test = test[feature]
# Targets stay single-column frames.
y_train = train[["Survived"]]  # class label
y_test = test[["Survived"]]

In [71]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [72]:
# Refit the 5-nearest-neighbour classifier on the current training split and predict.
train_labels = y_train.values.ravel()  # 1-D label vector expected by fit
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)


In [73]:
y_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [74]:
y_test

Unnamed: 0,Survived
401,0
402,0
403,0
404,0
405,0
...,...
596,1
597,0
598,0
599,1


In [75]:
# The new accuracy is identical to the original one

In [76]:
accuracy_score(y_test,y_pred)

0.695

In [77]:
y_pred[0]==original_pred[0]

True

In [78]:
# Tally the baseline predictions: entries equal to 1 versus everything else.
oness = sum(1 for label in original_pred if label == 1)
zeross = len(original_pred) - oness
print(oness,zeross)

58 142


In [79]:
# Position-by-position comparison of the baseline predictions with the current ones.
# The tally is sized from the arrays themselves rather than a hard-coded 200, so every
# prediction is counted even if the size of the test split changes.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
ones = int(((baseline == 1) & (current == 1)).sum())   # both runs predict 1
zeros = int(((baseline == 0) & (current == 0)).sum())  # both runs predict 0
other = int(baseline.size - ones - zeros)              # the two runs disagree
print(ones,zeros,other)

58 142 0


# MR 2.2 Addition of informative Attributes

Add a new column such that every row with Survived = 1 gets the same value, while rows with Survived = 0 may take any other value.

In [80]:
chn=org.copy()

In [81]:
chn.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [82]:
chn.loc[chn["Survived"]==1,"info"]=1

In [83]:
chn.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,info
0,1,0,3,1,1,0,7.25,2,22,
1,2,1,1,0,1,0,71.2833,0,38,1.0
2,3,1,3,0,0,0,7.925,2,26,1.0
3,4,1,1,0,1,0,53.1,2,35,1.0
4,5,0,3,1,0,0,8.05,2,35,


In [84]:
# Give every non-survivor an integer drawn from 2..9 (the upper bound 10 is exclusive),
# so it never collides with the value 1 reserved for survivors. The number of draws is
# taken from the mask instead of a hard-coded row count, and the generator is seeded so
# the experiment is reproducible.
died = chn["Survived"]==0
chn.loc[died,"info"]=np.random.RandomState(0).randint(2,10,size=int(died.sum()))

In [85]:
chn.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,info
0,1,0,3,1,1,0,7.25,2,22,3.0
1,2,1,1,0,1,0,71.2833,0,38,1.0
2,3,1,3,0,0,0,7.925,2,26,1.0
3,4,1,1,0,1,0,53.1,2,35,1.0
4,5,0,3,1,0,0,8.05,2,35,3.0


In [86]:
feature=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age","info"]

In [87]:
train = chn[:401].copy()

In [88]:
test = chn[401:].copy()

In [89]:
# Inputs now include the extra column listed in `feature`.
X_train = train[feature]
X_test = test[feature]
# Targets stay single-column frames.
y_train = train[["Survived"]]  # class label
y_test = test[["Survived"]]

In [90]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [91]:
# Refit the 5-nearest-neighbour classifier on the current training split and predict.
train_labels = y_train.values.ravel()  # 1-D label vector expected by fit
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)

In [92]:
y_pred

array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [93]:
y_test

Unnamed: 0,Survived
401,0
402,0
403,0
404,0
405,0
...,...
596,1
597,0
598,0
599,1


In [94]:
accuracy_score(y_test,y_pred)

0.84

In [95]:
# The obtained accuracy is higher than the original one

In [96]:
y_pred[0]==original_pred[0]

False

In [97]:
# Position-by-position comparison of the baseline predictions with the current ones.
# The tally is sized from the arrays themselves rather than a hard-coded 200, so every
# prediction is counted even if the size of the test split changes.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
ones = int(((baseline == 1) & (current == 1)).sum())   # both runs predict 1
zeros = int(((baseline == 0) & (current == 0)).sum())  # both runs predict 0
other = int(baseline.size - ones - zeros)              # the two runs disagree
print(ones,zeros,other)

49 110 41


In [98]:
# Tally the baseline predictions: entries equal to 1 versus everything else.
oness = sum(1 for label in original_pred if label == 1)
zeross = len(original_pred) - oness
print(oness,zeross)


58 142


In [99]:
print(f"The number of ones that have changed to zeros are {oness-ones}")
# print(f"The number of zeros that have changed to ones are {zeross-zeros}")


The number of ones that have changed to zeros are 9


In [100]:
y_pred.shape

(200,)

In [101]:
original_pred.shape

(200,)

# Repeat this test and check it for the zeros

In [102]:
chi = org.copy()

In [103]:
chi.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [104]:
# Mark every non-survivor with the shared value 1. The mask is built from chi itself
# (not from the earlier chn frame) so the rows selected always belong to the frame
# being modified.
chi.loc[chi["Survived"]==0,"info"]=1

In [105]:
chi.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,info
0,1,0,3,1,1,0,7.25,2,22,1.0
1,2,1,1,0,1,0,71.2833,0,38,
2,3,1,3,0,0,0,7.925,2,26,
3,4,1,1,0,1,0,53.1,2,35,
4,5,0,3,1,0,0,8.05,2,35,1.0


In [106]:
chi[chi["Survived"]==1]

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,info
1,2,1,1,0,1,0,71.2833,0,38,
2,3,1,3,0,0,0,7.9250,2,26,
3,4,1,1,0,1,0,53.1000,2,35,
8,9,1,3,0,0,2,11.1333,2,27,
9,10,1,2,0,1,0,30.0708,0,14,
...,...,...,...,...,...,...,...,...,...,...
587,588,1,1,1,1,1,79.2000,0,60,
591,592,1,1,0,1,0,78.2667,0,52,
596,597,1,2,0,0,0,33.0000,2,52,
599,600,1,1,1,1,0,56.9292,0,49,


In [107]:
chi[chi["Survived"]==0]

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,info
0,1,0,3,1,1,0,7.2500,2,22,1.0
4,5,0,3,1,0,0,8.0500,2,35,1.0
5,6,0,3,1,0,0,8.4583,1,28,1.0
6,7,0,1,1,0,0,51.8625,2,54,1.0
7,8,0,3,1,3,1,21.0750,2,2,1.0
...,...,...,...,...,...,...,...,...,...,...
593,594,0,3,0,0,2,7.7500,1,28,1.0
594,595,0,2,1,1,0,26.0000,2,37,1.0
595,596,0,3,1,1,1,24.1500,2,36,1.0
597,598,0,3,1,0,0,0.0000,2,49,1.0


In [108]:
# Give every survivor an integer drawn from 2..9 (the upper bound 10 is exclusive), so it
# never collides with the value 1 reserved for non-survivors. The number of draws is
# taken from the mask instead of a hard-coded row count, and the generator is seeded so
# the experiment is reproducible.
survived = chi["Survived"]==1
chi.loc[survived,"info"]=np.random.RandomState(0).randint(2,10,size=int(survived.sum()))

In [109]:
chi.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,info
0,1,0,3,1,1,0,7.25,2,22,1.0
1,2,1,1,0,1,0,71.2833,0,38,7.0
2,3,1,3,0,0,0,7.925,2,26,4.0
3,4,1,1,0,1,0,53.1,2,35,9.0
4,5,0,3,1,0,0,8.05,2,35,1.0


In [110]:
chi.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age,info
596,597,1,2,0,0,0,33.0,2,52,7.0
597,598,0,3,1,0,0,0.0,2,49,1.0
598,599,0,3,1,0,0,7.225,0,36,1.0
599,600,1,1,1,1,0,56.9292,0,49,9.0
600,601,1,2,0,2,1,27.0,2,24,9.0


In [111]:
feature=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age","info"]

In [112]:
test = chi[401:].copy()
train = chi[:401].copy()

In [113]:
# Inputs now include the extra column listed in `feature`.
X_train = train[feature]
X_test = test[feature]
# Targets stay single-column frames.
y_train = train[["Survived"]]  # class label
y_test = test[["Survived"]]

In [114]:
# Refit the 5-nearest-neighbour classifier on the current training split and predict.
train_labels = y_train.values.ravel()  # 1-D label vector expected by fit
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)

In [115]:
y_pred

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [116]:
# Position-by-position comparison of the baseline predictions with the current ones.
# The tally is sized from the arrays themselves rather than a hard-coded 200, so every
# prediction is counted even if the size of the test split changes.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
ones = int(((baseline == 1) & (current == 1)).sum())   # both runs predict 1
zeros = int(((baseline == 0) & (current == 0)).sum())  # both runs predict 0
other = int(baseline.size - ones - zeros)              # the two runs disagree
print(ones,zeros,other)

41 129 30


In [117]:

# Tally the baseline predictions: entries equal to 1 versus everything else.
oness = sum(1 for label in original_pred if label == 1)
zeross = len(original_pred) - oness
print(oness,zeross)


58 142


In [118]:
#print(f"The number of ones that have changed to zeros are {oness-ones}")
print(f"The number of zeros that have changed to ones are {zeross-zeros}")

The number of zeros that have changed to ones are 13


# MR 3.2 Additional training sample
# Duplicate the training samples of one class label (either 0 or 1) and check the predictions for that label

# This should be done only on the training set, not on the test set

In [119]:
org.copy()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
4,5,0,3,1,0,0,8.0500,2,35
...,...,...,...,...,...,...,...,...,...
596,597,1,2,0,0,0,33.0000,2,52
597,598,0,3,1,0,0,0.0000,2,49
598,599,0,3,1,0,0,7.2250,0,36
599,600,1,1,1,1,0,56.9292,0,49


In [120]:
train=org[:401].copy()
test = org[401:].copy()

In [121]:
zer=train.copy()
zer.shape

(401, 9)

In [122]:
rep=zer[zer["Survived"]==1]
rep

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
8,9,1,3,0,0,2,11.1333,2,27
9,10,1,2,0,1,0,30.0708,0,14
...,...,...,...,...,...,...,...,...,...
391,392,1,3,1,0,0,7.7958,2,21
393,394,1,1,0,1,0,113.2750,0,23
394,395,1,3,0,0,2,16.7000,2,24
399,400,1,2,0,0,0,12.6500,2,28


In [123]:
sa=pd.concat([zer,rep],axis=0)

In [124]:
sa.shape

(559, 9)

In [125]:
feature=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [126]:
# Train on the augmented frame (sa); the test split is left as it was.
X_train = sa[feature]
X_test = test[feature]
# Targets stay single-column frames.
y_train = sa[["Survived"]]  # class label
y_test = test[["Survived"]]

In [127]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [128]:
# Refit the 5-nearest-neighbour classifier on the current training split and predict.
train_labels = y_train.values.ravel()  # 1-D label vector expected by fit
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)

In [129]:
y_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [130]:
y_test.index

RangeIndex(start=401, stop=601, step=1)

In [131]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [132]:
original_test.index

RangeIndex(start=401, stop=601, step=1)

In [133]:
original_pred.shape

(200,)

In [134]:
y_pred.shape

(200,)

In [135]:
# Position-by-position comparison of the baseline predictions with the current ones.
# The tally is sized from the arrays themselves rather than a hard-coded 200, so every
# prediction is counted even if the size of the test split changes.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
ones = int(((baseline == 1) & (current == 1)).sum())   # both runs predict 1
zeros = int(((baseline == 0) & (current == 0)).sum())  # both runs predict 0
other = int(baseline.size - ones - zeros)              # the two runs disagree
print(ones,zeros,other)

58 122 20


In [136]:
# Tally the baseline predictions: entries equal to 1 versus everything else.
oness = sum(1 for label in original_pred if label == 1)
zeross = len(original_pred) - oness
print(oness,zeross)

58 142


In [137]:
print(f"The number of ones that have changed to zeros are {oness-ones}")
#print(f"The number of zeros that have changed to ones are {zeross-zeros}")


The number of ones that have changed to zeros are 0


# Repeating this test and checking for zeros

In [138]:
train=org[:401].copy()
test = org[401:].copy()

In [139]:
zer=train.copy()
zer.shape

(401, 9)

In [140]:
rep=zer[zer["Survived"]==0]
rep

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
4,5,0,3,1,0,0,8.0500,2,35
5,6,0,3,1,0,0,8.4583,1,28
6,7,0,1,1,0,0,51.8625,2,54
7,8,0,3,1,3,1,21.0750,2,2
...,...,...,...,...,...,...,...,...,...
392,393,0,3,1,2,0,7.9250,2,28
395,396,0,3,1,0,0,7.7958,2,22
396,397,0,3,0,0,0,7.8542,2,31
397,398,0,2,1,0,0,26.0000,2,46


In [141]:
sa=pd.concat([zer,rep],axis=0)

In [142]:
sa.shape

(644, 9)

In [143]:
feature=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [144]:
# Train on the augmented frame (sa); the test split is left as it was.
X_train = sa[feature]
X_test = test[feature]
# Targets stay single-column frames.
y_train = sa[["Survived"]]  # class label
y_test = test[["Survived"]]

In [145]:
# Refit the 5-nearest-neighbour classifier on the current training split and predict.
train_labels = y_train.values.ravel()  # 1-D label vector expected by fit
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)

In [146]:
y_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0], dtype=int64)

In [147]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [148]:
# Position-by-position comparison of the baseline predictions with the current ones.
# The tally is sized from the arrays themselves rather than a hard-coded 200, so every
# prediction is counted even if the size of the test split changes.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
ones = int(((baseline == 1) & (current == 1)).sum())   # both runs predict 1
zeros = int(((baseline == 0) & (current == 0)).sum())  # both runs predict 0
other = int(baseline.size - ones - zeros)              # the two runs disagree
print(ones,zeros,other)

32 142 26


In [149]:
# Tally the baseline predictions: entries equal to 1 versus everything else.
oness = sum(1 for label in original_pred if label == 1)
zeross = len(original_pred) - oness
print(oness,zeross)

58 142


In [150]:
#print(f"The number of ones that have changed to zeros are {oness-ones}")
print(f"The number of zeros that have changed to ones are {zeross-zeros}")

The number of zeros that have changed to ones are 0


# MR 1.1 Permutation of class labels

In [151]:
org.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [152]:
org1=org.copy()

In [153]:
train=org1[:401].copy()

In [154]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [155]:
test=org1[401:].copy()

In [156]:
test.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
401,402,0,3,1,0,0,8.05,2,26
402,403,0,3,0,1,0,9.825,2,21
403,404,0,3,1,1,0,15.85,2,28
404,405,0,3,0,0,0,8.6625,2,20
405,406,0,2,1,1,0,21.0,2,34


In [157]:
org1.shape

(601, 9)

In [158]:
# First step of swapping the two class labels in the training split: park the 1s under a
# temporary "one" marker so they stay distinguishable while the 0s are rewritten to 1 in
# the next cell; the marker is turned into 0 afterwards.
# NOTE(review): writing a string into this numeric column presumably changes its dtype,
# which would explain the later astype(int) on y_train - confirm.
train.loc[train["Survived"]==1,"Survived"]= "one"

In [159]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,one,1,0,1,0,71.2833,0,38
2,3,one,3,0,0,0,7.925,2,26
3,4,one,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [160]:
train.loc[train["Survived"]==0,"Survived"]= 1

In [161]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,1,3,1,1,0,7.25,2,22
1,2,one,1,0,1,0,71.2833,0,38
2,3,one,3,0,0,0,7.925,2,26
3,4,one,1,0,1,0,53.1,2,35
4,5,1,3,1,0,0,8.05,2,35


In [162]:
train.loc[train["Survived"]=="one","Survived"]= 0

In [163]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,1,3,1,1,0,7.25,2,22
1,2,0,1,0,1,0,71.2833,0,38
2,3,0,3,0,0,0,7.925,2,26
3,4,0,1,0,1,0,53.1,2,35
4,5,1,3,1,0,0,8.05,2,35


In [164]:
org.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [165]:
feature=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [166]:
# Inputs for both splits, restricted to the columns in `feature`.
X_train = train[feature]
X_test = test[feature]
# Training labels come from the label-swapped frame and are cast back to integers.
y_train = train[["Survived"]].astype(int)  # class label
y_test = test[["Survived"]]


In [167]:
#X=org1.loc[:,["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]]
#y=org1.loc[:,["Survived"]] #class label
#y=y.astype(int)

In [168]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [169]:
# Refit the 5-nearest-neighbour classifier on the current training split and predict.
train_labels = y_train.values.ravel()  # 1-D label vector expected by fit
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, train_labels)
y_pred = knn.predict(X_test)

In [170]:
y_pred

array([0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0])

In [171]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [172]:
#y_test

In [173]:
#ac_ap=accuracy_score(y_test,y_pred)
#ac_ap

In [174]:
# The new accuracy is similar to the original one

In [175]:
original_pred[0]==y_pred[0]

False

In [176]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [177]:
# Compare the baseline predictions with the current ones, position by position.
# ones  -> both predict 1, zeros -> both predict 0, other -> the two disagree.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
# Lengths are checked rather than assuming a fixed 200 rows, so a differently
# sized test split can neither be silently truncated nor overrun.
assert baseline.shape == current.shape, "prediction arrays differ in length"
ones = int(np.count_nonzero((baseline == 1) & (current == 1)))
zeros = int(np.count_nonzero((baseline == 0) & (current == 0)))
other = int(baseline.size - ones - zeros)
print(ones,zeros,other)

0 0 200


In [178]:
# Tally the baseline predictions per class: entries equal to 1 go to `oness`,
# every other entry goes to `zeross`.
baseline_labels = np.asarray(original_pred)
oness = int(np.count_nonzero(baseline_labels == 1))
zeross = int(baseline_labels.size - oness)
print(oness,zeross)

58 142


In [179]:
# Baseline count per class minus the count that still agrees = number of
# predictions that left that class.
flipped_from_one = oness - ones
flipped_from_zero = zeross - zeros
print(f"The number of ones that have changed to zeros are {flipped_from_one}")
print(f"The number of zeros that have changed to ones are {flipped_from_zero}")

The number of ones that have changed to zeros are 58
The number of zeros that have changed to ones are 142


# MR 0 Applying Transformations

In [180]:
org.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [181]:
# Keep an independent copy of `org` under the name `att`.
att=org.copy()

In [182]:
# Positional split: the first 401 rows form the training split, the remainder
# the test split. Copies are taken so later column edits stay local.
# NOTE(review): this cell slices `org1` while later sections slice `org` - confirm that is intended.
train = org1.iloc[:401].copy()
test = org1.iloc[401:].copy()

In [183]:
# Affine rescaling k*x + b of Age with k=2 and b=1, applied to both splits.
for split in (train, test):
    split["Age"] = split["Age"].mul(2).add(1)

In [184]:
# Displays `att`, the copy of `org` taken before the split. The rescaled Age
# values live in `train`/`test`, so this output does not reflect the rescaling.
att.copy()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
4,5,0,3,1,0,0,8.0500,2,35
...,...,...,...,...,...,...,...,...,...
596,597,1,2,0,0,0,33.0000,2,52
597,598,0,3,1,0,0,0.0000,2,49
598,599,0,3,1,0,0,7.2250,0,36
599,600,1,1,1,1,0,56.9292,0,49


In [185]:
# Affine rescaling k*x + b of Fare with k=2 and b=1, applied to both splits.
for split in (train, test):
    split["Fare"] = split["Fare"].mul(2).add(1)

In [186]:
# Displays `att`, the copy of `org` taken before the split. The rescaled Fare
# values live in `train`/`test`, so this output does not reflect the rescaling.
att.copy()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
4,5,0,3,1,0,0,8.0500,2,35
...,...,...,...,...,...,...,...,...,...
596,597,1,2,0,0,0,33.0000,2,52
597,598,0,3,1,0,0,0.0000,2,49
598,599,0,3,1,0,0,7.2250,0,36
599,600,1,1,1,1,0,56.9292,0,49


In [187]:
# Predictor columns used to build X_train / X_test
# (every column except PassengerId and Survived).
feature=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [188]:
# Predictor matrices: the columns listed in `feature`, for each split.
X_train = train[feature]
X_test = test[feature]
# Targets are kept as one-column frames; the training labels are cast to int.
y_train = train[["Survived"]].astype(int)  # class label
y_test = test[["Survived"]]

In [189]:
# Train a 5-nearest-neighbour classifier on the rescaled training split, then
# predict a label for every row of the rescaled test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train.to_numpy().ravel())
y_pred = knn.predict(X_test)

In [190]:
y_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1])

In [191]:
y_test

Unnamed: 0,Survived
401,0
402,0
403,0
404,0
405,0
...,...
596,1
597,0
598,0
599,1


In [192]:
y_test.index

RangeIndex(start=401, stop=601, step=1)

In [193]:
# Fraction of test rows whose current prediction equals the true label.
ap_acc = accuracy_score(y_true=y_test, y_pred=y_pred)
ap_acc

0.68

In [194]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [195]:
original_test.index

RangeIndex(start=401, stop=601, step=1)

In [196]:
len(original_pred)

200

In [197]:
# Compare the baseline predictions with the current ones, position by position.
# ones  -> both predict 1, zeros -> both predict 0, other -> the two disagree.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
# Lengths are checked rather than assuming a fixed 200 rows, so a differently
# sized test split can neither be silently truncated nor overrun.
assert baseline.shape == current.shape, "prediction arrays differ in length"
ones = int(np.count_nonzero((baseline == 1) & (current == 1)))
zeros = int(np.count_nonzero((baseline == 0) & (current == 0)))
other = int(baseline.size - ones - zeros)
print(ones,zeros,other)

53 136 11


In [198]:
# Tally the baseline predictions per class: entries equal to 1 go to `oness`,
# every other entry goes to `zeross`.
baseline_labels = np.asarray(original_pred)
oness = int(np.count_nonzero(baseline_labels == 1))
zeross = int(baseline_labels.size - oness)
print(oness,zeross)

58 142


In [199]:
# Baseline count per class minus the count that still agrees = number of
# predictions that left that class.
flipped_from_one = oness - ones
flipped_from_zero = zeross - zeros
print(f"The number of ones that have changed to zeros are {flipped_from_one}")
print(f"The number of zeros that have changed to ones are {flipped_from_zero}")


The number of ones that have changed to zeros are 5
The number of zeros that have changed to ones are 6


In [200]:
# The obtained accuracy is similar to the original one

# 3.1 Add a particular test row and its data (labelled with the model's original prediction) to the original training set and refit the model; the prediction for that particular row should not change

# Repeat this for the other labels as well; we don't have to compare at the end

# repeat this for random labels

In [201]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [202]:
original_test

Unnamed: 0,Survived
401,0
402,0
403,0
404,0
405,0
...,...
596,1
597,0
598,0
599,1


In [203]:
# Positional split of `org`: the first 401 rows form the training split, the
# remainder the test split. Copies keep later edits from touching `org`.
train = org.iloc[:401].copy()
test = org.iloc[401:].copy()

In [204]:
# One-row frame holding the first test row (its index label is preserved).
b = test.head(1).copy()

In [205]:
# Label the row with what the baseline model predicted for it (position 0 of
# `original_pred`) instead of a hard-coded value, so the check stays valid
# if the baseline prediction for this row is ever different.
b["Survived"]=original_pred[0]

In [206]:
b

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
401,402,1,3,1,0,0,8.05,2,26


In [207]:
# Append the relabelled test row to the bottom of the training split
# (row-wise concatenation; original index labels are kept).
c = pd.concat([train, b])

In [208]:
c.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [209]:
c.shape

(402, 9)

In [210]:
c.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
397,398,0,2,1,0,0,26.0,2,46
398,399,0,2,1,0,0,10.5,2,23
399,400,1,2,0,0,0,12.65,2,28
400,401,1,3,1,0,0,7.925,2,39
401,402,1,3,1,0,0,8.05,2,26


In [211]:
# Names of the seven predictor columns (every column except PassengerId and Survived).
features=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [212]:
# Build model inputs from the augmented training frame `c` and the untouched
# test split. Columns come from `features`, the list defined in the cell
# directly above, rather than from the older `feature` variable that only
# exists if an earlier section has been run.
X_train=c.loc[:,features]
X_test = test.loc[:,features]
y_train=c.loc[:,["Survived"]] #class label
y_test = test.loc[:,["Survived"]]
y_train=y_train.astype(int)  # the classifier is trained on integer labels

In [213]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [214]:
# Refit the 5-nearest-neighbour classifier on the augmented training data and
# predict a label for every row of the test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train.to_numpy().ravel())
y_pred = knn.predict(X_test)

In [215]:
y_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1])

In [216]:
y_pred[0]==original_pred[0]

True

In [217]:
# Second 3.1 check: take the test row at position 1 and label it with the
# baseline model's prediction for that position, mirroring how `b` was built,
# so the check does not rely on the true label matching the prediction.
d=test[1:2].copy()
d["Survived"]=original_pred[1]

In [218]:
d

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
402,403,0,3,0,1,0,9.825,2,21


In [219]:
original_pred[1]

0

In [220]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [221]:
# Append the selected test row `d` to the bottom of the training split
# (row-wise concatenation; original index labels are kept).
c = pd.concat([train, d])

In [222]:
c.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
4,5,0,3,1,0,0,8.05,2,35


In [223]:
c.shape


(402, 9)

In [224]:
# Names of the seven predictor columns (every column except PassengerId and Survived).
features=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [225]:
# Build model inputs from the augmented training frame `c` and the untouched
# test split. Columns come from `features`, the list defined in the cell
# directly above, rather than from the older `feature` variable that only
# exists if an earlier section has been run.
X_train=c.loc[:,features]
X_test = test.loc[:,features]
y_train=c.loc[:,["Survived"]] #class label
y_test = test.loc[:,["Survived"]]
y_train=y_train.astype(int)  # the classifier is trained on integer labels

In [226]:
# Refit the 5-nearest-neighbour classifier on the augmented training data and
# predict a label for every row of the test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train.to_numpy().ravel())
y_pred = knn.predict(X_test)

In [227]:
y_pred[1]==original_pred[1]

True

# 5.2 Remove a few rows with a particular class value (in this case Survived = 0) from the training dataset, but store their values. Now fit the model on the reduced training dataset and apply that very same model to the removed subset; its predictions should be similar to the stored values

# Perform this operation on the training set before splitting, for both class values

In [228]:
org.copy()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
4,5,0,3,1,0,0,8.0500,2,35
...,...,...,...,...,...,...,...,...,...
596,597,1,2,0,0,0,33.0000,2,52
597,598,0,3,1,0,0,0.0000,2,49
598,599,0,3,1,0,0,7.2250,0,36
599,600,1,1,1,1,0,56.9292,0,49


In [229]:
train

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
4,5,0,3,1,0,0,8.0500,2,35
...,...,...,...,...,...,...,...,...,...
396,397,0,3,0,0,0,7.8542,2,31
397,398,0,2,1,0,0,26.0000,2,46
398,399,0,2,1,0,0,10.5000,2,23
399,400,1,2,0,0,0,12.6500,2,28


In [230]:
# Training rows whose Survived label is 0 (candidates for removal).
rem = train.loc[train["Survived"].eq(0)]

In [231]:
rem

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
4,5,0,3,1,0,0,8.0500,2,35
5,6,0,3,1,0,0,8.4583,1,28
6,7,0,1,1,0,0,51.8625,2,54
7,8,0,3,1,3,1,21.0750,2,2
...,...,...,...,...,...,...,...,...,...
392,393,0,3,1,2,0,7.9250,2,28
395,396,0,3,1,0,0,7.7958,2,22
396,397,0,3,0,0,0,7.8542,2,31
397,398,0,2,1,0,0,26.0000,2,46


In [232]:
rem.index

Int64Index([  0,   4,   5,   6,   7,  12,  13,  14,  16,  18,
            ...
            382, 384, 385, 386, 388, 392, 395, 396, 397, 398],
           dtype='int64', length=243)

In [233]:
# jk: independent copy of the training split (`train` itself is left unmodified).
jk=train.copy()

In [234]:
# we will remove few samples of zero ie 0,4,5,6,7,12,13,14,16,18

In [235]:
# Remove ten training rows labelled Survived == 0 (index labels listed below).
# `jk` is rebuilt from `train` with a non-inplace drop, so re-running this cell
# gives the same 391-row frame instead of raising KeyError on labels that an
# earlier run already removed.
jk=train.drop(index=[0,4,5,6,7,12,13,14,16,18])

In [236]:
jk.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.925,2,26
3,4,1,1,0,1,0,53.1,2,35
8,9,1,3,0,0,2,11.1333,2,27
9,10,1,2,0,1,0,30.0708,0,14


In [237]:
jk.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
396,397,0,3,0,0,0,7.8542,2,31
397,398,0,2,1,0,0,26.0,2,46
398,399,0,2,1,0,0,10.5,2,23
399,400,1,2,0,0,0,12.65,2,28
400,401,1,3,1,0,0,7.925,2,39


In [238]:
jk.shape

(391, 9)

In [239]:
# we have deleted 10 rows whose class labels aren't 1

In [240]:
test

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
401,402,0,3,1,0,0,8.0500,2,26
402,403,0,3,0,1,0,9.8250,2,21
403,404,0,3,1,1,0,15.8500,2,28
404,405,0,3,0,0,0,8.6625,2,20
405,406,0,2,1,1,0,21.0000,2,34
...,...,...,...,...,...,...,...,...,...
596,597,1,2,0,0,0,33.0000,2,52
597,598,0,3,1,0,0,0.0000,2,49
598,599,0,3,1,0,0,7.2250,0,36
599,600,1,1,1,1,0,56.9292,0,49


In [241]:
# Names of the seven predictor columns (every column except PassengerId and Survived).
features=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [242]:
# Build model inputs from the reduced training frame `jk` and the untouched
# test split. Columns come from `features`, the list defined in the cell
# directly above, rather than from the older `feature` variable that only
# exists if an earlier section has been run.
X_train=jk.loc[:,features]
X_test = test.loc[:,features]
y_train=jk.loc[:,["Survived"]] #class label
y_test = test.loc[:,["Survived"]]
y_train=y_train.astype(int)  # the classifier is trained on integer labels

In [243]:
#X_train, X_test, y_train, y_test= train_test_split(X,y,random_state=0,train_size=0.6)

In [244]:
# Refit the 5-nearest-neighbour classifier on the reduced training data and
# predict a label for every row of the test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train.to_numpy().ravel())
y_pred = knn.predict(X_test)

In [245]:
y_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1])

In [246]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [247]:
# Fraction of test rows whose current prediction equals the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7

In [248]:
y_pred.shape

(200,)

In [249]:
original_pred.shape

(200,)

In [250]:
# Compare the baseline predictions with the current ones, position by position.
# ones  -> both predict 1, zeros -> both predict 0, other -> the two disagree.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
# Lengths are checked rather than assuming a fixed 200 rows, so a differently
# sized test split can neither be silently truncated nor overrun.
assert baseline.shape == current.shape, "prediction arrays differ in length"
ones = int(np.count_nonzero((baseline == 1) & (current == 1)))
zeros = int(np.count_nonzero((baseline == 0) & (current == 0)))
other = int(baseline.size - ones - zeros)
print(ones,zeros,other)

58 141 1


In [251]:
# Tally the baseline predictions per class: entries equal to 1 go to `oness`,
# every other entry goes to `zeross`.
baseline_labels = np.asarray(original_pred)
oness = int(np.count_nonzero(baseline_labels == 1))
zeross = int(baseline_labels.size - oness)
print(oness,zeross)

58 142


In [252]:
# Baseline count per class minus the count that still agrees = number of
# predictions that left that class.
flipped_from_one = oness - ones
flipped_from_zero = zeross - zeros
print(f"The number of ones that have changed to zeros are {flipped_from_one}")
print(f"The number of zeros that have changed to ones are {flipped_from_zero}")


The number of ones that have changed to zeros are 0
The number of zeros that have changed to ones are 1


# Checking for the other label, i.e. 0: this time we will delete some rows whose label is 1

In [253]:
train

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.2500,2,22
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
4,5,0,3,1,0,0,8.0500,2,35
...,...,...,...,...,...,...,...,...,...
396,397,0,3,0,0,0,7.8542,2,31
397,398,0,2,1,0,0,26.0000,2,46
398,399,0,2,1,0,0,10.5000,2,23
399,400,1,2,0,0,0,12.6500,2,28


In [254]:
# Training rows whose Survived label is 1 (candidates for removal).
rem = train.loc[train["Survived"].eq(1)]

In [255]:
rem

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
1,2,1,1,0,1,0,71.2833,0,38
2,3,1,3,0,0,0,7.9250,2,26
3,4,1,1,0,1,0,53.1000,2,35
8,9,1,3,0,0,2,11.1333,2,27
9,10,1,2,0,1,0,30.0708,0,14
...,...,...,...,...,...,...,...,...,...
391,392,1,3,1,0,0,7.7958,2,21
393,394,1,1,0,1,0,113.2750,0,23
394,395,1,3,0,0,2,16.7000,2,24
399,400,1,2,0,0,0,12.6500,2,28


In [256]:
rem.index

Int64Index([  1,   2,   3,   8,   9,  10,  11,  15,  17,  19,
            ...
            381, 383, 387, 389, 390, 391, 393, 394, 399, 400],
           dtype='int64', length=158)

In [262]:
# jk: independent copy of the training split (`train` itself is left unmodified).
jk=train.copy()

In [263]:
# Remove ten training rows labelled Survived == 1 (index labels listed below).
# `jk` is rebuilt from `train` with a non-inplace drop, so re-running this cell
# gives the same result instead of raising KeyError on labels that an earlier
# run already removed.
jk=train.drop(index=[1,2,3,8,9,10,11,15,17,19])

In [264]:
jk.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
0,1,0,3,1,1,0,7.25,2,22
4,5,0,3,1,0,0,8.05,2,35
5,6,0,3,1,0,0,8.4583,1,28
6,7,0,1,1,0,0,51.8625,2,54
7,8,0,3,1,3,1,21.075,2,2


In [265]:
jk.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
396,397,0,3,0,0,0,7.8542,2,31
397,398,0,2,1,0,0,26.0,2,46
398,399,0,2,1,0,0,10.5,2,23
399,400,1,2,0,0,0,12.65,2,28
400,401,1,3,1,0,0,7.925,2,39


In [261]:
test

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Fare,Embarked,Age
401,402,0,3,1,0,0,8.0500,2,26
402,403,0,3,0,1,0,9.8250,2,21
403,404,0,3,1,1,0,15.8500,2,28
404,405,0,3,0,0,0,8.6625,2,20
405,406,0,2,1,1,0,21.0000,2,34
...,...,...,...,...,...,...,...,...,...
596,597,1,2,0,0,0,33.0000,2,52
597,598,0,3,1,0,0,0.0000,2,49
598,599,0,3,1,0,0,7.2250,0,36
599,600,1,1,1,1,0,56.9292,0,49


In [266]:
# Names of the seven predictor columns (every column except PassengerId and Survived).
features=["Pclass","Sex","SibSp","Parch","Fare","Embarked","Age"]

In [267]:
# Build model inputs from the reduced training frame `jk` and the untouched
# test split. Columns come from `features`, the list defined in the cell
# directly above, rather than from the older `feature` variable that only
# exists if an earlier section has been run.
X_train=jk.loc[:,features]
X_test = test.loc[:,features]
y_train=jk.loc[:,["Survived"]] #class label
y_test = test.loc[:,["Survived"]]
y_train=y_train.astype(int)  # the classifier is trained on integer labels

In [268]:
# Refit the 5-nearest-neighbour classifier on the reduced training data and
# predict a label for every row of the test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train.to_numpy().ravel())
y_pred = knn.predict(X_test)

In [269]:
y_pred

array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1])

In [270]:
original_pred

array([1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [271]:
# Compare the baseline predictions with the current ones, position by position.
# ones  -> both predict 1, zeros -> both predict 0, other -> the two disagree.
baseline = np.asarray(original_pred)
current = np.asarray(y_pred)
# Lengths are checked rather than assuming a fixed 200 rows, so a differently
# sized test split can neither be silently truncated nor overrun.
assert baseline.shape == current.shape, "prediction arrays differ in length"
ones = int(np.count_nonzero((baseline == 1) & (current == 1)))
zeros = int(np.count_nonzero((baseline == 0) & (current == 0)))
other = int(baseline.size - ones - zeros)
print(ones,zeros,other)

55 142 3


In [272]:
# Tally the baseline predictions per class: entries equal to 1 go to `oness`,
# every other entry goes to `zeross`.
baseline_labels = np.asarray(original_pred)
oness = int(np.count_nonzero(baseline_labels == 1))
zeross = int(baseline_labels.size - oness)
print(oness,zeross)

58 142


In [273]:
# Baseline count per class minus the count that still agrees = number of
# predictions that left that class.
flipped_from_one = oness - ones
flipped_from_zero = zeross - zeros
print(f"The number of ones that have changed to zeros are {flipped_from_one}")
print(f"The number of zeros that have changed to ones are {flipped_from_zero}")

The number of ones that have changed to zeros are 3
The number of zeros that have changed to ones are 0
