# Titanic Servivers classification using neural network

## Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
data=pd.read_csv("data/train.csv")
test_X=pd.read_csv("data/test.csv")
test_Y=pd.read_csv("data/gender_submission.csv")

def extract(data):
    x=data[['Pclass','Sex','Age','SibSp','Parch','Embarked']]
    x['Age'].fillna(x['Age'].mean(),inplace=True)
    x['Embarked'].fillna('S')
    x['Pclass']=x['Pclass']/3
    x['Age']=x['Age']/80.0
    x['SibSp']=x['SibSp']/8
    x['Parch']=x['Parch']/6
    #x['Fare']=x['Fare']/600
    return pd.get_dummies(x).values

train_x=extract(data)
test_x=extract(test_X)

train_y=[str(i) for i in data.Survived]
test_y=[str(i) for i in test_Y.Survived]
train_y,test_y=pd.get_dummies(train_y).values,pd.get_dummies(test_y).values
del(data,test_X,test_Y)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stab

## Building neural network

In [6]:
class Neural_Network(object):
  def __init__(self):
  #parameters
    self.inputSize = 9 #no of input in neural network
    self.hl1Size = 30 #no of nodes in hidden layer 1
    self.hl2Size = 30 #no of nodes in hidden layer 2
    self.outputSize = 2 #no of layer in output layer

  #weights
    self.W1 = np.random.randn(self.inputSize, self.hl1Size) 
    self.W2 = np.random.randn(self.hl1Size,self.hl2Size) 
    self.W3 = np.random.randn(self.hl2Size, self.outputSize) 
    self.b1=np.random.randn(self.hl1Size)
    self.b2=np.random.randn(self.hl2Size)
    self.b3=np.random.randn(self.outputSize)

  def forward(self, X):
    #forward propagation through our network
    self.z = np.dot(X, self.W1)+self.b1 # dot product of X (input) and first set of weights and adding bias
    self.hl1 = self.sigmoid(self.z) # activation function
    
    self.z2 = np.dot(self.hl1, self.W2)+self.b2 # dot product of hidden layer1 and second set of weights and adding bias
    self.hl2 = self.sigmoid(self.z2) # activation function
    
    self.z3 = np.dot(self.hl2, self.W3)+self.b3 # dot product of hidden layer2 and third set of weights and adding bias
    o = self.sigmoid(self.z3) # final activation function
    
    return o

  def sigmoid(self, s):
    # activation function
    return 1/(1+np.exp(-s))

  def sigmoidPrime(self, s):
    #derivative of sigmoid
    return s * (1 - s)

  def backward(self, X, y, o):
    # backward propagate through the network
    self.o_error = y - o # error in output
    self.o_delta = self.o_error*self.sigmoidPrime(o)

    self.z3_error = self.o_delta.dot(self.W3.T)
    self.z3_delta = self.z3_error*self.sigmoidPrime(self.hl2) 
    
    self.z2_error = self.z3_delta.dot(self.W2.T) 
    self.z2_delta = self.z2_error*self.sigmoidPrime(self.hl1) 

    self.W1 += X.reshape(self.inputSize,1).dot(self.z2_delta.reshape(1,self.hl1Size)) # adjusting first set (input --> hidden) weights
    self.W2 += self.hl1.T.dot(self.z3_delta) # adjusting second set (hidden --> output) weights
    self.W3 += self.hl2.reshape(self.hl2Size,1).dot(self.o_delta.reshape(1,self.outputSize)) # adjusting second set (hidden --> output) weights

  def train(self, X, y, n=1,batch=50):
    min,hw1,hw2,hw3,hb1,hb2,hb3=1,0,0,0,0,0,0
    print("Before Training loss: "+str(np.mean(np.square(y - self.forward(X)))))
    for j in range(n):
        for k in range(0,len(X),batch):
            avg_loss=[]
            for i in range(k,k+batch):
                o = self.forward(X[i])
                self.backward(X[i], y[i], o)
                l=np.mean(np.square(y - self.forward(X)))
                avg_loss.append(l)
                if min>l:
                    min=l
                    hw1,hb1=self.W1,self.b1
                    hw2,hb2=self.W2,self.b2
                    hw3,hb3=self.W3,self.b3
            print("avg loss of this batch is " + str(np.mean(avg_loss)))
              
    print("min loss: "+str(min))
    self.W1,self.b1=hw1,hb1
    self.W2,self.b2=hw2,hb2
    self.W3,self.b3=hw3,hb3
    print("After Training loss: "+str(np.mean(np.square(y - self.forward(X)))))
   
  def predict(self,X):
    return self.forward(X).round()

In [7]:
NN=Neural_Network()
NN.train(train_x,train_y,n=10,batch=297)

Before Training loss: 0.37393089682943526
avg loss of this batch is 0.20280455320609042
avg loss of this batch is 0.1737401957890361
avg loss of this batch is 0.1583609532643388
avg loss of this batch is 0.1555128365249644
avg loss of this batch is 0.1583082662738791
avg loss of this batch is 0.15378307340666503
avg loss of this batch is 0.15212796919872335
avg loss of this batch is 0.15260709190921953
avg loss of this batch is 0.1500476313629593
avg loss of this batch is 0.15105759269306965
avg loss of this batch is 0.1489367900236295
avg loss of this batch is 0.1471797377261384
avg loss of this batch is 0.1502796473642819
avg loss of this batch is 0.14629148895037578
avg loss of this batch is 0.14460950818671775
avg loss of this batch is 0.1485485413820073
avg loss of this batch is 0.1441913114943492
avg loss of this batch is 0.14227858436258378
avg loss of this batch is 0.1472985558957148
avg loss of this batch is 0.14180969079787895
avg loss of this batch is 0.14064550952896487
avg

In [8]:
prediction=NN.predict(test_x)

In [9]:
n=0
t=len(prediction)
for i in range(t):
    try:
        if accuracy_score(test_y[i].astype("float").round(), prediction[i].round())==1.0:
            n+=1
    except:pass
output=n/t
output

0.8301435406698564