# Titanic - Machine Learning from Disaster
## ML Project from kaggle
This project can be found on https://www.kaggle.com/c/titanic/data?select=train.csv

### Goal
It is your job to predict if a passenger survived the sinking of the Titanic or not.
For each in the test set, you must predict a 0 or 1 value for the variable.


### Import Data

In [778]:
import pandas as pd
import numpy as np

train = pd.read_csv('train.csv')

train_data = np.array(train)
columns = train.columns.values


test_numerical, train_meta1 = pd.factorize(train[columns[4]])
train_data[:,4] = test_numerical
train[columns[4]] = test_numerical
test_numerical, train_meta2 = pd.factorize(train[columns[11]])
train_data[:,11] = test_numerical
train[columns[11]] = test_numerical

traintarget = train_data[:,1]
traintarget =np.array(traintarget,dtype=float)
train_data = train_data[:,[2, 4, 5, 6, 7, 9, 11]]
train_data =np.array(train_data,dtype=float)
train_data[np.isnan(train_data)] = 40
traintarget[np.isnan(traintarget)] = 0

for i in range(train_data.shape[1]):
    train_data[:,i] = train_data[:,i]*0.99/np.max(train_data[:,i]) +0.01


train_target = np.zeros((len(traintarget),2))
for i in range(len(train_target)):
    train_target[i,int(traintarget[i])] = 1

### Implementation Neural Network


In [779]:
def sigmoid(x):
    return 1/(1+np.e ** -x)
def d_sigmoid(x):
    return x * (1-x)

activation_func = sigmoid
d_activation_func = d_sigmoid

def forward_single_layer(out_prev, w_curr, b_curr):
        return activation_func(np.dot(w_curr, out_prev)+b_curr)

class NeuralNetwork:
    def __init__(self, architecture):
        self.architecture = architecture
        self.parameters = {}
        for idx, layer in enumerate(architecture):
            layer_idx = idx+1
            layer_input = layer["inputs"]
            layer_output = layer["neurons"]
            self.parameters['W'+str(layer_idx)] = np.random.randn(layer_output,layer_input)
            self.parameters['B'+str(layer_idx)] = np.random.randn(layer_output,1)

    def forward_propagation(self, inputs):
        inputs = np.array(inputs, ndmin=2).T
        out_curr = inputs
        memory = {}
        for idx, layer in enumerate(self.architecture):
            layer_idx = idx+1
            w_curr = self.parameters["W"+str(layer_idx)]
            b_curr = self.parameters["B"+str(layer_idx)]
            out_curr = forward_single_layer(out_prev=out_curr,w_curr=w_curr,b_curr=b_curr)
            memory[idx] = out_curr
        return out_curr, memory


    def train_iteration(self, inputs, target, learn_rate):
        target = np.array(target, ndmin=2).T
        output, memory = self.forward_propagation(inputs)
        inputs = np.array(inputs, ndmin=2).T
        loss = 0
        for idx, layer in reversed(list(enumerate(self.architecture))):
            layer_idx = idx+1
            if layer_idx == len(self.architecture):
                loss += target - output
                adjust_w = learn_rate*1/layer["neurons"]*(loss*d_activation_func(memory[idx])).dot(memory[idx-1].T)
                adjust_b = learn_rate*1/layer["neurons"]*loss*d_activation_func(memory[idx])
                self.parameters["W"+str(layer_idx)] += (adjust_w/learn_rate - adjust_b)*learn_rate
                self.parameters["B"+str(layer_idx)] += adjust_b
            elif layer_idx == 1:
                loss = self.parameters["W"+str(layer_idx+1)].T.dot(loss)
                adjust_w = learn_rate*1/layer["neurons"]*(loss*d_activation_func(memory[idx])).dot(inputs.T)
                adjust_b = learn_rate*1/layer["neurons"]*loss*d_activation_func(memory[idx])
                self.parameters["W"+str(layer_idx)] += (adjust_w/learn_rate - adjust_b)*learn_rate
                self.parameters["B"+str(layer_idx)] += adjust_b
            else:
                loss = self.parameters["W"+str(layer_idx+1)].T.dot(loss)
                adjust_w = learn_rate*1/layer["neurons"]*(loss*d_activation_func(memory[idx])).dot(memory[idx-1].T)
                adjust_b = learn_rate*1/layer["neurons"]*loss*d_activation_func(memory[idx])
                self.parameters["W"+str(layer_idx)] += (adjust_w/learn_rate - adjust_b)*learn_rate
                self.parameters["B"+str(layer_idx)] += adjust_b

    def train(self, inputs, targets, epoches):
        for epoch in range(epoches):
            shuffler = np.random.permutation(len(targets))
            inputs = inputs[shuffler]
            targets = targets[shuffler]
            for i in range(0,891):
                inputdata = inputs[i,:]
                target = targets[i,:]
                self.train_iteration(inputdata, target, 0.1)
            if epoch/epoches * 100 % 10 == 0:
                print('Training Processed: '+str(epoch/epoches*100)+'%')

In [780]:
NN_architecture = [
    {"inputs":7, "neurons":5},
    {"inputs":5, "neurons":2}
]
NN = NeuralNetwork(NN_architecture)

In [781]:
NN.train(train_data,train_target,100)

Training Processed: 0.0%
Training Processed: 10.0%
Training Processed: 20.0%
Training Processed: 30.0%
Training Processed: 40.0%
Training Processed: 50.0%
Training Processed: 60.0%
Training Processed: 70.0%
Training Processed: 80.0%
Training Processed: 90.0%


### Test on Train Dataset

In [782]:
count = 0
predict = np.empty((400,2))
for i in range(400):
    out, mem = NN.forward_propagation(train_data[i,:])
    predict[[i],:] = out.T
    if np.argmax(out) == np.argmax(train_target[i,:]):
        count += 1
performance = (count/400)*100
print('Performance of prediction: ', performance, '%')

Performance of prediction:  82.25 %


## Test Data for idmission

In [783]:
test = pd.read_csv('test.csv')

test_data = np.array(test)
columns = test.columns.values


test_numerical, train_meta1 = pd.factorize(test[columns[3]])
test_data[:,3] = test_numerical
test[columns[3]] = test_numerical
test_numerical, train_meta2 = pd.factorize(test[columns[10]])
test_data[:,10] = test_numerical
test[columns[10]] = test_numerical

test_data = test_data[:,[1, 3, 4, 5, 6, 8, 10]]
test_data =np.array(test_data,dtype=float)
test_data[np.isnan(test_data)] = 40

for i in range(test_data.shape[1]):
    test_data[:,i] = test_data[:,i]*0.99/np.max(test_data[:,i]) +0.01

In [784]:
result = np.empty(test_data.shape[0])
for i in range(test_data.shape[0]):
    res, mem = NN.forward_propagation(test_data[i,:])
    result[i] = np.argmax(res)

### Create CSV file for idmission

In [785]:
ID = np.arange(892, 1310, 1)
ID = np.array(ID, ndmin=2, dtype=int).T
result = np.array(result, ndmin=2, dtype=int).T
sub = np.hstack((ID,result))

sub = pd.DataFrame(sub, columns=['PassengerId', 'Survived'])
sub.to_csv('sub.csv', index=False)

## Conclusion
