In [1]:
#importing important libraries

import numpy as np

In [2]:
import tensorflow as tf

# Remote locations of the Titanic training and evaluation CSV files.
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

# Fetch both files (training first, then evaluation) and keep the local
# paths that get_file hands back for later loading.
train_file_path, test_file_path = (
    tf.keras.utils.get_file(file_name, url)
    for file_name, url in (("train.csv", TRAIN_DATA_URL), ("eval.csv", TEST_DATA_URL))
)

In [3]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Load the downloaded training CSV into a DataFrame.
train = pd.read_csv(train_file_path)

In [4]:
# Preview the first rows of the training data.
train.head()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


# Pipeline and preprocessing

In [5]:
# Numeric columns: standardise with StandardScaler.
num_pipeline = Pipeline([('std_scaler', StandardScaler())])

# Categorical columns: one-hot encode into a dense array, ignoring categories
# that were not seen during fitting.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# name, so try the current keyword first and fall back for older releases.
try:
    one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    # Older scikit-learn: `sparse_output` is an unexpected keyword argument.
    one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)
cat_pipeline = Pipeline([('one_hot_encoding', one_hot_encoder)])

In [6]:
# Split the column names by dtype: numeric columns feed the scaler,
# everything else feeds the one-hot encoder.
numeric_columns = train.select_dtypes(include=[np.number]).columns
non_numeric_columns = train.select_dtypes(exclude=[np.number]).columns

num_attributes = list(numeric_columns)
# "survived" is the label, not an input feature.
num_attributes.remove("survived")
cat_attributes = list(non_numeric_columns)

In [7]:
# Separate the model inputs from the label column.
features = train.drop(columns=["survived"])
target = train["survived"]

In [8]:
# Route each group of columns through its own preprocessing pipeline.
full_pipeline = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_attributes),
        ("cat", cat_pipeline, cat_attributes),
    ]
)

In [9]:
# Fit the preprocessing pipeline and transform the predictor columns.
# The input is rebuilt from `train` rather than read from `features`, so this
# cell can be re-run safely: it no longer consumes its own previous output.
features = full_pipeline.fit_transform(train.drop(["survived"], axis=1))

# Helper functions

In [10]:
#Let's implement the following

#Activation (sigmoid function)
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), computed stably.

    Evaluating exp(-x) directly overflows for large negative x. Here the
    exponential is only ever taken of a non-positive number, so it stays in
    (0, 1] and cannot overflow:

        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))

    Args:
        x: scalar or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like x.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

#when using multiple linear models (perceptrons)
#the derivative of the sigmoid function is needed:
def sigmoid_prime(x):
    """Return the derivative of the sigmoid at x: sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    return activation * (1 - activation)


#Output (prediction function)
def prediction(features, weight,bias):
    """Return the sigmoid of the linear score dot(features, weight) + bias."""
    linear_score = np.dot(features, weight) + bias
    return sigmoid(linear_score)


#error (log-loss function) for a single perceptron
def error_formula(y,output, eps=1e-15):
    """Return the element-wise log-loss (binary cross-entropy).

    Computes -y*log(output) - (1-y)*log(1-output).

    Args:
        y: true label(s), 0 or 1.
        output: predicted probability/probabilities.
        eps: predictions are clipped to [eps, 1 - eps] before taking logs.

    Returns:
        The loss for each (y, output) pair, always finite.
    """
    # A saturated prediction (exactly 0 or 1) would give log(0) = -inf, and
    # 0 * -inf = nan, which poisons any mean taken over the losses.
    output = np.clip(output, eps, 1 - eps)
    return -y*np.log(output) - (1-y)*np.log(1-output)

#error term for multiple perceptrons: prediction error scaled by the sigmoid derivative
def error_fucn(x,y,output):
    """Return the prediction error (y - output) scaled by sigmoid_prime(x)."""
    slope = sigmoid_prime(x)
    return (y - output) * slope


#gradient descent step
def update_weights(x,y, weight,bias, learnrate):
    """Perform one gradient-descent step for a single sample.

    Args:
        x: feature vector of the sample.
        y: true label of the sample.
        weight: current weight vector (left unmodified).
        bias: current bias.
        learnrate: step size applied to the update.

    Returns:
        A (weight, bias) tuple holding the updated values.
    """
    error = y - prediction(x, weight, bias)
    # Build new values instead of using `+=`: in-place addition would silently
    # modify the caller's array and fails outright for integer-dtype weights.
    weight = weight + learnrate * error * x
    bias = bias + learnrate * error
    return weight, bias

# Training Function

In [11]:
#seed NumPy's global random generator so the randomly drawn initial weights
#in training() are reproducible
np.random.seed(45)

def training(features,targets,epochs,learnrate):
    """Train a single sigmoid perceptron with per-sample gradient-descent updates.

    After every epoch the mean log-loss and the accuracy on the supplied data
    are printed; a warning is appended when the loss went up compared with
    the previous epoch.

    Args:
        features: 2-D array with one row per record and one column per feature.
        targets: labels (0 or 1), one per row of `features`.
        epochs: number of full passes over the data.
        learnrate: step size for each weight update.

    Returns:
        A (weights, bias, errors) tuple: the learned parameters and the list of
        per-epoch mean losses.
    """
    errors = []
    last_loss = None
    # Draw small random initial weights, scaled by 1/sqrt(n_features).
    _, n_features = features.shape
    weights = np.random.normal(scale=1 / n_features**.5, size=n_features)
    bias = 0
    for epoch in range(epochs):
        for x, y in zip(features, targets):
            weights, bias = update_weights(x, y, weights, bias, learnrate)

        # Log-loss on the training data, measured against the `targets`
        # argument (not a notebook global) so it always matches `features`.
        out = prediction(features, weights, bias)
        loss = np.mean(error_formula(targets, out))
        errors.append(loss)
        if epoch % (epochs/10) == 0:
            print('\n==============Epoch', epoch, "========")
        # Compare against None explicitly: a previous loss of exactly 0.0 is
        # falsy and would otherwise skip the check.
        if last_loss is not None and last_loss < loss:
            print('Training loss:', loss, " WARNING ---  Loss is increasing")
        else:
            print('Training loss', loss)
        last_loss = loss
        predictions = out > 0.5
        accuracy = np.mean(predictions == targets)
        print("Accuracy: ", accuracy)
    return weights, bias, errors

In [12]:
# Train on the preprocessed features for 10 epochs with a learning rate of 0.01.
training(features,target,10,0.01)


Training loss 0.4851238446298384
Accuracy:  0.7719298245614035

Training loss 0.44884250790371155
Accuracy:  0.8118022328548644

Training loss 0.43517567657268796
Accuracy:  0.8118022328548644

Training loss 0.42892098265419376
Accuracy:  0.8118022328548644

Training loss 0.42565698585045747
Accuracy:  0.8165869218500797

Training loss 0.42376789901305173
Accuracy:  0.8149920255183413

Training loss 0.4225761643002155
Accuracy:  0.8165869218500797

Training loss 0.4217674714406743
Accuracy:  0.8149920255183413

Training loss 0.4211837570164918
Accuracy:  0.8149920255183413

Training loss 0.42073998986640326
Accuracy:  0.8165869218500797
