In [121]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

train_data['Sex'] = train_data['Sex'].map({'male': 0, 'female': 1})
test_data['Sex'] = test_data['Sex'].map({'male': 0, 'female': 1})

def zs_normalization(x, axis = 0):
    mean = np.mean(x, axis = 0)
    std_dev = np.std(x)
    return (x - mean)/std_dev

x_train = train_data.drop(['Survived', 'Name'], axis = 1).values
x_train = zs_normalization(x_train)

x_test = test_data.drop(['Survived', 'Name'], axis = 1).values
x_test = zs_normalization(x_test)

y_train = train_data['Survived'].values
y_test = test_data['Survived'].values

def f(x, w, b):
    f = 1/(1 + np.exp(-np.dot(x, w) - b))
    bin_f = np.where(f<0.5, 0, 1)
    return bin_f

def gradient_descent(x, y, w, b, alpha, itr, reg):
    m = len(y)
    
    for i in range(itr+1):
        y_cap = f(x, w, b)
        error = (y_cap - y)

        temp_w = (1-(reg/m))*w - (alpha/m)*np.dot(error, x)
        temp_b = b - (2*alpha/m)*np.sum(error)
        w = temp_w
        b = temp_b

    return w,b

def f1_score(y_true, y_predict):
    
    tp = np.sum((y_true == 1) & (y_predict == 1))
    tn = np.sum((y_true == 0) & (y_predict == 0))
    fp = np.sum((y_true == 0) & (y_predict == 1))
    fn = np.sum((y_true == 1) & (y_predict == 0))
    
    precision = tp/(tp + fp)
    recall = tp/(tp + fn)

    if precision + recall == 0:
        return 0

    f1_score = 2*(precision*recall)/(precision + recall)
    return f1_score

w = np.zeros(x_train.shape[1])
b = 0

alpha = 0.001
itr = 10000
reg = 0.001

w, b = gradient_descent(x_train, y_train, w, b, alpha, itr, reg)


y_cap_train = f(x_train, w, b)
f1_train = f1_score(y_train, y_cap_train)
print ('Training Accuracy:', f1_train)


y_cap_test = f(x_test, w, b)
f1_test = f1_score(y_test, y_cap_test)
print ('Testing Accuracy:', f1_test)

    

Training Accuracy: 0.7105263157894737
Testing Accuracy: 0.7471698113207547
