In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
train_data=pd.read_csv("LOG_REG_TRAIN.csv")
test_data=pd.read_csv("LOG_TEST.csv")

In [2]:
train_data.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


## Let's have a look at the data first

In [3]:
train_data.describe()

Unnamed: 0,Survived,Pclass,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
count,577.0,577.0,577.0,577.0,577.0,577.0
mean,0.39688,2.310225,29.336222,0.54766,0.376083,31.942309
std,0.489675,0.834863,14.208585,1.114098,0.778832,46.816156
min,0.0,1.0,0.75,0.0,0.0,0.0
25%,0.0,2.0,21.0,0.0,0.0,7.925
50%,0.0,3.0,28.0,0.0,0.0,14.5
75%,1.0,3.0,37.0,1.0,0.0,30.5
max,1.0,3.0,71.0,8.0,5.0,512.3292


### From the data given, we know that "Survived" class is the thing that we need to predict but we also know, that not all the other columns would be contributing to that <br>

### For instance, Although "Pclass(Passenger class)" Plays an important role in deciding the survival rate, "Fare" Doesn't. Also, "Sex" also plays an important role but it is in a different dtype

In [3]:
train_data["Sex"].dtype #Shows Object and is in str format, we need to convert it into integer

dtype('O')

In [5]:
#Simple enough, we can represent males as 0, females as 1, I am not sure if representing them by any 
#other number makes a difference but we will see.
train_data["Sex"]=(train_data["Sex"].replace("female",1)).replace("male",0)

#### So everything except "Fare" ,"Survived" and "Name" seems appropriate for our training data


In [104]:
X_train=train_data.drop(["Survived","Fare","Name"],axis=1)
Y_train=train_data["Survived"]

In [105]:
def Normalize(data):
    return (data-data.mean())/data.std()

In [214]:
X_train_norm=Normalize(X_train)
Y_train_norm=Normalize(Y_train)
#Let's add a column of Ones to account for the bias term
X_train_norm.insert(0,"Ones",np.ones(len(X_train)))

In [107]:
#Activation function
def sigmoid(z):
    return 1/(1+np.exp(-z))
#So what the sigmoid function does is take whatever the output was and map in the range(0,1), which 
#very crucial in case of Logistic Regression, as it allows our final result be interpreted as a 
#probability

def log_loss(y_pred,y_true):
    return -np.mean(y_true*np.log(y_pred)+(1-y_true)*np.log(1-y_pred))

In [229]:
def Logistic_Reg(X,Y,epochs,lr):
    num_samples,num_features=X.shape
    weights=np.random.randn(num_features)
    for epoch in range(epochs):
        z=X@weights
        act=sigmoid(z)
        loss=log_loss(act,Y_train)
        gradients=((act-Y_train)@X) * (1.0/num_samples)
        weights-=lr*gradients
    return weights    

def accuracy(Y_pred,Y_true):
    return sum(Y_pred==Y_true)/len(Y_true)

weights=Logistic_Reg(X_train_norm,Y_train_norm,10000,0.01)

In [233]:
preds=sigmoid(X_train_norm@weights)

In [234]:
preds[preds>=0.5]=1
preds[preds<0.5]=0

In [235]:
accuracy(preds,Y_train)

0.7902946273830156

In [236]:
test_data.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,1,2,Miss. Julie Rachel Christy,female,25.0,1,1,30.0
1,1,1,Mrs. John Borland (Marian Longstreth Morris) T...,female,39.0,1,1,110.8833
2,0,2,Mr. William James Downton,male,54.0,0,0,26.0
3,0,1,Mr. John Hugo Ross,male,36.0,0,0,40.125
4,0,3,Mr. Uscher Paulner,male,16.0,0,0,8.7125


In [239]:
test_data["Sex"]=(test_data["Sex"].replace("female",1)).replace("male",0)

In [240]:
X_test=test_data.drop(["Survived","Fare","Name"],axis=1)
Y_test=test_data["Survived"]

In [243]:
X_test_norm=Normalize(X_test)
X_test_norm.insert(0,"Ones",np.ones(len(X_test)))

In [244]:
test_preds=sigmoid(X_test_norm@weights)
test_preds[test_preds>=0.5]=1.0
test_preds[test_preds<0.5]=0.0

In [245]:
accuracy(test_preds,Y_test)

0.8032258064516129