In [159]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import path
from scipy.optimize import fmin_tnc

In [160]:
# Load the raw dataset from a CSV file in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='df.csv')

In [161]:
# Split the frame into the feature matrix (every column except the target)
# and the target vector (the 'Survived' column).
x = df.drop(columns=['Survived'])
y = df['Survived']

In [171]:
# One-hot encode the non-numeric feature columns.  dtype=float is requested
# explicitly: recent pandas versions emit bool dummy columns by default, and a
# frame mixing bool and numeric columns converts to an object-dtype array,
# which NumPy ufuncs such as np.exp (used by the model below) cannot process.
x = pd.get_dummies(x, dtype=float)

# Hold out 20% of the rows for validation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=42)

In [172]:
class LogisticRegressionUsingGD:
    """Binary logistic regression fitted with SciPy's truncated Newton solver.

    Despite the class name, the weights are not found by a hand-written
    gradient-descent loop: ``fit`` passes the cost and its gradient to
    ``scipy.optimize.fmin_tnc``.  No intercept term is added; include a
    constant column in ``x`` if one is wanted.

    Attributes:
        w_: 1-D array of fitted weights, set by ``fit``.
    """

    @staticmethod
    def _as_float_array(x):
        """Return ``x`` as a float ndarray (accepts DataFrames, lists, bool/int data)."""
        return np.asarray(x, dtype=float)

    @staticmethod
    def sigmoid(x):
        """Evaluate the logistic function 1 / (1 + exp(-x)) element-wise.

        Computed as exp(-log(1 + exp(-x))) through ``np.logaddexp`` so that
        large negative inputs do not overflow ``np.exp``.
        """
        return np.exp(-np.logaddexp(0, -np.asarray(x, dtype=float)))

    def probability(self, w, x):
        """Return the predicted probability of class 1 for each row of ``x``.

        Args:
            w: weight vector (1-D, or a column vector).
            x: feature matrix with one row per sample.

        Returns:
            ``sigmoid(x @ w)``; the shape follows ``np.dot(x, w)``.
        """
        return self.sigmoid(np.dot(self._as_float_array(x), w))

    def cost_function(self, w, x, y):
        """Return the mean binary cross-entropy of weights ``w`` on ``(x, y)``.

        With z = x @ w the per-sample loss
        -(y*log(s(z)) + (1-y)*log(1-s(z))) equals log(1 + exp(z)) - y*z.
        The second form is used because it never takes log(0), so the cost
        stays finite even when the predicted probabilities saturate at 0 or 1.
        """
        features = self._as_float_array(x)
        targets = np.asarray(y, dtype=float)
        z = np.dot(features, w)
        m = features.shape[0]
        return np.sum(np.logaddexp(0, z) - targets * z) / m

    def gradient(self, w, x, y):
        """Return the gradient of ``cost_function`` with respect to ``w``."""
        features = self._as_float_array(x)
        residual = self.sigmoid(np.dot(features, w)) - np.asarray(y, dtype=float)
        return np.dot(features.T, residual) / features.shape[0]

    def fit(self, x, y, w=None):
        """Estimate the weights by minimising the cross-entropy cost.

        Args:
            x: feature matrix, one row per sample (ndarray, DataFrame or
                nested list).
            y: 0/1 labels, one per row of ``x``; any shape that flattens to
                that length (ndarray, Series or list).
            w: starting weights, one per column of ``x``, in any shape that
                flattens to that length.  Defaults to all zeros.

        Returns:
            self, with the fitted weights stored in ``w_``.

        Raises:
            ValueError: if ``x`` and ``y`` disagree on the number of samples.
        """
        features = self._as_float_array(x)
        # np.asarray(...).ravel() also accepts a pandas Series or a list,
        # which have no .flatten() method.
        targets = np.asarray(y, dtype=float).ravel()
        if targets.shape[0] != features.shape[0]:
            raise ValueError(
                "x has {} rows but y has {} labels".format(
                    features.shape[0], targets.shape[0]
                )
            )
        if w is None:
            w = np.zeros(features.shape[1])
        start = np.asarray(w, dtype=float).ravel()

        opt_weights = fmin_tnc(
            func=self.cost_function,
            x0=start,
            fprime=self.gradient,
            args=(features, targets)
        )
        # fmin_tnc returns a tuple whose first element is the solution vector.
        self.w_ = opt_weights[0]
        return self

    def predict(self, x):
        """Return a 1-D integer array of 0/1 class labels for the rows of ``x``.

        A row is labelled 1 when its predicted probability is strictly
        greater than 0.5.  Requires ``fit`` to have been called first.
        """
        # atleast_2d lets a single 1-D sample yield a length-1 result.
        features = np.atleast_2d(self._as_float_array(x))
        predicted = self.probability(self.w_, features)
        return (predicted > 0.5).astype(int)

    def accuracy(self, x, actual_classes):
        """Return the fraction of positions where the two label vectors agree.

        Thin wrapper around ``accuracy_score(actual_classes, x)``.  Note that,
        despite its name, ``x`` is a vector of labels rather than a feature
        matrix, and the fitted model is not consulted.
        """
        return accuracy_score(actual_classes, x)

# Instantiate the custom estimator defined above.
model = LogisticRegressionUsingGD()

# Starting point for the optimiser: one zero weight per feature column.
theta = np.zeros(shape=(X_train.shape[1], 1))
# Training labels wrapped as a single-row 2-D array; fit() flattens it again.
y_train_ = np.array([y_train.values])

# fit() returns the model itself, so training and prediction can be chained.
y_predicted = model.fit(X_train, y_train_, theta).predict(X_valid)

# Report how often the predictions agree with the held-out labels.
print("The accuracy of the model is {}".format(model.accuracy(y_valid, y_predicted)))

The accuracy of the model is 0.7932960893854749


In [173]:
# Display the class labels the model predicted for the validation rows.
y_predicted

array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 1, 0])

0.7932960893854749