In [1]:
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

plt.style.use("ggplot")
%matplotlib inline

Hypothesis function for logistic regression takes form $ h(\theta) = \frac{1}{1 \ + \ e^{\ -\theta^T x}} $

With the following cost function: $ Cost \ (h_\theta (x), \ y) = - \ y \ log(h_\theta (x)) \ - \ (1 - y) \ log(1 - h_\theta (x)) $

In [21]:
class LogReg(object):
    
    def __init__(self, iterations=1000, learning_rate=0.01):
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.x = None
        self.y = None
        self.w = None
        self.costs = []
        
    def sigmoid(self):
        return 1 / (1 + np.e ** (-self.w.T @ self.x))
        
    def cost(self):
        return - self.y.T @ np.log(self.sigmoid()).T - (1 - self.y).T @ np.log(1 - self.sigmoid()).T
        
    def fit(self, x, y):
        self.x = x
        self.y = y
        if self.w is None:
            self.w = np.ones((self.x.shape[0], 1))
        for _ in range(self.iterations):
            updated_w = np.ones_like(self.w)
            for i, param in enumerate(self.w):
                updated_w[i] = param - self.learning_rate * 1/self.x.shape[1] * (self.sigmoid().T - self.y).T @ self.x[i, :].T
            self.w = updated_w
            if _ % (self.iterations / 10) == 0:
                print(_)
                self.costs.append(np.sum(self.cost()))
                
    def plot_costs(self):
        plt.plot(self.costs)

In [3]:
titanic = sns.load_dataset("titanic")

In [4]:
titanic.drop(["sex", "embarked", "class", "who", "deck", "embark_town", "alive"], axis=1, inplace=True)
titanic.dropna(inplace=True)

In [5]:
y = titanic["survived"]

In [6]:
x = titanic.drop("survived", axis=1)

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [22]:
mod = LogReg(iterations=5000, learning_rate=0.003)

In [10]:
xm = x.values.T.astype('float')

In [11]:
xm = np.insert(xm, 0, 1, axis=0)

In [12]:
ym = y.values.reshape(len(y), 1)

In [23]:
mod.fit(xm, ym)

0




500
1000
1500
2000
2500
3000
3500
4000
4500


In [20]:
mod.costs

[nan,
 748.3167147636434,
 546.21860001324046,
 475.57250629197972,
 450.58964356468221,
 436.08439226337384,
 424.4334643409772,
 414.20972099505508,
 405.07623774527315,
 396.88712072066585]