In [1]:
import numpy as np
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [2]:
def generate_dataset(n):
    x = np.arange(n)
    res_x = []
    res_y = []
    for i in range(n):
        rand = 1/(1+np.exp(-i/n)**2)*(i/n)
        y_i = 1 if np.random.rand() < rand else 0
        res_x.append([1, x[i]])
        res_y.append(y_i)
    return np.array(res_x), np.array(res_y)

In [3]:
def proba(coef, x):
    return 1 / (1+np.exp(-x.dot(coef)))

In [4]:
def cross_entropy(coef, x, y):
    p = proba(coef, x)
    return -np.mean(y*np.log(p) + (1-y)*(1-p))

In [5]:
def gradient(coef, x, y):
    p = proba(coef, x)
    return -np.mean(y * x.transpose() / (1+np.exp(x.dot(coef))) - (1-y)*x.transpose()*p, axis=1)

In [6]:
def logistic_regression(coef, x, y, lr, epsilon=1e-4):
    prev_error = 0
    while True:
        error = cross_entropy(coef, x, y)
        if abs(error - prev_error) <= epsilon:
            break
        prev_error = error
        grad = gradient(coef, x, y)
        coef = np.subtract(coef, lr * grad)
    return coef

In [7]:
coef = np.array([0, 0])
x, y = generate_dataset(200)
p = figure()
p.scatter(x[:,1], y)
show(p)

In [8]:
coef = logistic_regression(coef, x, y, 1e-2)

In [9]:
p = figure()
p.scatter(x[:,1], y)
p.line(x[:,1], proba(coef, x), color="orange")
show(p)
coef

array([-16.06930975,   0.20835642])