In [2]:
import numpy as np
import scipy
import scipy.optimize as opt
import sklearn
import sklearn.datasets
import matplotlib.pyplot as plt
import sys
import time
from scipy.optimize import bracket

In [3]:
def sigma(w, x):
    """Logistic sigmoid of the inner product ``np.dot(w, x)``.

    Algebraically this is ``1 / (1 + exp(-z))`` with ``z = np.dot(w, x)``.
    It is evaluated as ``exp(-logaddexp(0, -z))`` because the naive form
    overflows inside ``exp`` once ``z`` drops below roughly -709, which emits
    a RuntimeWarning (or raises under ``np.errstate(over="raise")``).

    Parameters
    ----------
    w, x : array_like
        Operands handed to ``np.dot``. Elsewhere in this file ``w`` is the
        weight vector and ``x`` one row of the sample matrix.

    Returns
    -------
    float or ndarray
        Sigmoid value(s), in the closed interval [0, 1].
    """
    z = np.dot(w, x)
    # logaddexp(0, -z) == log(1 + exp(-z)), computed without forming a huge exp(-z).
    return np.exp(-np.logaddexp(0, -z))


def function(w, X, y=None):
    """Mean negative log-likelihood of the logistic model with weights ``w``.

    For every sample ``x`` with label ``l`` the term is
    ``log(sigmoid((2 * l - 1) * np.dot(w, x)))``; the result is minus the sum
    of these terms divided by ``len(X)``.

    Parameters
    ----------
    w : array_like
        Weight vector.
    X : sequence of array_like
        Samples, iterated row by row.
    y : sequence, optional
        One label per sample. The ``2 * l - 1`` mapping presumes labels in
        {0, 1} -- confirm against the data. When omitted, the module-level
        ``labels`` variable is used, as in the original two-argument form.

    Returns
    -------
    float
        The mean logistic loss.

    Raises
    ------
    ZeroDivisionError
        If ``X`` is empty.
    """
    if y is None:
        y = labels
    # log(sigmoid(z)) == -logaddexp(0, -z). Going through sigmoid first turns
    # the term into log(0) = -inf (and the loss into inf) as soon as the
    # sigmoid underflows, which happens at large trial steps in a line search.
    log_likelihoods = [-np.logaddexp(0, -(2 * l - 1) * np.dot(w, x)) for x, l in zip(X, y)]
    return -1 / len(X) * np.sum(log_likelihoods)


def gradient(w, X, y=None):
    """Gradient with respect to ``w`` of the mean logistic loss over ``X``.

    Computes ``-1 / len(X) * sum(x * (l - sigma(w, x)))`` over the paired
    samples and labels.

    Parameters
    ----------
    w : array_like
        Weight vector.
    X : sequence of ndarray
        Samples, iterated row by row.
    y : sequence, optional
        One label per sample (presumably in {0, 1} -- confirm against the
        data). When omitted, the module-level ``labels`` variable is used, as
        in the original two-argument form, so existing callers keep working.

    Returns
    -------
    ndarray
        Gradient vector with the same shape as one sample.

    Raises
    ------
    ZeroDivisionError
        If ``X`` is empty.
    """
    if y is None:
        # Backward-compatible fallback to the notebook-level variable.
        y = labels
    per_sample = [x * (l - sigma(w, x)) for x, l in zip(X, y)]
    return -1 / len(X) * np.sum(per_sample, axis=0)


def gessian(w, dw1, dw2, X):
    """Second-order term of the mean logistic loss along ``dw1`` and ``dw2``.

    Returns ``-1 / len(X)`` times the sum over samples ``x`` of
    ``np.dot(dw1, x) * np.dot(dw2, x) * (s - 1) * s`` with ``s = sigma(w, x)``.
    This is the bilinear form ``dw1^T H dw2`` for the Hessian ``H`` of the
    loss whose gradient is ``gradient`` above.

    Parameters
    ----------
    w : array_like
        Weight vector at which the term is evaluated.
    dw1, dw2 : array_like
        Direction vectors.
    X : sequence of array_like
        Samples, iterated row by row.

    Returns
    -------
    float
        The scalar ``dw1^T H dw2``.
    """
    contributions = []
    for sample in X:
        prob = sigma(w, sample)
        curvature = (prob - 1) * prob
        contributions.append(np.dot(dw1, sample) * np.dot(dw2, sample) * curvature)
    return -1 / len(X) * np.sum(contributions)


1


In [3]:
def der(fun, point, epsilon):
    """Forward-difference estimate of the derivative of ``fun`` at ``point``.

    Parameters
    ----------
    fun : callable
        Function of one argument.
    point :
        Location at which the derivative is estimated.
    epsilon :
        Step added to ``point``; must be non-zero.

    Returns
    -------
    The quotient ``(fun(point + epsilon) - fun(point)) / epsilon``.
    """
    shifted_value = fun(point + epsilon)
    base_value = fun(point)
    return (shifted_value - base_value) / epsilon


def check_gradient(fun, grad, X, R, diff_eps=np.sqrt(sys.float_info.epsilon)):
    """Average relative error between ``grad`` and a forward difference of ``fun``.

    A weight vector is drawn uniformly from ``[-R, R)`` per coordinate using
    the global NumPy random state. For every coordinate direction the
    analytic directional derivative ``np.dot(grad(w, X), e_i)`` is compared
    with ``(fun(w + diff_eps * e_i, X) - fun(w, X)) / diff_eps``.

    The gradient and the base value ``fun(w, X)`` do not depend on the
    direction, so each is evaluated once instead of once per use.

    Parameters
    ----------
    fun : callable
        ``fun(w, X)`` returning a scalar.
    grad : callable
        ``grad(w, X)`` returning the gradient of ``fun`` with respect to ``w``.
    X : ndarray
        2-D array; ``X.shape[1]`` sets the dimension of ``w``.
    R : float
        Half-width of the sampling box for ``w``.
    diff_eps : float, optional
        Finite-difference step; defaults to the square root of machine epsilon.

    Returns
    -------
    float
        Mean over coordinates of ``|analytic - numeric| / |analytic|``.
        A zero analytic component makes its term ``inf`` or ``nan``.
    """
    dim = X.shape[1]
    w = np.random.random(dim)
    w = (2 * w - 1) * R
    dw = np.eye(dim)
    analytic_grad = grad(w, X)
    base_value = fun(w, X)
    difs = []
    for dw_i in dw:
        analytic = np.dot(analytic_grad, dw_i)
        numeric = (fun(w + diff_eps * dw_i, X) - base_value) / diff_eps
        difs.append(np.abs((analytic - numeric) / analytic))
    return np.average(difs)


def check_gessian(grad, gess, X, R, diff_eps=np.sqrt(sys.float_info.epsilon)):
    """Average relative error between ``gess`` and a forward difference of ``grad``.

    A weight vector is drawn uniformly from ``[-R, R)`` per coordinate using
    the global NumPy random state. For every ordered pair of coordinate
    directions ``(e_i, e_j)`` the analytic value ``gess(w, e_i, e_j, X)`` is
    compared with
    ``(np.dot(grad(w + diff_eps * e_i, X), e_j) - np.dot(grad(w, X), e_j)) / diff_eps``.

    ``grad(w, X)`` is evaluated once and the shifted gradient once per
    ``e_i``, giving ``dim + 1`` gradient evaluations instead of
    ``2 * dim ** 2``. ``gess`` is evaluated once per pair instead of twice.

    Parameters
    ----------
    grad : callable
        ``grad(w, X)`` returning a gradient vector.
    gess : callable
        ``gess(w, dw1, dw2, X)`` returning the scalar second-order term.
    X : ndarray
        2-D array; ``X.shape[1]`` sets the dimension of ``w``.
    R : float
        Half-width of the sampling box for ``w``.
    diff_eps : float, optional
        Finite-difference step; defaults to the square root of machine epsilon.

    Returns
    -------
    float
        Mean over direction pairs of ``|analytic - numeric| / |analytic|``.
        A zero analytic entry makes its term ``inf`` or ``nan``.
    """
    dim = X.shape[1]
    w = np.random.random(dim)
    w = (2 * w - 1) * R
    dw = np.eye(dim)
    base_grad = grad(w, X)
    difs = []
    for dw1 in dw:
        shifted_grad = grad(w + diff_eps * dw1, X)
        for dw2 in dw:
            analytic = gess(w, dw1, dw2, X)
            numeric = (np.dot(shifted_grad, dw2) - np.dot(base_grad, dw2)) / diff_eps
            difs.append(np.abs((analytic - numeric) / analytic))
    return np.average(difs)


[[1 2]
 [1 4]]


In [None]:
def golden_search_bounded(fun, a0, b0, eps=0.0001, max_iter=1000):
    """Golden-section search for a minimiser of ``fun`` on ``[a0, b0]``.

    One interior point ``c`` is kept together with its value. Each step
    evaluates the mirror point ``d = a + b - c`` and discards the
    sub-interval that cannot hold the minimum of a unimodal function.

    Compared with the recursive formulation this version
    * does not evaluate ``fun`` at the returned point (that value was unused),
    * stops when a step fails to shrink the bracket, which happens when
      ``eps`` is below floating-point resolution,
    * is iterative, so it cannot raise ``RecursionError``.

    Parameters
    ----------
    fun : callable
        Scalar function of one scalar argument.
    a0, b0 : float
        Left and right ends of the initial bracket.
    eps : float, optional
        The search stops once the bracket width ``b - a`` is below ``eps``.
    max_iter : int, optional
        Hard cap on the number of bracket reductions.

    Returns
    -------
    float
        Left end of the final bracket.
    """
    ratio = (1 + 5 ** 0.5) / 2
    a, b = a0, b0
    c = a0 + (b0 - a0) / ratio
    fc = fun(c)
    for _ in range(max_iter):
        if b - a < eps:
            break
        d = a + b - c
        fd = fun(d)
        # Keep the interior points ordered so that c <= d.
        if c > d:
            c, d = d, c
            fc, fd = fd, fc
        if fc < fd:
            # Minimum lies in [a, d]; c stays the interior point.
            next_a, next_b = a, d
        else:
            # Minimum lies in [c, b]; d becomes the interior point.
            next_a, next_b = c, b
            c, fc = d, fd
        if not (next_b - next_a < b - a):
            # Rounding left nothing to cut: the bracket is at float resolution.
            break
        a, b = next_a, next_b
    return a


def golden_search(fun, b=300, a=0, eps=0.0001, max_expansions=1000):
    """Golden-section search with a window that slides until it holds the minimum.

    ``golden_search_bounded`` is run on ``[a, b]``. If the result lies within
    ``eps`` of an end of the window, the window is replaced by the adjacent
    one of equal width on that side and the search is repeated.

    Fixes relative to the recursive version:
    * ``eps`` is used for every window, not only the first one;
    * if the window moves one way and the next result points straight back,
      the minimiser sits on the shared boundary and is returned instead of
      bouncing between the two windows forever;
    * the number of window moves is bounded.

    Parameters
    ----------
    fun : callable
        Scalar function of one scalar argument.
    b, a : float, optional
        Right and left ends of the initial window (note the order).
    eps : float, optional
        Tolerance for the bounded search and for the end-of-window test.
    max_expansions : int, optional
        Maximum number of window moves.

    Returns
    -------
    float
        Approximate minimiser of ``fun``.

    Raises
    ------
    RuntimeError
        If the result is still on a window edge after ``max_expansions``
        moves, e.g. for a function with no minimum in the search direction.
    """
    lo, hi = a, b
    last_move = 0  # -1: window last moved left, +1: right, 0: not moved yet
    for _ in range(max_expansions + 1):
        x = golden_search_bounded(fun, lo, hi, eps)
        if np.abs(x - lo) < eps:
            move = -1
        elif np.abs(x - hi) < eps:
            move = 1
        else:
            return x
        if last_move == -move:
            # Direction reversed: the minimum is on the boundary just crossed.
            return x
        last_move = move
        if move < 0:
            lo, hi = 2 * lo - hi, lo
        else:
            lo, hi = hi, 2 * hi - lo
    raise RuntimeError("golden_search: no interior minimum found after %d window moves" % max_expansions)


In [None]:
def gradient_descent(fun, grad, one_dim_search, start, epsilon, max_iter=None):
    """Steepest descent with a caller-supplied one-dimensional line search.

    Each iteration moves from ``x`` to ``x - d * alpha`` where ``d = grad(x)``
    and ``alpha = one_dim_search(phi)`` with ``phi(alpha) = fun(x - d * alpha)``.
    Iteration stops once the squared gradient norm, relative to its value at
    ``start``, is no longer above ``epsilon``.

    Every tenth iteration (starting with the first) the length of the step
    just taken is printed.

    Parameters
    ----------
    fun : callable
        Objective, ``fun(x)`` returning a scalar.
    grad : callable
        ``grad(x)`` returning the gradient of ``fun``.
    one_dim_search : callable
        Receives the one-argument function ``phi`` and returns a step size.
    start : ndarray
        Starting point.
    epsilon : float
        Threshold on ``|grad(x)|^2 / |grad(start)|^2``.
    max_iter : int, optional
        Upper bound on the number of iterations; ``None`` (default) means
        no bound, as before.

    Returns
    -------
    tuple
        ``(x, k)``: the final point and the number of iterations performed.
    """
    x = start
    d0 = grad(x)
    initial_sq_norm = np.dot(d0, d0)
    if initial_sq_norm == 0:
        # Already stationary; avoids the 0 / 0 in the relative test below.
        return x, 0
    d = d0  # reuse the gradient instead of evaluating it a second time
    k = 0
    while np.dot(d, d) / initial_sq_norm > epsilon:
        if max_iter is not None and k >= max_iter:
            break
        x1 = x - d * one_dim_search(lambda alpha: fun(x - d * alpha))
        if k % 10 == 0:
            print(np.linalg.norm(x - x1))
        x = x1
        d = grad(x)
        k += 1
    return x, k