In [11]:
import numpy as np
import math
from sklearn.utils.extmath import softmax
import matplotlib.pyplot as plt
from import_LR import import_LR
from pprint import pprint

In [12]:
'''Functions'''
def sigmoid(x):
    '''Computes the sigmoid function'''
    s = 1/(1+np.exp(-x))
    return s

def hessian(N, d, X, wk, lambda_):
    '''Computes the Hessian matrix'''
    
    u_H = 0
    I = np.eye(d)
    for n in range(N):
        u_H += sigmoid(w.T @ X[:,n,None])  *  (1-sigmoid(w.T @ X[:,n,None]))  *  (X[:,n,None] @ X[:,n,None].T)  +  1/lambda_*I
 
#     m_H = (sigmoid(w.T @ X) * (1-sigmoid(w.T @ X)).T) * (X @ X.T) + 1/lambda_*I  # Experimental

#     H = (sigmoid(w.T @ X) @ (1-sigmoid(w.T @ X)).T) * (X @ X.T) + 1/lambda_*I  # Experimental
    
    return u_H
    
def gradient_vector(N, X, y, wk, lambda_):
    '''Computes the gradient of the negative log-likelihood'''
    
    u_l = 0
    for n in range(N):
        u_l += -(y[:,n]-sigmoid(w.T @ X[:,n]))[0] * X[:,n,None]
    u_l += 1/lambda_*w
    
#     l = -((y-sigmoid(w.T @ X)) @ X.T).T + 1/lambda_*w  # Should work
      
    return u_l

In [13]:
'''Import data'''
X, y = import_LR()                       # Input data & labels
ones = np.ones((1,X.shape[1]))
X = np.vstack([ones, X])
epsilon = 1e-10                           # Convergence metric
max_iter = 500

In [14]:
'''Metrics'''
d = X.shape[0]                           # Number of dimensions
N = X.shape[1]                           # Number of samples
C = list(set(y))                         # Classes
lambda_ = 1                              # Regularization parameter

In [15]:
'''Matrix initialization'''
y = y[np.newaxis,:]
w = np.random.randn(d,1)

In [16]:
# Experimental
# a = np.random.randint(1,5,(d,d))
# print(a)
# print('\n\n')

# print(a[:,0,None]@a[:,0,None].T)
# print(a[:,1,None]@a[:,1,None].T)
# # print(a[None,0,:]@a[None,0,:].T)
# # print(a[None,1,:]@a[None,1,:].T)
# print('\n\n')

# print(np.tensordot(a,a,0).T)
# # print(np.tensordot(X,X, axes=1).shape)
# # print(np.tensordot(X,X, axes=2).shape)

In [17]:
'''Newton-Raphson'''
i = 0
e = 2*epsilon
conv = False
while ((e > epsilon) and (i < max_iter)):
    w_prev = w
    H = hessian(N, d, X, w, lambda_)
    l = gradient_vector(N, X, y, w, lambda_)
    w = w - np.linalg.inv(H) @ l
    e = np.linalg.norm(w - w_prev)/np.linalg.norm(w_prev)  # e = np.amax(np.abs(wk1 - wk))
    if (i%100==0):
        print('w')
        print(w)
        print('H')
        print(H)
        print('l')
        print(l)
        print('i')
        print(i)
        print('e')
        print(e)
    i += 1

w
[[-0.07824337]
 [-0.05199713]
 [ 0.98135842]]
H
[[1154.50055429  -21.63922256   -3.34675121]
 [ -21.63922256 1419.06946963  163.46696062]
 [  -3.34675121  163.46696062 1267.91483357]]
l
[[ -29.43268689]
 [-523.52315013]
 [  59.23262015]]
i
0
e
0.3376153602048997
w
[[0.18355041]
 [1.20052349]
 [0.75547627]]
H
[[1117.0446527    -8.17413267  -28.31178889]
 [  -8.17413267 1157.02829407  -62.46897266]
 [ -28.31178889  -62.46897266 1277.24153134]]
l
[[-0.00224929]
 [-0.00136971]
 [-0.00085829]]
i
100
e
1.7569496742573562e-06
w
[[0.1835703 ]
 [1.20053531]
 [0.75548374]]
H
[[1117.04373996   -8.17499454  -28.3125499 ]
 [  -8.17499454 1157.0261458   -62.46930257]
 [ -28.3125499   -62.46930257 1277.23863559]]
l
[[-1.23709895e-07]
 [-6.96567071e-08]
 [-4.48769556e-08]]
i
200
e
9.44497010007549e-11
