In [69]:
from math import log, exp, sqrt
import numpy as np

In [70]:
## Prepare Data
# Load the dataset from the CSV file through the project-local helper.
# NOTE(review): presumably X is the (n_samples, n_features) feature matrix and
# y the vector of labels in {-1, +1} that the logistic loss below expects —
# confirm against diabeticRetinopathyUtils.
from diabeticRetinopathyUtils import load_diabetic_retinopathy
X,y = load_diabetic_retinopathy(filename = "diabeticRetinopathy.csv")

## Q1.1

### Computing the value, gradient and hessian of the objective function $f$

The goal is to find $ (\omega_0^*, \omega^*) = \underset{\omega_0 \in \mathbb{R},\ \omega \in \mathbb{R}^p}{\operatorname{arg\,min}}\ f(\omega_0, \omega)$, where $n$ is the number of samples, $p$ the number of features, and $$f(\omega_0, \omega) = \frac{1}{n}\sum_{i=1}^n{\log\left(1+ e^{-y_i\left(x_i^T\omega+\omega_0\right)}\right)} + \frac{\rho}{2}{\|\omega\|}^2_2$$


The gradient will be :

$$ \nabla f(w_{0},w ) =\big ( \frac {\partial f}{\partial w_0} ,  \frac {\partial f}{\partial w} \big) = \big( \frac{1}{n}\sum_{i=1}^{n}\frac{-y_{i}}{1 + \exp(y_{i}(x_{i}^T w + w_{0}))} , \frac{1}{n}\sum_{i=1}^{n}\frac{-x_{i} y_{i}}{1 + \exp(y_{i}(x_{i}^T w + w_{0}))} + \rho w  \big)   $$


The Hessian matrix will be : 


Let $ z_{i} = x_{i}^T w +w_{0} $

$$ \frac {\partial^2 f}{\partial w_0 ^2} =  \frac {1}{n} \sum_{i=1}^{n} \frac {y_{i}^2 \exp(y_{i}z_{i})}{( 1 + \exp(y_{i}z_{i}))^2} $$

$$ \frac {\partial^2 f}{\partial w_0 \partial w} =  \frac {1}{n} \sum_{i=1}^{n} \frac {x_{i}^{T} y_{i}^2 \exp(y_{i}z_{i})}{( 1 + \exp(y_{i}z_{i}))^2} $$

$$ \frac {\partial^2 f}{\partial w \partial w_0} =  \frac {1}{n} \sum_{i=1}^{n} \frac {x_{i} y_{i}^2 \exp(y_{i}z_{i})}{( 1 + \exp(y_{i}z_{i}))^2} $$

$$ \frac {\partial^2 f}{\partial w ^2} =  \frac {1}{n} \sum_{i=1}^{n} \frac {x_{i}x_{i}^{T} y_{i}^2 \exp(y_{i}z_{i})}{( 1 + \exp(y_{i}z_{i}))^2}  + \rho I$$

$$ \nabla^2 f(w_0, w) =
\begin{pmatrix} 
\frac {\partial^2 f}{\partial w_0 ^2} &  \frac {\partial^2 f}{\partial w_0 \partial w}\\
\frac {\partial^2 f}{\partial w \partial w_0} & \frac {\partial^2 f}{\partial w ^2} 
\end{pmatrix} $$

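Writing $\tilde{x}_i = (1, x_i^T)^T$, the Hessian is $\frac{1}{n}\sum_{i=1}^{n} c_i\, \tilde{x}_i \tilde{x}_i^T + \rho\, \mathrm{diag}(0, 1, \dots, 1)$ with $c_i = \frac{y_i^2 \exp(y_i z_i)}{(1 + \exp(y_i z_i))^2} \geq 0$. Each term $c_i\, \tilde{x}_i \tilde{x}_i^T$ is positive semidefinite (for any vector $v$, $v^T \tilde{x}_i \tilde{x}_i^T v = (\tilde{x}_i^T v)^2 \geq 0$), and so is the diagonal regularisation term since $\rho \geq 0$. A sum of positive semidefinite matrices is positive semidefinite, so the Hessian is positive semidefinite everywhere and the objective function is convex. (Entry-wise positivity of the Hessian would not be sufficient, and does not hold in general because the $x_i$ may have negative components.)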

## Q1.2

In [86]:
def f(w0,w,X,y):
    """Value, gradient and Hessian of the L2-regularised logistic loss.

    The objective is

        f(w0, w) = 1/n * sum_i log(1 + exp(-y_i * (x_i . w + w0)))
                   + rho/2 * ||w||^2,        with rho = 1/n,

    where n is the number of rows of X. The intercept w0 is not regularised.

    Parameters
    ----------
    w0 : float
        Intercept.
    w : array-like, shape (p,)
        Weight vector, one entry per column of X.
    X : array-like, shape (n, p)
        Feature matrix, one sample per row.
    y : array-like, shape (n,)
        Labels; the loss is written for labels in {-1, +1}.

    Returns
    -------
    func : float
        Objective value.
    grad : list [grad_w0, grad_w]
        Partial derivative w.r.t. w0 (scalar) and gradient w.r.t. w (shape (p,)).
    hessian : ndarray, shape (p + 1, p + 1)
        Hessian w.r.t. (w0, w), with the intercept in row/column 0.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    w = np.asarray(w, dtype=float).ravel()
    n, p = X.shape
    rho = 1.0/n

    # margins[i] = y_i * (x_i . w + w0)
    margins = y*(X.dot(w) + w0)

    #VALUE
    # log(1 + exp(-m)) evaluated as logaddexp(0, -m) so large |m| cannot overflow.
    func = np.mean(np.logaddexp(0.0, -margins)) + rho/2*w.dot(w)

    #GRADIENT
    # prob[i] = 1 / (1 + exp(m_i)), computed in log-space for stability.
    prob = np.exp(-np.logaddexp(0.0, margins))
    coef = -y*prob
    grad_w0 = coef.mean()
    # The penalty rho/2*||w||^2 differentiates to rho*w (a vector, not rho*||w||).
    grad_w = X.T.dot(coef)/n + rho*w

    #HESSIAN
    # exp(m) / (1 + exp(m))^2 == prob * (1 - prob); the label enters squared.
    weights = y**2*prob*(1.0 - prob)
    # Rows a_i = [1, x_i] so that the intercept occupies row/column 0.
    A = np.hstack([np.ones((n, 1)), X])
    hessian = (A.T*weights).dot(A)/n
    # Regularisation contributes rho*I on the w-block only (intercept unpenalised).
    hessian[1:, 1:] += rho*np.eye(p)

    return func,[grad_w0,grad_w],hessian
    
# Disabled smoke test: change the condition to True to evaluate f once on a
# random weight vector with a zero intercept.
# NOTE(review): 19 is presumably the number of feature columns in X — confirm.
if False:
    w = np.random.randn(19)
    f(0,w,X,y)
    
    
#TODO check_gradient
    
    
    

## Q1.3

In [None]:
def Newton(f,w0=0,w=0,X=X,y=y):
    