In [2]:
import scipy as sp
import numpy as np
import copy
import numpy.random as rnd

The gradient and the loss of the regularized objective, each implemented as a function:

In [3]:
def GradERM(X, y, w, v, sigmafunc, Dsigmafunc, LambdaRegularization):
    """Gradient of the loss computed by ``LossERM`` with respect to ``w`` and ``v``.

    The per-sample loss is ``(y - X@w)**2 / (2*sigma) + log(sigma)/2`` with
    ``sigma = sigmafunc(X@v)``; an L2 penalty scaled by the number of rows of
    ``X`` is added for both weight vectors.

    Parameters
    ----------
    X : array
        Design matrix; ``X.shape[0]`` scales the regularization term.
    y : array
        Targets, one per row of ``X``.
    w, v : array
        Weight vectors (the callers in this notebook pass the two columns of a
        single ``(d, 2)`` array).
    sigmafunc, Dsigmafunc : callable
        ``sigma`` and its derivative, both applied element-wise to ``X@v``.
    LambdaRegularization : float
        L2 regularization strength.

    Returns
    -------
    array
        The two gradients stacked as columns: column 0 is the gradient in
        ``w``, column 1 the gradient in ``v``.
    """
    residual = y - np.matmul(X, w)
    zv = np.matmul(X, v)
    # sigma(zv) is needed three times below; evaluate it once.
    sigma = sigmafunc(zv)
    Xt = np.transpose(X)
    reg_scale = X.shape[0]*LambdaRegularization

    # d/dw of (y - zw)^2 / (2 sigma)
    grad_w = np.matmul(Xt, -np.divide(residual, sigma)) + reg_scale*w

    # d/dsigma of the per-sample loss, chained through sigma'(zv)
    dloss_dsigma = np.divide(1, 2*sigma) - np.divide(np.power(residual, 2), 2*np.power(sigma, 2))
    grad_v = np.matmul(Xt, np.multiply(dloss_dsigma, Dsigmafunc(zv))) + reg_scale*v

    return np.stack((grad_w, grad_v), axis=1)

def LossERM(X, y, w, v, sigmafunc, LambdaRegularization):
    """Regularized loss for the mean weights ``w`` and the sigma weights ``v``.

    Sums ``(y - X@w)**2 / (2*sigma) + log(sigma)/2`` over the samples, with
    ``sigma = sigmafunc(X@v)``, and adds the L2 penalty
    ``X.shape[0] * LambdaRegularization * (w.w + v.v) / 2``.

    Note that the ``log(sigma)/2`` term is negative wherever ``sigma < 1``, so
    the returned value is not guaranteed to be positive.

    Parameters
    ----------
    X : array
        Design matrix; ``X.shape[0]`` scales the regularization term.
    y : array
        Targets, one per row of ``X``.
    w, v : array
        Weight vectors.
    sigmafunc : callable
        ``sigma``, applied element-wise to ``X@v``; must return positive
        values for the division and the logarithm to be finite.
    LambdaRegularization : float
        L2 regularization strength.

    Returns
    -------
    float
        The total loss.
    """
    residual = y - np.matmul(X, w)
    # sigma(zv) appears in both terms; evaluate it once.
    sigma = sigmafunc(np.matmul(X, v))
    data_term = np.sum(np.divide(np.power(residual, 2), 2*sigma) + np.log(sigma)/2, 0)
    penalty = X.shape[0]*LambdaRegularization*(np.dot(w, w) + np.dot(v, v))/2
    return data_term + penalty

The GD step

In [84]:
def GDStepERM(X, y, wv, sigmafunc, Dsigmafunc, LambdaRegularization, LearningRate):
    """Apply one gradient-descent step to ``wv`` in place and return the new loss.

    ``wv`` stores the two weight vectors as columns: column 0 is passed on as
    ``w`` and column 1 as ``v``. The caller's array is modified; nothing but
    the loss evaluated at the updated weights is returned.
    """
    gradient = GradERM(X, y, wv[:,0], wv[:,1], sigmafunc, Dsigmafunc, LambdaRegularization)
    # In-place on purpose: the caller keeps iterating on the same array.
    wv -= LearningRate*gradient
    updated_w = wv[:,0]
    updated_v = wv[:,1]
    return LossERM(X, y, updated_w, updated_v, sigmafunc, LambdaRegularization)

Full GD function

In [None]:
def GDERM(X, y, wv, sigmafunc, Dsigmafunc, LambdaRegularization = 1, LearningRate = 0.02, MaxIter = 1e4, EpsConvergence = 1e-6, Verbose = True, VerboseRate = 100):
    """Run gradient descent on ``wv`` until convergence or ``MaxIter`` steps.

    ``wv`` is updated in place by ``GDStepERM``, so the caller's array holds
    the final weights when this function returns.

    Parameters
    ----------
    X, y : array
        Design matrix and targets, forwarded to ``LossERM`` / ``GDStepERM``.
    wv : array
        Initial weights, two columns (``w`` and ``v``); modified in place.
    sigmafunc, Dsigmafunc : callable
        ``sigma`` and its derivative.
    LambdaRegularization : float
        L2 regularization strength.
    LearningRate : float
        Step size of each update.
    MaxIter : number
        Upper bound on the number of steps.
    EpsConvergence : float
        The loop stops once the relative change of the loss between two
        consecutive steps is no larger than this value.
    Verbose : bool
        Print a progress report every ``VerboseRate`` iterations. The initial
        and the final status lines are printed regardless of this flag.
    VerboseRate : int
        Number of iterations between two progress reports.

    Returns
    -------
    numpy.ndarray
        Loss history; entry 0 is the loss before the first step.
    """
    Conv = 1
    NIter = 0
    Losses = [LossERM(X, y, wv[:,0], wv[:,1], sigmafunc, LambdaRegularization)]
    print("Iteration %s" % NIter)
    print("Current loss %s" % Losses[NIter])
    while (NIter < MaxIter) and (Conv > EpsConvergence):
        Losses.append(GDStepERM(X, y, wv, sigmafunc, Dsigmafunc, LambdaRegularization, LearningRate))
        NIter += 1
        change = np.abs(Losses[NIter] - Losses[NIter-1])
        # Normalize by the magnitude of the loss: the loss contains log(sigma)/2
        # and can be negative, and dividing by a signed value would produce a
        # negative criterion that ends the loop as if it had converged.
        scale = np.abs(Losses[NIter])
        # Fall back to the absolute change when the loss is exactly zero.
        Conv = change/scale if scale > 0 else change
        if Verbose and NIter % VerboseRate == 0:
            print("Iteration %s" % NIter)
            print("Current loss %s" % Losses[NIter])
            print("Current convergence criterion %s" % Conv)
    print("Iteration %s" % NIter)
    print("Current loss %s" % Losses[NIter])
    print("Current convergence criterion %s" % Conv)
    return np.array(Losses)
        

Sigma function $\sigma(z_v) = z_v^2$ and its derivative $\sigma'(z_v) = 2 z_v$

In [6]:
def sigmaSquare(zv):
    """Return ``zv**2`` element-wise (the quadratic sigma function)."""
    exponent = 2
    squared = np.power(zv, exponent)
    return squared

def DsigmaSquare(zv):
    """Return ``2*zv`` element-wise, the derivative of ``sigmaSquare``.

    ``np.multiply`` is used instead of ``2*zv`` so that list input is doubled
    element-wise (``2*[a, b]`` would repeat the list), matching how
    ``sigmaSquare`` treats array-like input.
    """
    return np.multiply(2, zv)

Sigma function $\sigma(z_v) = \log(1 + e^{z_v})$ (softplus) and its derivative $\sigma'(z_v) = 1/(1 + e^{-z_v})$

In [7]:
def sigmaSoftplus(zv):
    """Return the softplus ``log(1 + exp(zv))`` element-wise.

    Computed as ``logaddexp(0, zv)``, which is the same quantity evaluated in
    a numerically stable way: the direct formula overflows to ``inf`` for
    large ``zv`` and rounds to exactly 0 for very negative ``zv``, and a zero
    sigma makes ``LossERM`` divide by zero.
    """
    return np.logaddexp(0, zv)

def DsigmaSoftplus(zv):
    """Return the logistic sigmoid ``1 / (1 + exp(-zv))``, the derivative of softplus."""
    denominator = 1 + np.exp(-zv)
    return np.divide(1, denominator)

Main variables 

In [82]:
# Fixed seed so that a fresh "Restart & Run All" regenerates the same data,
# the same noise and the same initial weights.
SEED = 0
rnd.seed(SEED)

d = 100                    # number of columns of X (input dimension)
M = 1000                   # number of rows of X (samples)
alpha = M/d                # samples per dimension
LearningRate = 0.00002
LambdaRegularization = 1
X = rnd.normal(0, 1/np.sqrt(d), size = (M, d))
# Column 0 generates the mean of y, column 1 the noise variance (through softplus).
wvTrue = rnd.normal(0, 1, size = (d, 2))
# Random starting point for gradient descent; GDERM updates this array in place.
wvLearned = rnd.normal(0, 1, size = (d, 2))
yTrue = np.matmul(X, wvTrue[:,0])
# Per-sample Gaussian noise whose standard deviation is sqrt(sigma(X @ v_true)).
yNoisy = yTrue + rnd.normal(0, np.sqrt(sigmaSoftplus(np.matmul(X, wvTrue[:,1]))))

Runner

In [83]:
# Pass the configured regularization strength explicitly; otherwise GDERM
# silently uses its own default and edits to LambdaRegularization are ignored.
# Note: GDERM updates wvLearned in place, so re-running this cell continues
# from the already-trained weights.
Losses = GDERM(X, yNoisy, wvLearned, sigmaSoftplus, DsigmaSoftplus, LambdaRegularization=LambdaRegularization, LearningRate=LearningRate, MaxIter = 1000, VerboseRate = 50)

Iteration 0
Current loss 107643.31421119397
Iteration 50
Current loss 14624.755275236885
Current convergence criterion 0.0389425364164676
Iteration 100
Current loss 2758.751111034706
Current convergence criterion 0.026679766292326212
Iteration 150
Current loss 1222.980791802717
Current convergence criterion 0.007800500042422977
Iteration 200
Current loss 1023.8754854820069
Current convergence criterion 0.001208325274933448
Iteration 250
Current loss 998.0521146432887
Current convergence criterion 0.00016079555810851126
Iteration 300
Current loss 994.7021499599063
Current convergence criterion 2.0932311387582293e-05
Iteration 350
Current loss 994.2674873097054
Current convergence criterion 2.7175188172358565e-06
Iteration 375
Current loss 994.2260183722535
Current convergence criterion 9.790479897175527e-07


In [85]:
# Loss history returned by GDERM; entry 0 is the loss before the first step.
Losses

array([107643.31421119, 103293.97569731,  99126.33021243,  95132.32883084,
        91304.3315308 ,  87635.08014051,  84117.67382376,  80745.54678414,
        77512.44791611,  74412.42217158,  71439.79344467,  68589.14880518,
        65855.32393493,  63233.38964098,  60718.63933617,  58306.57739182,
        55992.90827912,  53773.52642628,  51644.50672701,  49602.09564359,
        47642.70285401,  45762.89339867,  43959.38028651,  42229.01752494,
        40568.79354156,  38975.8249688 ,  37447.35076549,  35980.72665182,
        34573.41983632,  33223.00401544,  31927.15462806,  30683.64434863,
        29490.3388043 ,  28345.19250208,  27246.2449539 ,  26191.61698769,
        25179.50723387,  24208.18877744,  23276.00596639,  22381.37136791,
        21522.76286455,  20698.72088274,  19907.8457469 ,  19148.79515273,
        18420.28175332,  17721.07085284,  17049.97820203,  16405.86789084,
        15787.65033328,  15194.28034012,  14624.75527524,  14078.11329164,
        13553.43164349,  