In [1]:
import scipy as sp
import numpy as np
import copy
import numpy.random as rnd

The gradient (and loss) as a function :

In [66]:
def GradERM(X, y, w, v, sigmafunc, Dsigmafunc, LambdaRegularization):
    zw = np.matmul(X, w)
    zv = np.matmul(X, v)
    GradwTotal = np.matmul(np.transpose(X), - np.divide(y - zw,2*sigmafunc(zv))) + X.shape[0]*LambdaRegularization*w
    print(np.matmul(np.transpose(X), np.divide(y - zw,2*sigmafunc(zv))))
    print(X.shape[0]*LambdaRegularization*w)
    GradvTotal = (np.matmul(np.transpose(X), np.multiply((np.divide(1,(2*sigmafunc(zv))) - np.divide(np.power((y - zw),2),(2*np.power(sigmafunc(zv),2)))), Dsigmafunc(zv))) 
                  + X.shape[0]*LambdaRegularization*v)
    return(np.stack((GradwTotal, GradvTotal), axis = 1))

def LossERM(X, y, w, v, sigmafunc, LambdaRegularization):
    zw = np.matmul(X, w)
    zv = np.matmul(X, v)
    return( np.sum(np.divide(np.power((y - zw),2),(2*sigmafunc(zv))) + np.log(sigmafunc(zv))/2, 0) + X.shape[0]*LambdaRegularization*( np.dot(w,w) + np.dot(v,v) )/2 )

The GD step

In [64]:
def GDStepERM(X, y, wv, sigmafunc, Dsigmafunc, LambdaRegularization, LearningRate):
    wv = wv - LearningRate*GradERM(X, y, wv[:,0], wv[:,1], sigmafunc, Dsigmafunc, LambdaRegularization)
    return(LossERM(X, y, wv[:,0], wv[:,1], sigmafunc, LambdaRegularization))

Full GD function

In [56]:
def GDERM(X, y, wv, sigmafunc, Dsigmafunc, LambdaRegularization = 1, LearningRate = 0.02, MaxIter = 1e4, EpsConvergence = 1e-6, Verbose = True, VerboseRate = 100):
    Conv = 1
    NIter = 0
    Losses = [LossERM(X, y, wv[:,0], wv[:,1], sigmafunc, LambdaRegularization)]
    print("Iteration %s" % NIter)
    print("Current loss %s" % Losses[NIter])
    while(NIter < MaxIter and Conv > EpsConvergence):
        Losses.append(GDStepERM(X, y, wv, sigmafunc, Dsigmafunc, LambdaRegularization, LearningRate))
        NIter = NIter + 1
        if(Verbose and NIter%VerboseRate == 0):
            print("Iteration %s" % NIter)
            print("Current loss %s" % Losses[NIter])
    return(np.array(Losses))
        

Sigma Functions for $\sigma(z_v) = z_v^2$

In [57]:
def sigmaSquare(zv):
    return(np.power(zv,2))

def DsigmaSquare(zv):
    return(2*zv)

Main variables 

In [74]:
d = 100
M = 1000
alpha = M/d
LearningRate = 0.002
LambdaRegularization = 1
X = rnd.normal(0, 1/np.sqrt(d), size = (M, d))
wvTrue = rnd.normal(0, 1, size = (d, 2))
wvLearned = rnd.normal(0, 1, size = (d, 2))
yTrue = np.matmul(X, wvTrue[:,0])
yNoisy = yTrue + rnd.normal(0, np.sqrt(sigmaSquare(np.matmul(X, wvTrue[:,1]))))

Runner

In [70]:
Losses = GDERM(X, yNoisy, wvLearned, sigmaSquare, DsigmaSquare, MaxIter = 10, VerboseRate = 1)

Iteration 0
Current loss 59484823.959240444
[ -2379324.29588064  -2548383.82840931   8125840.26539826
   5071264.79486226   2540331.35944241  -2711852.94127258
  -3523498.32367224  -2095488.29930469    -71542.78647092
    967479.09464746   9469743.47635025  -4860308.65678998
   5727795.38936456  -1114857.631439    -2442067.57609841
   9474919.19313389   6711752.45373026   3117425.27356141
   3333486.33202621   2138398.56391295  -8018682.662986
  -6361697.05471797  -2273715.4894636     -92334.58526826
   2087577.65970374   7495270.18510313  -2300957.3650478
   3995579.79030584  -2062223.67345905   3862629.73486652
    516200.02098819   2574210.31509288   1906595.35761303
  12780266.85732502  -3201826.26093173  -5466602.07288034
   1649327.52629763  -2797798.38133188    977506.85695739
   1821235.58121013    662007.61494559   3200652.17365096
  -5099615.76017758    687862.63714979   1045943.77565264
  -2803737.51767674   3622018.45045021   8096357.87902031
  -4425346.15074774  -3978253.7

In [71]:
LossERM(X, yNoisy, wvLearned[:,0], wvLearned[:,1], sigmaSquare, LambdaRegularization)

59484823.959240444

In [75]:
GradERM(X, yNoisy, wvLearned[:,0], wvLearned[:,1], sigmaSquare, DsigmaSquare, LambdaRegularization)

[  49593.68367851   34443.15341598   -3022.04192241   36890.66913666
   45822.76315929  -70323.46464283  -16962.02861929  183741.83408912
   21059.12775894  -31123.83536293  -13299.77593345  -68309.76431084
    1308.37944848  -59886.39396546  -26517.98345256  -30660.54747677
 -120590.62475725   14396.07769143   72124.60920581   24420.19202397
   -2947.94940668  -94452.37321345 -199798.36171598  -42251.44146127
   40849.55784148  -14268.98913735   43907.65494082    7246.81647147
  -99195.25820932    9730.45123698   90794.42513757   48770.75169475
   51026.42320602  -32650.569519     13660.5210131    45114.15655233
  -25463.83827988  -33352.97173887   34493.81437162    9100.76790778
 -109374.57003783   99857.70841802  -18662.37051881   12439.10342329
  -76765.87887154   27734.87086155 -109809.86768798  110580.21491603
   17868.46921544  186342.67706753  -32410.04689115   81926.8246304
   24763.80374885  -27456.91899634  114198.21443128 -122686.79660274
  -54371.76758036   52190.11039719 

array([[ 4.99407703e+04,  1.30174832e+08],
       [ 3.50061212e+04,  8.44895326e+07],
       [-3.65073158e+03,  8.85487413e+06],
       [ 3.69199275e+04,  1.02234215e+08],
       [ 4.54031058e+04,  1.13700128e+08],
       [-7.03782519e+04, -1.75907041e+08],
       [-1.70485859e+04, -6.07877902e+07],
       [ 1.82935007e+05,  4.38092248e+08],
       [ 2.07926376e+04,  5.81759977e+07],
       [-3.20734502e+04, -5.77958047e+07],
       [-1.00014683e+04, -6.03655474e+07],
       [-6.88837404e+04, -1.70447415e+08],
       [ 1.06066046e+03,  1.99920021e+06],
       [-5.76532279e+04, -9.82071197e+07],
       [-2.83166729e+04, -6.43677269e+07],
       [-3.05202626e+04, -9.35706385e+07],
       [-1.18787587e+05, -2.68781994e+08],
       [ 1.43264163e+04,  4.63482411e+07],
       [ 7.14891113e+04,  1.62824974e+08],
       [ 2.37205994e+04,  2.94212250e+07],
       [-1.99481124e+03,  1.30150181e+07],
       [-9.53840540e+04, -2.52015010e+08],
       [-1.97763947e+05, -4.49487582e+08],
       [-4.