In [10]:
import numpy as np
from scipy import optimize

In [11]:
def obj(theta, nusers, nmovies, X):
    M = X.astype(np.bool)
    u = theta[:nusers].reshape(-1, 1)
    v = theta[nusers:].reshape(-1, 1)
    return np.sum( (M*(u@v.T) - X)**2 )

def fprime(theta, nusers, nmovies, X):
    M = X.astype(np.bool)
    u = theta[:nusers].reshape(-1, 1)
    v = theta[nusers:].reshape(-1, 1)
    vhat = M.T * v
    uhat = M * u
    gradu = np.sum(vhat * (v@u.T - X.T), axis=1).reshape(-1, 1)
    gradv = np.sum(uhat * (u@v.T - X), axis=1).reshape(-1, 1)
    return np.vstack([gradu, gradv])

In [28]:
from dataset import MovieLensDataset
# d = MovieLensDataset('./data/')
d = MovieLensDataset('/Users/nick/test/')
X = d.X.A
M = X.astype(np.bool)

u = np.abs(np.random.randn(d.n_users))
u /= np.linalg.norm(u)
v = np.abs(np.random.randn(d.n_movies))
v /= np.linalg.norm(v)

Loading MovieLens dataset from /Users/nick/test/ with mode sparse..
Column names are userId, movieId, rating, timestamp
Processed 1000 lines. 7 users x 801 movies.
Dataset contains 999 ratings (17.81701444622793% matrix density)


In [29]:
theta0 = np.hstack([u, v])
theta0 = theta0.reshape(-1, 1)
print(theta0.shape)
obj(theta0, d.n_users, d.n_movies, X)

(808, 1)


59190.75180150526

In [114]:
root = optimize.newton(obj, theta0, fprime=fprime, args=(d.n_users, X, ))

IndexError: too many indices for array: array is 0-dimensional, but 2 were indexed

In [33]:
root = optimize.minimize(obj, theta0, args=(d.n_users, X, ))

KeyboardInterrupt: 

# Second order model

In [7]:
def fsecond(theta, m, n, X):
    M = X.astype(np.bool)
    u = theta[:m].reshape(-1, 1)
    v = theta[m:].reshape(-1, 1)
    vhat = M.T * v
    vhat_norm_vect = np.sum(vhat * vhat, axis=1)
    uhat = M * u
    uhat_norm_vect = np.sum(uhat * uhat, axis=1)
    hessian = np.zeros((m+n, m+n), dtype=np.float32) # change to 64
#     print(vhat_norm_vect.shape, uhat_norm_vect.shape)
#     print(hessian.shape, m, n, u.shape, v.shape, (2*(v@u.T)*M.T-X.T).shape, hessian[n:, :m].shape)
    err = 2*(u@v.T)*M-X
    hessian[n:, :n] = err
    hessian[:n, n:] = err.T #2*(v@u.T)*M.T-X.T
    
    # set diag
    row,col = np.diag_indices(hessian.shape[0])
    hessian[row, col] = np.hstack([vhat_norm_vect, uhat_norm_vect])
    
    return hessian

In [149]:
print(fprime(theta0, d.n_users, d.n_movies, X).shape)
fsecond(theta0, d.n_users, d.n_movies, X).shape

(808, 1)


(808, 808)

In [23]:
theta0 = np.random.randn(d.n_movies + d.n_users).reshape(-1, 1)

In [17]:
def rho_f(alpha_):
    return obj(theta+alpha_*direction, d.n_users, d.n_movies, X)

In [22]:
from tqdm import tqdm
theta = theta0
max_iter = 100
m1 = 0.001
delta = 1e-6
for i in tqdm(range(max_iter)):
    grad = fprime(theta, d.n_users, d.n_movies, X)
    gnorm = np.linalg.norm(grad)
#     grad /= gnorm
    print("Grad norm:", gnorm)
    if gnorm < 1e-6:
        print('done!')
        break
    hess = fsecond(theta, d.n_users, d.n_movies, X)
    # compute smallest eigen val of hes (hessian correction)
#     lambdas = np.linalg.eigvals(hess)
    # most computational intensive part O(n^3)
    lambdas, _ = np.linalg.eigh(hess)
#     print('lamda n', np.min(lambdas), lambdas[0])
    eps = max(0., delta-lambdas[0])
    print("Eps:", eps)
    hess += eps * np.eye(theta.shape[0])
    direction = -np.linalg.solve(hess, grad)
#     direction /= np.linalg.norm(direction)
#     print('dir norm', np.linalg.norm(direction))
    
    # backtracking LS + Armijo
    alpha = 1.
    step = .9
    # grad@direction = derivative of rho(0)
#     drho0 = grad.T@direction
#     drho0 /= np.linalg.norm(drho0)
#     print("drho0", drho0)
    alpha = optimize.minimize(rho_f, alpha)
    alpha = alpha.x
#     while obj(theta+alpha*direction, d.n_users, d.n_movies, X) > (obj(theta, d.n_users, d.n_movies, X) + m1*alpha*drho0):
#         alpha *= step
    print('alpha', alpha)
    theta = theta + alpha * direction

  1%|          | 1/100 [00:00<00:13,  7.24it/s]

Grad norm: 103491.23605995566
Eps: 266.9239206777344
alpha [1.03085596e-09]
Grad norm: 103526.99560431429
Eps: 266.56207375390625


  3%|▎         | 3/100 [00:00<00:11,  8.25it/s]

alpha [3.29418257e-10]
Grad norm: 103528.07758721132
Eps: 266.55169777734375
alpha [7.18479453e-10]
Grad norm: 103530.42647152729
Eps: 266.529297875
alpha [-2.04378106e-10]
Grad norm: 103529.95637970655


  6%|▌         | 6/100 [00:00<00:10,  8.77it/s]

Eps: 266.53375344140625
alpha [-3.03068679e-10]
Grad norm: 103528.48961912445
Eps: 266.5477610097656
alpha [-2.32662349e-11]
Grad norm: 103526.87608438196


  8%|▊         | 8/100 [00:00<00:10,  8.92it/s]

Eps: 266.563233421875
alpha [4.53743501e-11]
Grad norm: 103527.332851383
Eps: 266.5588694082031
alpha [2.34952375e-10]
Grad norm: 103528.52422251899


 10%|█         | 10/100 [00:01<00:10,  8.67it/s]

Eps: 266.5474558339844
alpha [3.03404727e-11]
Grad norm: 103528.60650320198
Eps: 266.5466623769531
alpha [-3.81501769e-11]
Grad norm: 103528.44232325593


 12%|█▏        | 12/100 [00:01<00:10,  8.69it/s]

Eps: 266.5482492910156
alpha [-1.04358997e-11]
Grad norm: 103528.40629454794
Eps: 266.548584984375
alpha [2.6279997e-10]
Grad norm: 103529.0672594982


 14%|█▍        | 14/100 [00:01<00:09,  9.22it/s]

Eps: 266.5422678457031
alpha [1.38737266e-10]
Grad norm: 103529.66644129313
Eps: 266.5365305410156
alpha [-8.61782498e-11]
Grad norm: 103530.07718713871
Eps: 266.5326242910156


 16%|█▌        | 16/100 [00:01<00:09,  9.31it/s]

alpha [2.83554538e-10]
Grad norm: 103523.79433485844
Eps: 266.59320168359375
alpha [2.23324006e-10]
Grad norm: 103529.42665777728
Eps: 266.5390940175781


 18%|█▊        | 18/100 [00:01<00:08,  9.54it/s]

alpha [1.41660495e-10]
Grad norm: 103529.78196122765
Eps: 266.53570656640625
alpha [9.33520811e-12]
Grad norm: 103529.8213564196
Eps: 266.5353098378906
alpha [-9.69661389e-11]
Grad norm: 103530.24890265433


 20%|██        | 20/100 [00:02<00:08,  9.71it/s]

Eps: 266.531251
alpha [-1.86419367e-10]
Grad norm: 103516.51867955456
Eps: 266.6656809316406
alpha [4.56838265e-09]
Grad norm: 103531.04074551506
Eps: 266.5251780019531


 24%|██▍       | 24/100 [00:02<00:07, 10.18it/s]

alpha [-8.50454751e-10]
Grad norm: 103529.13109939071
Eps: 266.5433664785156
alpha [-6.6446537e-10]
Grad norm: 103524.10691839551
Eps: 266.59185891015625
alpha [6.45784665e-10]
Grad norm: 103530.93373092481


 26%|██▌       | 26/100 [00:02<00:07, 10.10it/s]

Eps: 266.5264597402344
alpha [-1.88833191e-10]
Grad norm: 103530.06368424864
Eps: 266.53473000390625
alpha [7.02894357e-11]
Grad norm: 103526.45121387253
Eps: 266.56939797265625


 28%|██▊       | 28/100 [00:02<00:07,  9.84it/s]

alpha [-2.70062289e-10]
Grad norm: 103527.95838484369
Eps: 266.5549021230469
alpha [-1.95665516e-10]
Grad norm: 103529.73599916842
Eps: 266.5379343496094


 30%|███       | 30/100 [00:03<00:07,  9.55it/s]

alpha [-4.81521514e-11]
Grad norm: 103529.18383354691
Eps: 266.5431833730469
alpha [-6.46432198e-11]
Grad norm: 103529.50971988885
Eps: 266.54010109765625
alpha [-7.052944e-11]


 33%|███▎      | 33/100 [00:03<00:06,  9.87it/s]

Grad norm: 103529.27812391368
Eps: 266.54229836328125
alpha [3.56771418e-10]
Grad norm: 103530.91498208317
Eps: 266.5267038808594
alpha [-5.36422071e-11]
Grad norm: 103530.1835264318


 34%|███▍      | 34/100 [00:03<00:06,  9.87it/s]

Eps: 266.53369240625
alpha [-1.09582136e-10]
Grad norm: 103529.94594159056
Eps: 266.5359201894531
alpha [3.54743623e-10]
Grad norm: 103523.19760133595
Eps: 266.60113625390625


 37%|███▋      | 37/100 [00:03<00:06,  9.76it/s]

alpha [-1.46014967e-10]
Grad norm: 103469.20921658362
Eps: 267.20636086328125
alpha [1.48359316e-08]
Grad norm: 103528.19335384683
Eps: 266.5806589589844


 40%|████      | 40/100 [00:04<00:06,  9.84it/s]

alpha [-1.10582201e-10]
Grad norm: 103529.09448395204
Eps: 266.5719919667969
alpha [2.20265853e-09]
Grad norm: 103533.8099085754
Eps: 266.5269480214844
alpha [-2.3738696e-10]
Grad norm: 103532.78032915422


 42%|████▏     | 42/100 [00:04<00:05, 10.05it/s]

Eps: 266.5367441640625
alpha [-1.03484857e-10]
Grad norm: 103532.31407122192
Eps: 266.54119973046875
alpha [1.48959155e-10]
Grad norm: 103532.78089302767
Eps: 266.5367441640625


 44%|████▍     | 44/100 [00:04<00:05, 10.21it/s]

alpha [9.17785289e-11]
Grad norm: 103533.35403926847
Eps: 266.5312815175781
alpha [-1.56642877e-10]
Grad norm: 103531.59453321717
Eps: 266.5480661855469
alpha [-3.50693904e-10]
Grad norm: 103525.04690136845


 46%|████▌     | 46/100 [00:04<00:05,  9.98it/s]

Eps: 266.61151223046875
alpha [8.20944648e-10]
Grad norm: 103528.64897118014
Eps: 266.5765390859375
alpha [-5.00412633e-10]
Grad norm: 103531.97065380213


 49%|████▉     | 49/100 [00:05<00:05,  9.84it/s]

Eps: 266.5447092519531
alpha [-2.86567497e-10]
Grad norm: 109113.21062921533
Eps: 291.6317453847656
alpha [4.35786074e-07]
Grad norm: 103919.50754157625


 51%|█████     | 51/100 [00:05<00:04,  9.97it/s]

Eps: 265.76080422265625
alpha [2.80196707e-08]
Grad norm: 104004.06560522309
Eps: 265.1409006582031
alpha [-2.06346839e-09]
Grad norm: 104029.48253775724
Eps: 264.96319680078125


 53%|█████▎    | 53/100 [00:05<00:04,  9.95it/s]

alpha [-4.98396503e-10]
Grad norm: 104020.55284568782
Eps: 265.0255136953125
alpha [1.05158127e-10]
Grad norm: 104022.9577892121
Eps: 265.00872902734375


 56%|█████▌    | 56/100 [00:05<00:04, 10.08it/s]

alpha [1.81463765e-09]
Grad norm: 104030.4966415952
Eps: 264.9563303457031
alpha [2.23001694e-10]
Grad norm: 104032.30189765916
Eps: 264.94384865625
alpha [-2.85581314e-10]
Grad norm: 104030.37842026225


 58%|█████▊    | 58/100 [00:05<00:04, 10.04it/s]

Eps: 264.9571543203125
alpha [-2.78602784e-10]
Grad norm: 104022.34597782028
Eps: 265.01312355859375
alpha [2.32456633e-10]
Grad norm: 104029.48226397627
Eps: 264.9635324941406


 60%|██████    | 60/100 [00:06<00:03, 10.13it/s]

alpha [2.34772208e-10]
Grad norm: 104031.51137361466
Eps: 264.949463890625
alpha [-3.04805513e-11]
Grad norm: 104031.40550720275
Eps: 264.9501963125
alpha [-3.87684477e-10]
Grad norm: 104028.42042491645


 62%|██████▏   | 62/100 [00:06<00:03, 10.05it/s]

Eps: 264.9709177480469
alpha [8.83849253e-11]
Grad norm: 104029.53542556187
Eps: 264.9631662832031
alpha [-1.83865909e-10]
Grad norm: 103960.73202923762
Eps: 265.46368508203125


 66%|██████▌   | 66/100 [00:06<00:03, 10.17it/s]

alpha [8.79552597e-09]
Grad norm: 104022.73596287984
Eps: 265.0211496816406
alpha [-2.41246403e-10]
Grad norm: 104053.52281016226
Eps: 264.8099680410156
alpha [-4.74771887e-11]
Grad norm: 104056.55322432106


 68%|██████▊   | 68/100 [00:06<00:03, 10.10it/s]

Eps: 264.7894297109375
alpha [-3.63000534e-09]
Grad norm: 104040.29611611516
Eps: 264.9007883535156
alpha [-5.33779246e-10]
Grad norm: 104037.0250013008
Eps: 264.92334084375


 70%|███████   | 70/100 [00:07<00:02, 10.08it/s]

alpha [-1.45644967e-10]
Grad norm: 104042.3541795998
Eps: 264.8866892324219
alpha [-9.64570286e-10]
Grad norm: 104033.85020574272
Eps: 264.9454050527344
alpha [-4.38356238e-11]


 72%|███████▏  | 72/100 [00:07<00:02, 10.15it/s]

Grad norm: 104032.96595474862
Eps: 264.9515390859375
alpha [-2.34219378e-10]
Grad norm: 104031.2429769936
Eps: 264.9635019765625
alpha [3.15689149e-10]
Grad norm: 104032.7510378247


 74%|███████▍  | 74/100 [00:07<00:02, 10.23it/s]

Eps: 264.9530344472656
alpha [-1.55253858e-10]
Grad norm: 104032.06732199417
Eps: 264.957764671875
alpha [-2.43979943e-10]
Grad norm: 104029.82638865725
Eps: 264.97332863671875


 76%|███████▌  | 76/100 [00:07<00:02, 10.12it/s]

alpha [1.61480588e-10]
Grad norm: 104031.29763147222
Eps: 264.9631052480469
alpha [3.96940731e-11]
Grad norm: 104032.48431211768
Eps: 264.95489601953125


 80%|████████  | 80/100 [00:08<00:02,  9.98it/s]

alpha [1.83608468e-10]
Grad norm: 104033.15898898157
Eps: 264.9502268300781
alpha [-4.54732965e-12]
Grad norm: 104033.13932981202
Eps: 264.9503489003906
alpha [1.66180047e-11]


 81%|████████  | 81/100 [00:08<00:01,  9.87it/s]

Grad norm: 104033.4501638184
Eps: 264.94818215234375
alpha [-2.54054976e-10]
Grad norm: 104008.740575672
Eps: 265.1220713125
alpha [5.38485673e-09]


 83%|████████▎ | 83/100 [00:08<00:01,  9.89it/s]

Grad norm: 104034.46119152007
Eps: 264.94268898828125
alpha [-1.55109156e-10]
Grad norm: 104032.01109332714
Eps: 264.9596872792969
alpha [6.25546957e-10]
Grad norm: 104040.96486112804


 85%|████████▌ | 85/100 [00:08<00:01, 10.03it/s]

Eps: 264.8979197011719
alpha [1.60318529e-10]
Grad norm: 104037.88432753905
Eps: 264.9191599355469
alpha [-1.14871551e-09]
Grad norm: 104032.09583047612
Eps: 264.9592600332031


 87%|████████▋ | 87/100 [00:08<00:01, 10.00it/s]

alpha [1.87706296e-10]
Grad norm: 104033.2896147566
Eps: 264.9509592519531
alpha [-9.53425166e-11]
Grad norm: 104026.0302091189
Eps: 265.0014963613281


 90%|█████████ | 90/100 [00:09<00:01,  9.57it/s]

alpha [2.10892293e-09]
Grad norm: 104034.72636024494
Eps: 264.941163109375
alpha [-5.23053397e-10]
Grad norm: 104032.00494610133
Eps: 264.96002297265625


 93%|█████████▎| 93/100 [00:09<00:00,  9.78it/s]

alpha [1.60080914e-10]
Grad norm: 104033.56799166331
Eps: 264.9491892324219
alpha [8.00186424e-10]
Grad norm: 104048.72549643695
Eps: 264.84521584375
alpha [-3.46283322e-09]
Grad norm: 104036.88560166655


 94%|█████████▍| 94/100 [00:09<00:00,  9.75it/s]

Eps: 264.92669777734375
alpha [-1.12466646e-10]
Grad norm: 104035.8972024101
Eps: 264.93353371484375
alpha [-1.4294609e-10]
Grad norm: 104035.42788271318
Eps: 264.936768578125


 97%|█████████▋| 97/100 [00:09<00:00,  9.93it/s]

alpha [-4.83098281e-10]
Grad norm: 104033.5095604725
Eps: 264.9500437246094
alpha [3.97713074e-10]
Grad norm: 104040.48091922555
Eps: 264.9019785390625


100%|██████████| 100/100 [00:10<00:00,  9.86it/s]

alpha [-1.63932241e-09]
Grad norm: 104032.2055291869
Eps: 264.959229515625
alpha [6.01095272e-11]
Grad norm: 104032.51097511762
Eps: 264.95709328515625
alpha [-3.35839314e-10]





In [67]:
direction = np.linalg.solve(fsecond(theta0, d.n_users, d.n_movies, X), fprime(theta0, d.n_users, X))
print(direction.shape)

(10334, 1)


In [74]:
grad = fprime(theta, d.n_users, X)
np.linalg.norm(grad)

nan

In [30]:
from scipy import optimize
root = optimize.minimize(obj, theta0, args=(d.n_users, d.n_movies,  X, ))

In [27]:
xx = root.x
grad = fprime(xx, d.n_users, d.n_movies, X)
np.linalg.norm(grad)

8320.651581673017

In [132]:
theta_init = np.abs(np.random.randn(d.n_users+d.n_movies))
sol = optimize.minimize(obj, theta_init, jac=fprime, hess=fsecond, args=(d.n_users, d.n_movies, X, ), options={'gtol': 1e-8, 'disp': True})

  warn('Method %s does not use Hessian information (hess).' % method,


In [133]:
np.linalg.norm(fprime(sol.x, d.n_users, d.n_movies, X))

107013.66903576582

In [16]:
X

array([[ 8,  8,  8, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       ...,
       [ 8,  0,  0, ...,  0,  0,  0],
       [ 0, 10,  8, ...,  0,  0,  0],
       [ 9,  0,  0, ...,  8,  6,  7]], dtype=uint8)