In [1]:
from time import perf_counter
from time import time

import numpy as np
import scipy.io
from scipy.sparse import csr_matrix
from scipy.sparse import linalg

In [2]:
def lmo_nuc(Z, kappa):
    """Linear minimization oracle (LMO) for the nuclear-norm ball constraint.

    Computes only the leading singular triplet of ``Z`` (truncated SVD with
    ``k=1``) and returns the rank-one matrix built from its singular vectors,
    scaled by ``-kappa``.

    Parameters
    ----------
    Z : matrix accepted by ``scipy.sparse.linalg.svds``
        Input matrix (the callers in this notebook pass a CSR matrix).
    kappa : scalar
        Scaling factor applied to the rank-one term (the ball radius).

    Returns
    -------
    numpy.ndarray
        Dense 2-D array equal to ``-kappa * outer(u, vh)``.
    """
    # Only the singular vectors are needed; the singular value is discarded.
    left_vec, _sigma, right_vec_h = linalg.svds(Z, k=1)
    # np.outer flattens both inputs, so the (m, 1) and (1, n) factors from
    # svds combine into a single m-by-n rank-one matrix.
    rank_one = np.outer(left_vec, right_vec_h)
    return -kappa * rank_one

In [3]:
def run100k(data_path='./dataset/ml-100k/ub_base', kappa=5000):
    """Time a single ``lmo_nuc`` call on the 100k rating data set.

    Loads the ``Rating``, ``UserID`` and ``MovID`` arrays from a MATLAB file,
    assembles them into a sparse movies-by-users matrix, and prints how long
    one ``lmo_nuc`` evaluation takes.

    Parameters
    ----------
    data_path : str, optional
        Path handed to ``scipy.io.loadmat``. Defaults to the 100k data file.
    kappa : scalar, optional
        Scaling factor forwarded to ``lmo_nuc``. Defaults to 5000.

    Returns
    -------
    None
        The elapsed time is printed, not returned.
    """
    data = scipy.io.loadmat(data_path)  # load 100k dataset

    ratings = data['Rating'].flatten()
    # Python indexing starts from 0 whereas Matlab starts from 1.
    user_idx = data['UserID'].flatten() - 1
    movie_idx = data['MovID'].flatten() - 1

    # The largest 1-based ID gives the matrix extent along each axis; cast to
    # plain int so the shape does not carry the (possibly narrow) array dtype.
    n_movies = int(np.amax(data['MovID']))
    n_users = int(np.amax(data['UserID']))

    Z = csr_matrix((ratings, (movie_idx, user_idx)),
                   shape=(n_movies, n_users), dtype=float)

    # perf_counter is monotonic and high resolution, which matters for the
    # millisecond-scale intervals measured here; time() is neither guaranteed.
    tstart = perf_counter()
    lmo_nuc(Z, kappa)  # result is discarded: only the run time is of interest
    elapsed = perf_counter() - tstart
    print('sharp of 100k data takes {} sec'.format(elapsed))

In [4]:
# NOTE: This one can take a few minutes!
def run1M(data_path='./dataset/ml-1m/ml1m_base', kappa=5000):
    """Time a single ``lmo_nuc`` call on the 1M rating data set.

    Loads the ``Rating``, ``UserID`` and ``MovID`` arrays from a MATLAB file,
    assembles them into a sparse movies-by-users matrix, and prints how long
    one ``lmo_nuc`` evaluation takes.

    Parameters
    ----------
    data_path : str, optional
        Path handed to ``scipy.io.loadmat``. Defaults to the 1M data file.
    kappa : scalar, optional
        Scaling factor forwarded to ``lmo_nuc``. Defaults to 5000.

    Returns
    -------
    None
        The elapsed time is printed, not returned.
    """
    data = scipy.io.loadmat(data_path)  # load 1M dataset

    ratings = data['Rating'].flatten()
    # Python indexing starts from 0 whereas Matlab starts from 1.
    user_idx = data['UserID'].flatten() - 1
    movie_idx = data['MovID'].flatten() - 1

    # The largest 1-based ID gives the matrix extent along each axis; cast to
    # plain int so the shape does not carry the (possibly narrow) array dtype.
    n_movies = int(np.amax(data['MovID']))
    n_users = int(np.amax(data['UserID']))

    Z = csr_matrix((ratings, (movie_idx, user_idx)),
                   shape=(n_movies, n_users), dtype=float)

    # perf_counter is monotonic and high resolution, which matters for the
    # sub-second intervals measured here; time() is neither guaranteed.
    tstart = perf_counter()
    lmo_nuc(Z, kappa)  # result is discarded: only the run time is of interest
    elapsed = perf_counter() - tstart
    print('sharp of 1M data takes {} sec'.format(elapsed))

In [5]:
# Repeat both benchmarks five times so that first-call overhead can be told
# apart from steady-state timings in the printed output.
for i in range(5):
    print("Run #" + str(i + 1))
    run100k()
    run1M()

Run #1
sharp of 100k data takes 0.04515337944030762 sec
sharp of 1M data takes 0.23843717575073242 sec
Run #2
sharp of 100k data takes 0.018034696578979492 sec
sharp of 1M data takes 0.21952199935913086 sec
Run #3
sharp of 100k data takes 0.010908842086791992 sec
sharp of 1M data takes 0.11727786064147949 sec
Run #4
sharp of 100k data takes 0.009882688522338867 sec
sharp of 1M data takes 0.13300776481628418 sec
Run #5
sharp of 100k data takes 0.008525371551513672 sec
sharp of 1M data takes 0.11441254615783691 sec
