In [1]:
import scipy.io
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import linalg
from time import time

In [2]:
def sharpNuc(Z, kappa):
    """Sharp operator for the nuclear-norm ball constraint.

    Parameters
    ----------
    Z : matrix accepted by ``scipy.sparse.linalg.svds``
        Input matrix whose single (k=1) singular triplet is computed.
    kappa : scalar
        Scaling factor applied (with a sign flip) to the rank-one product.

    Returns
    -------
    The rank-one matrix ``(-kappa * u) @ vt`` built from the singular
    vectors returned by ``svds(Z, k=1)``.
    """
    # Only the singular vectors are needed; the singular value is discarded.
    left_vec, _sigma, right_vec_t = linalg.svds(Z, k=1)
    # Scale the left vector first, then form the outer product.
    scaled_left = -kappa * left_vec
    return scaled_left @ right_vec_t

This function times `projNuc` over `num_runs` runs (10 by default) and reports the mean and standard deviation of the elapsed time.

In [3]:
def averaged_time(Z, kappa, data_takes, num_runs=10):
    """Time repeated ``projNuc(Z, kappa)`` calls and print the timings.

    Parameters
    ----------
    Z, kappa
        Forwarded unchanged to ``projNuc`` on every run.
    data_takes : str
        Label inserted into each per-run message (string-concatenated).
    num_runs : int, optional
        Number of timed calls (default 10).

    Returns
    -------
    None. One line is printed per run, followed by a summary line with the
    mean and standard deviation of the measured wall-clock times.

    Notes
    -----
    NOTE(review): ``projNuc`` is not defined in the visible cells of this
    notebook; it must exist in the namespace before this function is called.
    """
    times = []
    for i in range(1, num_runs + 1):
        tstart = time()
        Z_proj = projNuc(Z, kappa)  # result unused; only the duration matters
        elapsed = time() - tstart
        times.append(elapsed)
        print(str(i) + '. Proj for ' + data_takes + ' data took {} sec'.format(elapsed))
    # Use automatic field numbering throughout: the previous
    # '{0:.3f}+-{0:.3f} ... {}' mixed manual and automatic numbering, which
    # makes str.format raise ValueError (and would have printed the mean twice).
    print('This process took {:.3f}+-{:.3f} sec after analizing {} runs'.format(np.mean(times),
                                                                                np.std(times), num_runs))

This function times `sharpNuc` over `num_runs` runs (10 by default) and reports the mean and standard deviation of the elapsed time.

In [4]:
def averaged_time(Z, kappa, data_takes, num_runs=10):
    """Time repeated ``sharpNuc(Z, kappa)`` calls and print the timings.

    Parameters
    ----------
    Z, kappa
        Forwarded unchanged to ``sharpNuc`` on every run.
    data_takes : str
        Label inserted into each per-run message (string-concatenated).
    num_runs : int, optional
        Number of timed calls (default 10).

    Returns
    -------
    None. One line is printed per run, followed by a summary line with the
    mean and standard deviation of the measured wall-clock times.
    """
    durations = []
    for attempt in range(num_runs):
        run_no = attempt + 1  # runs are reported 1-based
        began = time()
        _projected = sharpNuc(Z, kappa)  # result unused; only the duration matters
        took = time() - began
        durations.append(took)
        tail = ' data took {} sec'.format(took)
        print(str(run_no) + '. Proj for ' + data_takes + tail)

    mean_time = np.mean(durations)
    std_time = np.std(durations)
    summary = 'This process took {:.3f}+-{:.3f} sec after analizing {} runs'
    print(summary.format(mean_time, std_time, num_runs))

In [5]:
# Load the 100k dataset from its MATLAB .mat file.
data = scipy.io.loadmat('./dataset/ml-100k/ub_base')

# Flatten the stored arrays; the ids are shifted to 0-based because
# Python indexing starts from 0 whereas Matlab starts from 1.
Rating = data['Rating'].flatten()
MovID = data['MovID'].flatten() - 1
UserID = data['UserID'].flatten() - 1

# Matrix dimensions are the largest (1-based) ids found in the raw data.
nM = data['MovID'].max()
nU = data['UserID'].max()

# Sparse rating matrix with rows indexed by MovID and columns by UserID.
Z = csr_matrix((Rating, (MovID, UserID)), shape=(nM, nU), dtype=float)
kappa = 5000

averaged_time(Z, kappa, '100k')

1. Proj for 100k data took 0.029695510864257812 sec
2. Proj for 100k data took 0.036905527114868164 sec
3. Proj for 100k data took 0.03251361846923828 sec
4. Proj for 100k data took 0.03148794174194336 sec
5. Proj for 100k data took 0.02295207977294922 sec
6. Proj for 100k data took 0.024388551712036133 sec
7. Proj for 100k data took 0.02313685417175293 sec
8. Proj for 100k data took 0.023251056671142578 sec
9. Proj for 100k data took 0.032680511474609375 sec
10. Proj for 100k data took 0.029968738555908203 sec
This process took 0.029+-0.005 sec after analizing 10 runs


In [6]:
# NOTE: This cell can take a few minutes!
# Load the 1M dataset from its MATLAB .mat file.
data = scipy.io.loadmat('./dataset/ml-1m/ml1m_base')

# Flatten the stored arrays; the ids are shifted to 0-based because
# Python indexing starts from 0 whereas Matlab starts from 1.
Rating = data['Rating'].flatten()
MovID = data['MovID'].flatten() - 1
UserID = data['UserID'].flatten() - 1

# Matrix dimensions are the largest (1-based) ids found in the raw data.
nM = data['MovID'].max()
nU = data['UserID'].max()

# Sparse rating matrix with rows indexed by MovID and columns by UserID.
Z = csr_matrix((Rating, (MovID, UserID)), shape=(nM, nU), dtype=float)
kappa = 5000

averaged_time(Z, kappa, '1M')

1. Proj for 1M data took 0.2733113765716553 sec
2. Proj for 1M data took 0.29619622230529785 sec
3. Proj for 1M data took 0.29180097579956055 sec
4. Proj for 1M data took 0.29906344413757324 sec
5. Proj for 1M data took 0.297499418258667 sec
6. Proj for 1M data took 0.2947266101837158 sec
7. Proj for 1M data took 0.30373668670654297 sec
8. Proj for 1M data took 0.29648423194885254 sec
9. Proj for 1M data took 0.29363203048706055 sec
10. Proj for 1M data took 0.2945406436920166 sec
This process took 0.294+-0.008 sec after analizing 10 runs
