In [1]:
import scipy.io
import numpy as np
from scipy.sparse import csr_matrix
from projL1 import projL1
from time import time

In [2]:
def projNuc(Z, kappa):
    """Project the matrix ``Z`` onto the nuclear norm ball.

    ``Z`` is decomposed with a reduced SVD, its singular values are passed
    through ``projL1`` together with ``kappa``, and the matrix is rebuilt
    from the original singular vectors and the processed singular values.

    Parameters
    ----------
    Z : array_like
        Matrix handed to ``np.linalg.svd``.
    kappa : scalar
        Forwarded unchanged to ``projL1`` (presumably the radius of the
        ball -- confirm against ``projL1``).

    Returns
    -------
    numpy.ndarray
        The recomposed matrix.
    """
    left, sigma, right_h = np.linalg.svd(Z, full_matrices=False)
    sigma_proj = projL1(sigma, kappa)
    # Broadcasting scales column j of `left` by sigma_proj[j], which avoids
    # materialising a diagonal matrix.
    scaled_left = left * sigma_proj
    return np.matmul(scaled_left, right_h)

This function performs `num_runs` timed runs (10 by default) and reports the mean and standard deviation of the elapsed time.

In [3]:
def averaged_time(Z, kappa, data_takes, num_runs=10):
    """Time repeated calls to ``projNuc(Z, kappa)`` and print a summary.

    One line is printed per run with its elapsed time, followed by a final
    line giving the mean and standard deviation over all runs.

    Parameters
    ----------
    Z
        Passed unchanged to ``projNuc``.
    kappa
        Passed unchanged to ``projNuc``.
    data_takes : str
        Label concatenated into the per-run message (e.g. ``'100k'``).
    num_runs : int, optional
        Number of timed repetitions; must be at least 1. Defaults to 10.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``num_runs`` is smaller than 1, since no statistics can be
        computed from zero measurements.
    """
    # Function-scope import: the notebook's import cell only brings in
    # `time.time`. `perf_counter` is a monotonic, high-resolution clock, so
    # measured intervals are not disturbed by system clock adjustments.
    from time import perf_counter

    if num_runs < 1:
        raise ValueError('num_runs must be at least 1, got {}'.format(num_runs))

    times = []
    for i in range(1, num_runs + 1):
        tstart = perf_counter()
        projNuc(Z, kappa)  # result is discarded; only the duration matters
        elapsed = perf_counter() - tstart
        times.append(elapsed)
        print(str(i) + '. Proj for ' + data_takes + ' data took {} sec'.format(elapsed))
    print('This process took {:.3f}+-{:.3f} sec after analizing {} runs'.format(np.mean(times),
                                                                                np.std(times), num_runs))

In [4]:
# Load the 100k dataset from its MATLAB .mat file.
data = scipy.io.loadmat('./dataset/ml-100k/ub_base')

# MATLAB indices are 1-based, so the ID vectors are shifted to Python's
# 0-based indexing.
Rating = data['Rating'].flatten()
UserID = data['UserID'].flatten() - 1
MovID = data['MovID'].flatten() - 1

# The largest (still 1-based) ID on each axis gives the matrix dimensions.
nM, nU = (np.amax(data[field]) for field in ('MovID', 'UserID'))

# Dense rating matrix: rows are indexed by MovID, columns by UserID.
Z = csr_matrix(
    (Rating, (MovID, UserID)),
    shape=(nM, nU),
    dtype=float,
).toarray()
kappa = 5000

averaged_time(Z, kappa, '100k')

1. Proj for 100k data took 1.5467145442962646 sec
2. Proj for 100k data took 1.3932392597198486 sec
3. Proj for 100k data took 1.3941683769226074 sec
4. Proj for 100k data took 1.3770391941070557 sec
5. Proj for 100k data took 1.3610055446624756 sec
6. Proj for 100k data took 1.3536536693572998 sec
7. Proj for 100k data took 1.357839822769165 sec
8. Proj for 100k data took 1.646808385848999 sec
9. Proj for 100k data took 1.7229139804840088 sec
10. Proj for 100k data took 1.3634488582611084 sec
This process took 1.452+-0.129 sec after analizing 10 runs


In [5]:
# NOTE: This cell can take a few minutes to run!
# Load the 1M dataset from its MATLAB .mat file.
data = scipy.io.loadmat('./dataset/ml-1m/ml1m_base')

# MATLAB indices are 1-based, so the ID vectors are shifted to Python's
# 0-based indexing.
Rating = data['Rating'].flatten()
UserID = data['UserID'].flatten() - 1
MovID = data['MovID'].flatten() - 1

# The largest (still 1-based) ID on each axis gives the matrix dimensions.
nM, nU = (np.amax(data[field]) for field in ('MovID', 'UserID'))

# Dense rating matrix: rows are indexed by MovID, columns by UserID.
Z = csr_matrix(
    (Rating, (MovID, UserID)),
    shape=(nM, nU),
    dtype=float,
).toarray()
kappa = 5000

averaged_time(Z, kappa, '1M')

1. Proj for 1M data took 76.16440486907959 sec
2. Proj for 1M data took 74.99347686767578 sec
3. Proj for 1M data took 74.62046027183533 sec
4. Proj for 1M data took 81.9343888759613 sec
5. Proj for 1M data took 76.35615754127502 sec
6. Proj for 1M data took 74.83433938026428 sec
7. Proj for 1M data took 82.34572529792786 sec
8. Proj for 1M data took 82.20451354980469 sec
9. Proj for 1M data took 75.76599168777466 sec
10. Proj for 1M data took 74.54405355453491 sec
This process took 77.376+-3.188 sec after analizing 10 runs
