In [1]:
import numpy as np
X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
model.fit(X) 
NMF(alpha=0.0, beta=1, eta=0.1, init='random', l1_ratio=0.0, max_iter=200,
  n_components=2, nls_max_iter=2000, random_state=0, shuffle=False,
  solver='cd', sparseness=None, tol=0.0001, verbose=0)

NMF(alpha=0.0, beta=1, eta=0.1, init='random', l1_ratio=0.0, max_iter=200,
  n_components=2, nls_max_iter=2000, random_state=0, shuffle=False,
  solver='cd', sparseness=None, tol=0.0001, verbose=0)

In [2]:
model.components_

array([[ 2.09783018,  0.30560234],
       [ 2.13443044,  2.13171694]])

In [3]:
model.reconstruction_err_ 

0.001159934921600414

In [3]:
import nimfa.examples
nimfa.examples.recommendations.run()

Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113885.398
            - Sparseness basis: 0.145, mixture: 0.429
RMSE: 1.686
Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113570.262
            - Sparseness basis: 0.144, mixture: 0.475
RMSE: 1.696


In [4]:


from os.path import dirname, abspath
from os.path import join
from warnings import warn

import numpy as np

import nimfa
import metrics


try:
    import matplotlib.pylab as plb
except ImportError as exc:
    warn("Matplotlib must be installed to run Recommendations example.")


def run():
    """
    Run SNMF/R on the MovieLens data set.
    
    Factorization is run on `ua.base`, `ua.test` and `ub.base`, `ub.test` data set. This is MovieLens's data set split 
    of the data into training and test set. Both test data sets are disjoint and with exactly 10 ratings per user
    in the test set. 
    """
    for data_set in ['ua', 'ub']:
        V = read(data_set)
        W, H = factorize(V)
        rmse(W, H, data_set)


def factorize(V):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix. 
    
    Return basis and mixture matrices of the fitted factorization model. 
    
    :param V: The MovieLens data matrix. 
    :type V: `numpy.matrix`
    """
    snmf = nimfa.Snmf(V, seed="random_vcol", rank=30, max_iter=30, version='r', eta=1.,
                      beta=1e-4, i_conv=10, w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d" % (snmf, snmf.seed, snmf.rank))
    fit = snmf()
    sparse_w, sparse_h = fit.fit.sparseness()
    print("""Stats:
            - iterations: %d
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter,
                                                            fit.distance(metric='euclidean'),
                                                            sparse_w, sparse_h))
    return fit.basis(), fit.coef()


def read(data_set):
    """
    Read movies' ratings data from MovieLens data set. 
    
    :param data_set: Name of the split data set to be read.
    :type data_set: `str`
    """
    print("Read MovieLens data set")
    fname = join(dirname('/home/clay/anaconda2/lib/python2.7/site-packages/nimfa/'), "datasets", "MovieLens", "%s.base" % data_set)
    V = np.ones((943, 1682)) * 2.5
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        V[u - 1, i - 1] = r
    return V


def rmse(W, H, data_set):
    """
    Compute the RMSE error rate on MovieLens data set.
    
    :param W: Basis matrix of the fitted factorization model.
    :type W: `numpy.matrix`
    :param H: Mixture matrix of the fitted factorization model.
    :type H: `numpy.matrix`
    :param data_set: Name of the split data set to be read. 
    :type data_set: `str`
    """
    fname = join(dirname('/home/clay/anaconda2/lib/python2.7/site-packages/nimfa/'), "datasets", "MovieLens", "%s.test" % data_set)
    rmse = []
    print 'fname', fname
    test = np.zeros((943, 1682)) 
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        test[u-1][i-1] = r
        sc = max(min((W[u - 1, :] * H[:, i - 1])[0, 0], 5), 1)
        rmse.append((sc - r) ** 2)
    print("RMSE: %5.3f" % np.mean(rmse))
    VV = np.asarray(np.dot(W,H))
    for k in [1,3,5,10]:
        print 'k =', k
        counts = 0
        pres = 0
        ndcg = 0
        for ii, user in enumerate(VV):
            counts+=1
            r = np.argsort(user)[::-1]
            rr = []
            for jj in (r):
                if test[ii][jj] != 0:
                    rr.append(0 if VV[ii][jj]-0.49 < test[ii][jj] else 1)
            #print rr
            pres += metrics.precision_at_k(rr,k)
            ndcg += metrics.ndcg_at_k(rr,k)
        print k," is k and precision_at_k: ", pres*1.0/counts
        print k," is k and ndcg_at_k: ", ndcg*1.0/counts

if __name__ == "__main__":
    """Run the Recommendations example."""
    run()


Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113912.708
            - Sparseness basis: 0.142, mixture: 0.505
fname /home/clay/anaconda2/lib/python2.7/site-packages/nimfa/datasets/MovieLens/ua.test
RMSE: 1.682
k = 1
1  is k and precision_at_k:  0.0954400848356
1  is k and ndcg_at_k:  0.0954400848356
k = 3
3  is k and precision_at_k:  0.0968540120184
3  is k and ndcg_at_k:  0.128437374201
k = 5
5  is k and precision_at_k:  0.113043478261
5  is k and ndcg_at_k:  0.183024020286
k = 10
10  is k and precision_at_k:  0.150689289502
10  is k and ndcg_at_k:  0.348710108089
Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113693.194
            - Sparseness basis: 0.148, mixture: 0.427
fname /home/clay/anaconda2/lib/python2.7/site-packages/nimfa/datasets/MovieLens/ub.test
RMSE: 1.695
k = 1


In [38]:
user = [ 4.74850767,  2.82699823,  3.08403436]
r= np.argsort(user)[::-1]
print r

[0 2 1]


In [1]:


from os.path import dirname, abspath
from os.path import join
from warnings import warn

import numpy as np

import nimfa
import metrics


try:
    import matplotlib.pylab as plb
except ImportError as exc:
    warn("Matplotlib must be installed to run Recommendations example.")


def run():
    """
    Run SNMF/R on the MovieLens data set.
    
    Factorization is run on `ua.base`, `ua.test` and `ub.base`, `ub.test` data set. This is MovieLens's data set split 
    of the data into training and test set. Both test data sets are disjoint and with exactly 10 ratings per user
    in the test set. 
    """
    for data_set in ['ua', 'ub']:
        V = read(data_set)
        W, H = factorize(V)
        rmse(W, H, data_set)


def factorize(V):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix. 
    
    Return basis and mixture matrices of the fitted factorization model. 
    
    :param V: The MovieLens data matrix. 
    :type V: `numpy.matrix`
    """
    snmf = nimfa.Snmf(V, seed="random_vcol", rank=30, max_iter=30, version='r', eta=1.,
                      beta=1e-4, i_conv=10, w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d" % (snmf, snmf.seed, snmf.rank))
    fit = snmf()
    sparse_w, sparse_h = fit.fit.sparseness()
    print("""Stats:
            - iterations: %d
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter,
                                                            fit.distance(metric='euclidean'),
                                                            sparse_w, sparse_h))
    return fit.basis(), fit.coef()


def read(data_set):
    """
    Read movies' ratings data from MovieLens data set. 
    
    :param data_set: Name of the split data set to be read.
    :type data_set: `str`
    """
    print("Read MovieLens data set")
    fname = join(dirname('/home/clay/anaconda2/lib/python2.7/site-packages/nimfa/'), "datasets", "MovieLens", "%s.base" % data_set)
    V = np.ones((943, 1682)) * 2.5
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        V[u - 1, i - 1] = r
    return V


def rmse(W, H, data_set):
    """
    Compute the RMSE error rate on MovieLens data set.
    
    :param W: Basis matrix of the fitted factorization model.
    :type W: `numpy.matrix`
    :param H: Mixture matrix of the fitted factorization model.
    :type H: `numpy.matrix`
    :param data_set: Name of the split data set to be read. 
    :type data_set: `str`
    """
    fname = join(dirname('/home/clay/anaconda2/lib/python2.7/site-packages/nimfa/'), "datasets", "MovieLens", "%s.test" % data_set)
    rmse = []
    print 'fname', fname
    test = np.zeros((943, 1682)) 
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        test[u-1][i-1] = r
        sc = max(min((W[u - 1, :] * H[:, i - 1])[0, 0], 5), 1)
        rmse.append((sc - r) ** 2)
    print("RMSE: %5.3f" % np.mean(rmse))
    VV = np.asarray(np.dot(W,H))
    for k in [1,3,5,10]:
        print 'k =', k
        counts = 0
        pres = 0
        ndcg = 0
        for ii, user in enumerate(VV):
            counts+=1
            r = np.argsort(user)[::-1]
            rr = []
            for jj in (r):
                if test[ii][jj] != 0:
                    rr.append(0 if abs(VV[ii][jj] - test[ii][jj]) < 0.49 else 1)
            #print rr
            pres += metrics.precision_at_k(rr,k)
            ndcg += metrics.ndcg_at_k(rr,k)
        print k," is k and precision_at_k: ", pres*1.0/counts
        print k," is k and ndcg_at_k: ", ndcg*1.0/counts

if __name__ == "__main__":
    """Run the Recommendations example."""
    run()


Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113875.043
            - Sparseness basis: 0.143, mixture: 0.437
fname /home/clay/anaconda2/lib/python2.7/site-packages/nimfa/datasets/MovieLens/ua.test
RMSE: 1.683
k = 1
1  is k and precision_at_k:  0.725344644751
1  is k and ndcg_at_k:  0.725344644751
k = 3
3  is k and precision_at_k:  0.738776952987
3  is k and ndcg_at_k:  0.73868415281
k = 5
5  is k and precision_at_k:  0.741463414634
5  is k and ndcg_at_k:  0.744662552142
k = 10
10  is k and precision_at_k:  0.741993637328
10  is k and ndcg_at_k:  0.86929369075
Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113807.549
            - Sparseness basis: 0.141, mixture: 0.477
fname /home/clay/anaconda2/lib/python2.7/site-packages/nimfa/datasets/MovieLens/ub.test
RMSE: 1.697
k = 1
1  is

In [29]:


from os.path import dirname, abspath
from os.path import join
from warnings import warn

import numpy as np

import nimfa
import metrics


try:
    import matplotlib.pylab as plb
except ImportError as exc:
    warn("Matplotlib must be installed to run Recommendations example.")


def run():
    """
    Run SNMF/R on the MovieLens data set.
    
    Factorization is run on `ua.base`, `ua.test` and `ub.base`, `ub.test` data set. This is MovieLens's data set split 
    of the data into training and test set. Both test data sets are disjoint and with exactly 10 ratings per user
    in the test set. 
    """
    for rank in [5,8,12,10,30,50]:
        for data_set in ['ua', 'ub']:
            V = read(data_set)
            W, H = factorize(V, rank)
            rmse(W, H, data_set)


def factorize(V,rank):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix. 
    
    Return basis and mixture matrices of the fitted factorization model. 
    
    :param V: The MovieLens data matrix. 
    :type V: `numpy.matrix`
    """
    snmf = nimfa.Snmf(V, seed="random_vcol", rank=rank, max_iter=30, version='r', eta=1.,
                      beta=1e-4, i_conv=10, w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d" % (snmf, snmf.seed, snmf.rank))
    fit = snmf()
    sparse_w, sparse_h = fit.fit.sparseness()
    print("""Stats:
            - iterations: %d
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter,
                                                            fit.distance(metric='euclidean'),
                                                            sparse_w, sparse_h))
    return fit.basis(), fit.coef()


def read(data_set):
    """
    Read movies' ratings data from MovieLens data set. 
    
    :param data_set: Name of the split data set to be read.
    :type data_set: `str`
    """
    print("Read MovieLens data set")
    fname = join(dirname('C:/Anaconda2/Lib/site-packages/nimfa/'), "datasets", "MovieLens", "%s.base" % data_set)
    V = np.ones((943, 1682)) * 2.5
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        V[u - 1, i - 1] = r
    return V


def rmse(W, H, data_set):
    """
    Compute the RMSE error rate on MovieLens data set.
    
    :param W: Basis matrix of the fitted factorization model.
    :type W: `numpy.matrix`
    :param H: Mixture matrix of the fitted factorization model.
    :type H: `numpy.matrix`
    :param data_set: Name of the split data set to be read. 
    :type data_set: `str`
    """
    fname = join(dirname('C:/Anaconda2/Lib/site-packages/nimfa/'), "datasets", "MovieLens", "%s.test" % data_set)
    rmse = []
    print 'fname', fname
    test = np.zeros((943, 1682)) 
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        test[u-1][i-1] = r
        sc = max(min((W[u - 1, :] * H[:, i - 1])[0, 0], 5), 1)
        rmse.append((sc - r) ** 2)
    print("RMSE: %5.3f" % np.mean(rmse))
    VV = np.asarray(np.dot(W,H))
    for k in [1,3,5,10]:
        print 'k =', k
        presicion = []
        recall = []
        for ii, user in enumerate(VV):
            hits = 0
            a = test[ii,]
            a = a[np.nonzero(a)]
            """print a[np.nonzero(a)]
            print VV[ii,][np.nonzero(a)]
            print VV[ii,][np.nonzero(VV[ii,])]
            import sys
            sys.exit()"""
            mean = np.mean(np.array(a)) 
            r = np.argsort(user)[::-1]
            rr = []
            d = 0
            for jj in (r):
                if test[ii][jj] != 0 and d < k:
                    d += 1
                    if VV[ii][jj] >= mean:
                        hits += 1
            presicion.append(hits*1.0/k)
            recall.append(hits*1.0/len(a))
        print k," is k and precision_at_k: ", np.mean(np.array(presicion)) 
        print k," is k and recall_at_k: ", np.mean(np.array(recall)) 

if __name__ == "__main__":
    """Run the Recommendations example."""
    run()


Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 5
Stats:
            - iterations: 30
            - Euclidean distance: 145790.799
            - Sparseness basis: 0.049, mixture: 0.495
fname C:/Anaconda2/Lib/site-packages/nimfa\datasets\MovieLens\ua.test
RMSE: 1.755
k = 1
1  is k and precision_at_k:  0.29586426299
1  is k and recall_at_k:  0.029586426299
k = 3
3  is k and precision_at_k:  0.197949805585
3  is k and recall_at_k:  0.0593849416755
k = 5
5  is k and precision_at_k:  0.144856839873
5  is k and recall_at_k:  0.0724284199364
k = 10
10  is k and precision_at_k:  0.0908801696713
10  is k and recall_at_k:  0.0908801696713
Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 5
Stats:
            - iterations: 30
            - Euclidean distance: 145543.431
            - Sparseness basis: 0.050, mixture: 0.548
fname C:/Anaconda2/Lib/site-packages/nimfa\datasets\MovieLens\ub.test
RMSE: 1.770
k = 1
1  is k and precision_at_k:  0

IndexError: index 6 is out of bounds for axis 1 with size 3

In [28]:


from os.path import dirname, abspath
from os.path import join
from warnings import warn

import numpy as np

import nimfa
import metrics


try:
    import matplotlib.pylab as plb
except ImportError as exc:
    warn("Matplotlib must be installed to run Recommendations example.")


def run():
    """
    Run SNMF/R on the MovieLens data set.
    
    Factorization is run on `ua.base`, `ua.test` and `ub.base`, `ub.test` data set. This is MovieLens's data set split 
    of the data into training and test set. Both test data sets are disjoint and with exactly 10 ratings per user
    in the test set. 
    """
    for rank in [5,8,12,10,30,50]:
        for data_set in ['ua', 'ub']:
            V = read(data_set)
            W, H = factorize(V, rank)
            rmse(W, H, data_set)


def factorize(V,rank):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix. 
    
    Return basis and mixture matrices of the fitted factorization model. 
    
    :param V: The MovieLens data matrix. 
    :type V: `numpy.matrix`
    """
    snmf = nimfa.Snmf(V, seed="random_vcol", rank=rank, max_iter=2, version='r', eta=1.,
                      beta=1e-4, i_conv=10, w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d" % (snmf, snmf.seed, snmf.rank))
    fit = snmf()
    sparse_w, sparse_h = fit.fit.sparseness()
    print("""Stats:
            - iterations: %d
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter,
                                                            fit.distance(metric='euclidean'),
                                                            sparse_w, sparse_h))
    return fit.basis(), fit.coef()


def read(data_set):
    """
    Read movies' ratings data from MovieLens data set. 
    
    :param data_set: Name of the split data set to be read.
    :type data_set: `str`
    """
    print("Read MovieLens data set")
    fname = join(dirname('C:/Anaconda2/Lib/site-packages/nimfa/'), "datasets", "MovieLens", "%s.base" % data_set)
    V = np.ones((943, 1682)) * 2.5
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        V[u - 1, i - 1] = r
    return V


def rmse(W, H, data_set):
    """
    Compute the RMSE error rate on MovieLens data set.
    
    :param W: Basis matrix of the fitted factorization model.
    :type W: `numpy.matrix`
    :param H: Mixture matrix of the fitted factorization model.
    :type H: `numpy.matrix`
    :param data_set: Name of the split data set to be read. 
    :type data_set: `str`
    """
    fname = join(dirname('C:/Anaconda2/Lib/site-packages/nimfa/'), "datasets", "MovieLens", "%s.test" % data_set)
    rmse = []
    print 'fname', fname
    test = np.zeros((943, 1682)) 
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        test[u-1][i-1] = r
        sc = max(min((W[u - 1, :] * H[:, i - 1])[0, 0], 5), 1)
        rmse.append((sc - r) ** 2)
    print("RMSE: %5.3f" % np.mean(rmse))
    VV = np.asarray(np.dot(W,H))
    for k in [1,3,5,10]:
        print 'k =', k
        presicion = []
        recall = []
        for ii, user in enumerate(VV):
            hits = 0
            a = test[ii,]
            a = a[np.nonzero(a)]
            mean = np.mean(np.array(a)) 
            r = np.argsort(user)[::-1][:k]
            rr = []
            d = 0
            for jj in (r):
                if test[ii][jj] != 0:
                    hits += 1
            presicion.append(hits*1.0/k)
            recall.append(hits*1.0/len(a))
        print k," is k and precision_at_k: ", np.mean(np.array(presicion)) 
        print k," is k and recall_at_k: ", np.mean(np.array(recall)) 

if __name__ == "__main__":
    """Run the Recommendations example."""
    run()


Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 5
Stats:
            - iterations: 2
            - Euclidean distance: 155199.836
            - Sparseness basis: 0.004, mixture: 0.805
fname C:/Anaconda2/Lib/site-packages/nimfa\datasets\MovieLens\ua.test
RMSE: 1.832
k = 1
1  is k and precision_at_k:  0.0954400848356
1  is k and recall_at_k:  0.00954400848356
k = 3
3  is k and precision_at_k:  0.0911983032874
3  is k and recall_at_k:  0.0273594909862
k = 5
5  is k and precision_at_k:  0.0928950159067
5  is k and recall_at_k:  0.0464475079533
k = 10
10  is k and precision_at_k:  0.0845174973489
10  is k and recall_at_k:  0.0845174973489
Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 5
Stats:
            - iterations: 2
            - Euclidean distance: 154854.492
            - Sparseness basis: 0.004, mixture: 0.805
fname C:/Anaconda2/Lib/site-packages/nimfa\datasets\MovieLens\ub.test
RMSE: 1.839
k = 1
1  is k and precision_at_k