In [1]:
import numpy as np
X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
model.fit(X) 
NMF(alpha=0.0, beta=1, eta=0.1, init='random', l1_ratio=0.0, max_iter=200,
  n_components=2, nls_max_iter=2000, random_state=0, shuffle=False,
  solver='cd', sparseness=None, tol=0.0001, verbose=0)

NMF(alpha=0.0, beta=1, eta=0.1, init='random', l1_ratio=0.0, max_iter=200,
  n_components=2, nls_max_iter=2000, random_state=0, shuffle=False,
  solver='cd', sparseness=None, tol=0.0001, verbose=0)

In [2]:
model.components_

array([[ 2.09783018,  0.30560234],
       [ 2.13443044,  2.13171694]])

In [3]:
model.reconstruction_err_ 

0.001159934921600414

In [3]:
import nimfa.examples
nimfa.examples.recommendations.run()

Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113885.398
            - Sparseness basis: 0.145, mixture: 0.429
RMSE: 1.686
Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113570.262
            - Sparseness basis: 0.144, mixture: 0.475
RMSE: 1.696


In [39]:

"""
    ##############################################
    Recommendations (``examples.recommendations``)
    ##############################################
    
    In this examples of collaborative filtering we consider movie recommendation using common MovieLens data set. It 
    represents typical cold start problem. A recommender system compares the user's profile to reference
    characteristics from the user's social environment. In the collaborative filtering approach, the recommender
    system identify users who share the same preference with the active user and propose items which the like-minded
    users favoured (and the active user has not yet seen).     
    
    We used the MovieLens 100k data set in this example. This data set consists of 100 000 ratings (1-5) from 943
    users on 1682 movies. Each user has rated at least 20 movies. Simple demographic info for the users is included. 
    Factorization is performed on a split data set as provided by the collector of the data. The data is split into 
    two disjoint sets each consisting of training set and a test set with exactly 10 ratings per user. 
    
    It is common that matrices in the field of recommendation systems are very sparse (ordinary user rates only a small
    fraction of items from the large items' set), therefore ``scipy.sparse`` matrix formats are used in this example. 
    
    The configuration of this example is SNMF/R factorization method using Random Vcol algorithm for initialization. 
    
    .. note:: MovieLens movies' rating data set used in this example is not included in the `datasets` and need to be
      downloaded. Download links are listed in the ``datasets``. Download compressed version of the MovieLens 100k. 
      To run the example, the extracted data set must exist in the ``MovieLens`` directory under ``datasets``. 
      
    .. note:: No additional knowledge in terms of ratings' timestamps, information about items and their
       genres or demographic information about users is used in this example. 
      
    To run the example simply type::
        
        python recommendations.py
        
    or call the module's function::
    
        import nimfa.examples
        nimfa.examples.recommendations.run()
        
    .. note:: This example uses ``matplotlib`` library for producing visual interpretation of the RMSE error measure. 
    
"""

from os.path import dirname, abspath
from os.path import join
from warnings import warn

import numpy as np

import nimfa
import metrics


try:
    import matplotlib.pylab as plb
except ImportError as exc:
    warn("Matplotlib must be installed to run Recommendations example.")


def run():
    """
    Run SNMF/R on the MovieLens data set.
    
    Factorization is run on `ua.base`, `ua.test` and `ub.base`, `ub.test` data set. This is MovieLens's data set split 
    of the data into training and test set. Both test data sets are disjoint and with exactly 10 ratings per user
    in the test set. 
    """
    for data_set in ['ua', 'ub']:
        V = read(data_set)
        W, H = factorize(V)
        rmse(W, H, data_set)


def factorize(V):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix. 
    
    Return basis and mixture matrices of the fitted factorization model. 
    
    :param V: The MovieLens data matrix. 
    :type V: `numpy.matrix`
    """
    snmf = nimfa.Snmf(V, seed="random_vcol", rank=30, max_iter=30, version='r', eta=1.,
                      beta=1e-4, i_conv=10, w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d" % (snmf, snmf.seed, snmf.rank))
    fit = snmf()
    sparse_w, sparse_h = fit.fit.sparseness()
    print("""Stats:
            - iterations: %d
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter,
                                                            fit.distance(metric='euclidean'),
                                                            sparse_w, sparse_h))
    return fit.basis(), fit.coef()


def read(data_set):
    """
    Read movies' ratings data from MovieLens data set. 
    
    :param data_set: Name of the split data set to be read.
    :type data_set: `str`
    """
    print("Read MovieLens data set")
    fname = join(dirname('C:/Anaconda2/Lib/site-packages/nimfa/'), "datasets", "MovieLens", "%s.base" % data_set)
    V = np.ones((943, 1682)) * 2.5
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        V[u - 1, i - 1] = r
    return V


def rmse(W, H, data_set):
    """
    Compute the RMSE error rate on MovieLens data set.
    
    :param W: Basis matrix of the fitted factorization model.
    :type W: `numpy.matrix`
    :param H: Mixture matrix of the fitted factorization model.
    :type H: `numpy.matrix`
    :param data_set: Name of the split data set to be read. 
    :type data_set: `str`
    """
    fname = join(dirname('C:/Anaconda2/Lib/site-packages/nimfa/'), "datasets", "MovieLens", "%s.test" % data_set)
    rmse = []
    print 'fname', fname
    test = np.zeros((943, 1682)) 
    for line in open(fname):
        u, i, r, _ = list(map(int, line.split()))
        test[u-1][i-1] = r
        sc = max(min((W[u - 1, :] * H[:, i - 1])[0, 0], 5), 1)
        rmse.append((sc - r) ** 2)
    print("RMSE: %5.3f" % np.mean(rmse))
    VV = np.asarray(np.dot(W,H))
    for k in [1,3,5,10]:
        print 'k =', k
        counts = 0
        pres = 0
        ndcg = 0
        for ii, user in enumerate(VV):
            counts+=1
            r = np.argsort(user)[::-1]
            rr = []
            for jj in (r):
                if test[ii][jj] != 0:
                    rr.append(1 if VV[ii][jj]-0.49 < test[ii][jj] else 0)
            #print rr
            pres += metrics.precision_at_k(rr,k)
            ndcg += metrics.ndcg_at_k(rr,k)
        print k," is k and precision_at_k: ", pres*1.0/counts
        print k," is k and ndcg_at_k: ", ndcg*1.0/counts

if __name__ == "__main__":
    """Run the Recommendations example."""
    run()


Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113908.611
            - Sparseness basis: 0.144, mixture: 0.387
fname C:/Anaconda2/Lib/site-packages/nimfa\datasets\MovieLens\ua.test
RMSE: 1.683
k = 1
1  is k and precision_at_k:  0.901378579003
1  is k and ndcg_at_k:  0.901378579003
k = 3
3  is k and precision_at_k:  0.900318133616
3  is k and ndcg_at_k:  0.900525512878
k = 5
5  is k and precision_at_k:  0.88525980912
5  is k and ndcg_at_k:  0.892682403729
k = 10
10  is k and precision_at_k:  0.850053022269
10  is k and ndcg_at_k:  0.947138361287
Read MovieLens data set
Algorithm: snmf - r
Initialization: random_vcol
Rank: 30
Stats:
            - iterations: 30
            - Euclidean distance: 113556.436
            - Sparseness basis: 0.144, mixture: 0.508
fname C:/Anaconda2/Lib/site-packages/nimfa\datasets\MovieLens\ub.test
RMSE: 1.703
k = 1
1  is k and precision_at_k:  0.916224814

In [38]:
user = [ 4.74850767,  2.82699823,  3.08403436]
r= np.argsort(user)[::-1]
print r

[0 2 1]
