In [22]:
import numpy as np

Similarity Measures

In [52]:
def euclideanSim(a, b):
    """Similarity derived from the Euclidean distance between ``a`` and ``b``.

    The distance ``d = ||a - b||`` is mapped to ``1 / (1 + d)``, so identical
    inputs score 1.0 and the score shrinks towards 0 as the distance grows.
    """
    distance = np.linalg.norm(a - b)
    return 1.0 / (1.0 + distance)

def personSim(a, b):
    """Pearson-correlation similarity of ``a`` and ``b``, rescaled to [0, 1].

    Parameters
    ----------
    a, b : 1-D arrays or column matrices of equal length.

    Returns
    -------
    float
        * 1.0 when fewer than three observations are supplied (too few points
          for the correlation to be informative);
        * 0.5 (the value that corresponds to zero correlation) when the
          correlation is undefined because ``a`` or ``b`` has zero variance;
        * ``0.5 + 0.5 * r`` otherwise, where ``r`` is the Pearson coefficient.
    """
    if len(a) < 3:
        return 1.0
    # rowvar=0: each column is a variable and each row is an observation.
    # A constant vector makes the correlation 0/0; silence the warning here
    # and deal with the resulting NaN explicitly below.
    with np.errstate(divide='ignore', invalid='ignore'):
        r = np.corrcoef(a, b, rowvar=0)[0][1]
    if np.isnan(r):
        # Undefined correlation: return the neutral value instead of letting
        # NaN propagate into weighted averages and sort keys.
        return 0.5
    return 0.5 + 0.5 * r
    
def cosSim(a, b):
    """Cosine similarity of ``a`` and ``b``, rescaled from [-1, 1] to [0, 1].

    Parameters
    ----------
    a, b : 1-D arrays or column matrices of equal length.

    Returns
    -------
    float
        ``0.5 + 0.5 * cos(a, b)``. If either vector has zero norm the cosine
        is undefined and the neutral value 0.5 is returned.
    """
    # Flatten to plain 1-D float arrays so the dot product is a true scalar
    # for both ndarray and matrix inputs (no reliance on converting a 1x1
    # matrix to float).
    x = np.asarray(a, dtype=float).ravel()
    y = np.asarray(b, dtype=float).ravel()
    num = float(np.dot(x, y))
    denom = np.linalg.norm(x) * np.linalg.norm(y)
    if denom == 0:
        # Avoid 0/0: a zero vector has no direction.
        return 0.5
    return 0.5 + 0.5 * num / denom

In [53]:
def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` as a similarity-weighted average.

    For every column ``j`` that ``user`` has a non-zero entry for, the
    similarity between column ``item`` and column ``j`` is measured with
    ``simMeas`` over the rows where both columns are positive. The estimate is
    the average of the user's ratings weighted by those similarities.

    Parameters
    ----------
    dataMat : numpy matrix (the ``.A`` attribute is used); indexed as
        ``dataMat[user, item]``, with 0 treated as "not rated".
    user : row index of the user.
    simMeas : callable ``simMeas(colA, colB)`` returning a similarity score.
    item : column index of the item to estimate.

    Returns
    -------
    The weighted-average estimate, or 0 when the similarities sum to zero.
    """
    numItems = np.shape(dataMat)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(numItems):
        rating = dataMat[user, other]
        if rating != 0:
            # Rows in which both columns hold a positive rating.
            bothRated = np.logical_and(dataMat[:, item].A > 0,
                                       dataMat[:, other].A > 0)
            rows = np.nonzero(bothRated)[0]
            if len(rows) > 0:
                sim = simMeas(dataMat[rows, item], dataMat[rows, other])
            else:
                sim = 0
            weightSum += sim
            weightedRatings += sim * rating
    return weightedRatings / weightSum if weightSum != 0 else 0

In [70]:
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the ``N`` best-scoring items that ``user`` has not rated yet.

    Parameters
    ----------
    dataMat : numpy matrix (the ``.A`` attribute is used); 0 marks an unrated
        entry in row ``user``.
    user : row index of the user.
    N : maximum number of recommendations to return.
    simMeas : similarity function handed through to ``estMethod``.
    estMethod : callable ``estMethod(dataMat, user, simMeas, item)`` that
        produces an estimated score for one item.

    Returns
    -------
    A list of up to ``N`` ``(item, estimatedScore)`` tuples ordered by
    descending score, or the string ``'you rated everything'`` when the
    user's row contains no zero entry.
    """
    candidates = np.nonzero(dataMat[user, :].A == 0)[1]
    if candidates.size == 0:
        return 'you rated everything'
    scored = [(item, estMethod(dataMat, user, simMeas, item))
              for item in candidates]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]

In [67]:
def loadExData():
    """Return a small 7x5 example ratings table as a nested list.

    A fresh list is built on every call. Zeros are the entries the
    recommender functions in this notebook treat as "not rated".
    """
    ratings = [
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [1, 1, 1, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1],
    ]
    return ratings

In [68]:
# Build the demo ratings matrix. np.asmatrix is used because the np.mat alias
# was removed in NumPy 2.0; both produce the same numpy.matrix object.
myMat = np.asmatrix(loadExData())
# Overwrite a few entries so the item columns are no longer identical.
myMat[0, 1] = myMat[0, 0] = myMat[1, 0] = myMat[2, 0] = 4
myMat[3, 3] = 2
myMat

matrix([[4, 4, 1, 0, 0],
        [4, 2, 2, 0, 0],
        [4, 1, 1, 0, 0],
        [5, 5, 5, 2, 0],
        [1, 1, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1]])

In [72]:
# Top-3 (default N) recommendations for user 6, the last row of myMat,
# scoring items with the Euclidean-distance similarity.
recommend(myMat, 6, simMeas=euclideanSim)

[(0, 1.0), (1, 1.0), (2, 1.0)]

Dealing with a sparse matrix using SVD

In [94]:
def loadExData2():
    """Return a sparser 11x11 example ratings table as a nested list.

    A fresh list is built on every call. Zeros are the entries the
    recommender functions in this notebook treat as "not rated".
    """
    ratings = [
        [2, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 3, 0, 0, 2, 2, 0, 0],
        [5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
        [4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5],
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
        [0, 0, 0, 3, 0, 0, 0, 0, 4, 5, 0],
        [1, 1, 2, 1, 1, 2, 1, 0, 4, 5, 0],
    ]
    return ratings

In [93]:
# Decompose the larger example matrix; Sigma holds its singular values.
# np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
U, Sigma, VT = np.linalg.svd(np.asmatrix(loadExData2()))
Sigma

array([  1.34342819e+01,   1.18190832e+01,   8.20176076e+00,
         6.86912480e+00,   5.29063022e+00,   3.91213561e+00,
         2.94562509e+00,   2.35486137e+00,   2.08702082e+00,
         7.08715931e-01,   1.18673615e-16])

In [79]:
# Squared singular values: each one's share of the matrix's total "energy".
sig2 = Sigma * Sigma

In [80]:
# Total energy: the sum of all squared singular values.
sum(sig2)

496.99999999999966

In [81]:
# Threshold to reach: 90% of the total energy.
0.9 * sum(sig2)

447.29999999999973

In [84]:
# Energy captured by the four largest singular values.
sum(sig2[:4])

434.62441339532046

In [85]:
# Energy captured by the five largest singular values.
sum(sig2[:5]) # more than 90% of the total energy

462.61518152879387

The first five singular values account for more than 90% of the total energy (the sum of the squared singular values), so we can reduce our matrix from an 11-dimensional space to a 5-dimensional one. Now let's create a function that calculates similarities in this 5-dimensional space: we use the SVD to map our dishes (the items) into a lower-dimensional space.

In [103]:
def svdEst(dataMat, user, simMeas, item, numSV=5):
    """Estimate ``user``'s rating of ``item`` from item similarity in SVD space.

    The items (columns of ``dataMat``) are projected onto the leading
    singular directions, and the estimate is the average of the user's
    existing ratings weighted by the similarity between ``item`` and each
    rated item in that reduced space.

    Parameters
    ----------
    dataMat : ratings matrix indexed as ``dataMat[user, item]``; 0 is treated
        as "not rated". ndarrays are accepted and viewed as a matrix.
    user : row index of the user.
    simMeas : callable ``simMeas(colA, colB)`` taking two column matrices and
        returning a similarity score.
    item : column index of the item to estimate.
    numSV : number of singular values/directions to keep (default 5). It is
        reduced automatically when the matrix has fewer usable (numerically
        non-zero) singular values.

    Returns
    -------
    The weighted-average estimate, or 0 when the similarities sum to zero or
    the matrix has no usable singular value.

    Notes
    -----
    The SVD is recomputed on every call, and one progress line is printed per
    compared item pair.
    """
    # np.asmatrix (np.mat was removed in NumPy 2.0) guarantees that ``*``
    # below is matrix multiplication, whatever array type was passed in.
    dataMat = np.asmatrix(dataMat)
    n = np.shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, _ = np.linalg.svd(dataMat)
    # Keep at most numSV components, and never one whose singular value is
    # numerically zero, since the projection divides by the singular values.
    if Sigma.size:
        tol = np.finfo(float).eps * max(np.shape(dataMat)) * Sigma[0]
    else:
        tol = 0.0
    k = min(numSV, int(np.sum(Sigma > tol)))
    if k <= 0:
        return 0
    # Item coordinates in the k-dimensional space: A^T * U_k * Sigma_k^{-1}.
    # Dividing column-wise by Sigma[:k] equals multiplying by the inverse of
    # the diagonal matrix without forming or inverting it.
    xformedItems = (dataMat.T * U[:, :k]) / Sigma[:k]
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    else:
        return ratSimTotal / simTotal

In [96]:
# Rebind myMat to the larger, sparser example for the SVD-based estimator.
# np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
myMat = np.asmatrix(loadExData2())

In [105]:
# Top-3 (default N) recommendations for user 2, estimating scores in the
# reduced SVD space with the correlation-based similarity.
# NOTE(review): the correlation of SVD coordinates changes if the sign of a
# singular vector flips, so the printed similarities may vary with the
# LAPACK build. Confirm before relying on the exact numbers.
recommend(myMat, 2, estMethod=svdEst, simMeas=personSim)

the 0 and 7 similarity is: 0.876344
the 0 and 9 similarity is: 0.416515
the 1 and 7 similarity is: 0.975678
the 1 and 9 similarity is: 0.549691
the 2 and 7 similarity is: 0.959399
the 2 and 9 similarity is: 0.589358
the 3 and 7 similarity is: 0.461018
the 3 and 9 similarity is: 0.187038
the 4 and 7 similarity is: 0.579846
the 4 and 9 similarity is: 0.111735
the 5 and 7 similarity is: 0.458803
the 5 and 9 similarity is: 0.740204
the 6 and 7 similarity is: 0.361788
the 6 and 9 similarity is: 0.698233
the 8 and 7 similarity is: 0.688688
the 8 and 9 similarity is: 0.688433
the 10 and 7 similarity is: 0.371606
the 10 and 9 similarity is: 0.604393


[(6, 2.976091657951736), (10, 2.8577665526626057), (5, 2.8520427070008876)]