In [3]:
from numpy import *
U, sigma, VT = linalg.svd([[1,1],[7,7]])

In [4]:
U

array([[-0.14142136, -0.98994949],
       [-0.98994949,  0.14142136]])

In [5]:
sigma

array([10.,  0.])

In [6]:
VT

array([[-0.70710678, -0.70710678],
       [-0.70710678,  0.70710678]])

In [9]:
from numpy import linalg as la

def euclidSimm(inA, inB):
    return 1.0/(1.0+la.norm(inA-inB))

def pearsSim(inA, inB):
    if len(inA)<3:
        return 1.0
    return 0.5 + 0.5*corrcoef(inA, inB, rowvar=0)[0][1]

def cosSim(inA, inB):
    num = float(inA.T*inB)
    denom = la.norm(inA)*la.norm(inB)
    return 0.5+0.5*(num/denom)

In [17]:
def loadExData():
    return [
        [4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 2, 0],
        [2, 2, 2, 0, 0],
        [1, 1, 1, 0, 0],
        [5, 5, 5, 0, 0]
    ]

In [19]:
myMat = mat(loadExData())
euclidSimm(myMat[:,0], myMat[:,0])

1.0

In [14]:
def standEst(dataMat, user, simMeas, item):
    n = shape(dataMat)[1] #物品数目
    simTotal = 0.0
    ratSimTotal = 0.0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            continue
        overLap = nonzero(logical_and(dataMat[:, item].A>0, dataMat[:, j].A>0))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item], dataMat[overLap, j])
        simTotal += similarity
        ratSimTotal += similarity*userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal

In [15]:
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    unratedItems = nonzero(dataMat[user,:].A==0)[1]
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = []
    for item in unratedItems:
        estimatedScore = estMethod(dataMat, user, simMeas, item)
        itemScores.append((item, estimatedScore))
    return sorted(itemScores, key=lambda jj:jj[1], reverse=True)[:N]

In [20]:

myMat

matrix([[4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 2, 0],
        [2, 2, 2, 0, 0],
        [1, 1, 1, 0, 0],
        [5, 5, 5, 0, 0]])

In [21]:
recommend(myMat, 2)

[(2, 2.5), (1, 2.0243290220056256)]

In [22]:
recommend(myMat, 2, simMeas=euclidSimm)

[(2, 3.0), (1, 2.8266504712098603)]

In [23]:
recommend(myMat, 2, simMeas=pearsSim)

[(2, 2.5), (1, 2.0)]

In [24]:
def loadExData2():
    return[[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
           [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
           [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
           [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
           [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
           [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
           [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
           [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
           [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
           [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
           [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]

In [25]:
U, Sigma, VT = la.svd(mat(loadExData2()))

In [27]:
Sigma

array([15.77075346, 11.40670395, 11.03044558,  4.84639758,  3.09292055,
        2.58097379,  1.00413543,  0.72817072,  0.43800353,  0.22082113,
        0.07367823])

In [31]:
Sig2 = Sigma**2
sum(Sig2)*0.9

487.7999999999994

In [32]:
sum(Sig2[:3])

500.5002891275791

In [33]:
def svdEst(dataMat, user, simMeas, item):
    n = shape(dataMat)[1]
    simTotal = 0.0; ratSimTotal = 0.0
    U, Sigma, VT = linalg.svd(dataMat)
    Sig4 = mat(eye(4)*Sigma[:4])
    xformedItems = dataMat.T * U[:,:4] * Sig4.I
    for j in range(n):
        userRating = dataMat[user,j]
        if userRating == 0 or j== item: continue
        similarity = simMeas(xformedItems[item,:].T, xformedItems[j,:].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0: return 0
    else: return ratSimTotal/simTotal

In [35]:
recommend(myMat, 2, estMethod=svdEst)

the 1 and 0 similarity is: 0.498142
the 1 and 3 similarity is: 0.498131
the 1 and 4 similarity is: 0.509974
the 2 and 0 similarity is: 0.552670
the 2 and 3 similarity is: 0.552976
the 2 and 4 similarity is: 0.217301


[(2, 2.253270755977714), (1, 1.9921514636756923)]

In [37]:
def printMat(inMat, thresh = 0.8):
    for i in range(32):
        for k in range(32):
            if float(inMat[i,k]) > thresh:
                print(1)
            else: print(0)
        print('')

In [39]:
def imgCompress(numSV=3, thresh=0.8):
    myl =[]
    for line in open('0_5.txt').readlines():
        newRow = []
        for i in range(32):
            newRow.append(int(line[i]))
        myl.append(newRow)
    myMat = mat(myl)
    print("**** priginal matrix ****")
    printMat(myMat, thresh)
    U, Sigma, VT = linalg.svd(myMat)
    SigRecon = mat(zeros((numSV, numSV)))
    for k in range(numSV):
        SigRecon[k,k] = Sigma[k]
    reconMat = U[:,:numSV]*SigRecon*VT[:numSV,:]
    print("****reconstructed matrix  using %d singular values *****" % numSV)
    printMat(reconMat, thresh)

In [40]:
imgCompress(3)

**** priginal matrix ****
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
0
0
0
0
1
1
1
1
1
0
0
0
0
0
0
0
0

0
0
0
0
0
0
0
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
0
0
0

0
0
0
0
0
0
1
1
1
1
1
1
0
0
0
0
0
0
0
0
1
1
1
1
0
0
0
0
0
0
0
0

0
0
0
0
0
0
1
1
1
1
1
1
0
0
0
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
0
0

0
0
0
0
0
0
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
1
1
1
1
0
0
0
0
0
0
0

0
0
0
0
0
0
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
1
1
1
1
0
0
0
0
0
0
0

0
0
0
0
0
0
0
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
1
1
1
1
0
0
0
0
0
0

0
0
0
0
0
0
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
1
1
1
1
0
0
0
0
0
0
