In [1]:
from numpy import *
U, Sigma, VT = linalg.svd([[1, 1], [7, 7]])

In [2]:
U

array([[-0.14142136, -0.98994949],
       [-0.98994949,  0.14142136]])

In [3]:
Sigma

array([1.00000000e+01, 1.44854506e-16])

In [4]:
VT

array([[-0.70710678, -0.70710678],
       [-0.70710678,  0.70710678]])

In [19]:
from numpy import linalg as la

def ecludSim(inA, inB):
    return 1.0 / (1.0 + la.norm(inA - inB))

def pearsSim(inA, inB):
    if len(inA) < 3:
        return 1.0
    return 0.5 + 0.5 * corrcoef(inA, inB, rowvar = 0)[0][1]

def cosSim(inA, inB):
    num = float(inA.T * inB)
    denom = la.norm(inA) * la.norm(inB)
    return 0.5 + 0.5 * (num / denom)

In [20]:
def loadExData():
    return[[0, 0, 0, 2, 2],
           [0, 0, 0, 3, 3],
           [0, 0, 0, 1, 1],
           [1, 1, 1, 0, 0],
           [2, 2, 2, 0, 0],
           [5, 5, 5, 0, 0],
           [1, 1, 1, 0, 0]]

In [21]:
myMat = mat(loadExData())

In [22]:
ecludSim(myMat[:,0], myMat[:,4])

0.12973190755680383

In [23]:
ecludSim(myMat[:,0], myMat[:,0])

1.0

In [24]:
cosSim(myMat[:,0], myMat[:,4])

0.5

In [25]:
cosSim(myMat[:,0], myMat[:,0])

1.0

In [26]:
pearsSim(myMat[:,0], myMat[:,4])

0.20596538173840329

In [27]:
myMat[:, 0]

matrix([[0],
        [0],
        [0],
        [1],
        [2],
        [5],
        [1]])

In [28]:
myMat[:, 4]

matrix([[2],
        [3],
        [1],
        [0],
        [0],
        [0],
        [0]])

In [29]:
pearsSim(myMat[:,0], myMat[:,4])

0.20596538173840329

In [31]:
pearsSim(myMat[:,:2], myMat[:,:2])

1.0

In [16]:
#计算在给定相似度计算方法的条件下，用户对物品的估计评分值
#standEst()函数中：参数dataMat表示数据矩阵，user表示用户编号，simMeas表示相似度计算方法，item表示物品编号
#针对item而不是针对人，用评分结构形似的item去contribute从而推测出该item的评价
def standEst(dataMat, user, simMeas, item):
    n = shape(dataMat)[1]    #the number of items
    simTotal, ratSimTotal = 0.0, 0.0
    #比较每一个item与该item的相似度
    for j in range(n):
        userRating = dataMat[user, j]
        #如果该user评了j，才继续进行分析
        if userRating == 0.0:
            continue
        #寻找该用户评过分的项目，然后比较这两个项目的向量
        overLap = nonzero(logical_and(dataMat[:,item].A > 0, dataMat[:,j].A > 0))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item], dataMat[overLap, j])   #只考虑都评过分的部分
        print('the %d and %d similarity is : %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating    #similarity大则该userRating占的比重大
    if simTotal == 0:
        return 0
    else:
        return ratSimTotal / simTotal

In [17]:
def recommend(dataMat, user, N = 3, simMeas = cosSim, estMethod = standEst):
    unratedItems = nonzero(dataMat[user,:].A == 0)[1]
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = []
    for item in unratedItems:
        estimatedScore = estMethod(dataMat, user, simMeas, item)
        itemScores.append((item, estimatedScore))
    return sorted(itemScores, key = lambda jj: jj[1], reverse = True)[:N]

In [18]:
myMat = mat(loadExData())

In [19]:
myMat[0,1] = myMat[0,0] = myMat[1,0] = myMat[2,0] = 4
myMat[3,3] = 2

In [20]:
myMat

matrix([[4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 2, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0]])

In [21]:
recommend(myMat, 2)

the 1 and 0 similarity is : 1.000000
the 1 and 3 similarity is : 0.928746
the 1 and 4 similarity is : 1.000000
the 2 and 0 similarity is : 1.000000
the 2 and 3 similarity is : 1.000000
the 2 and 4 similarity is : 0.000000


[(2, 2.5), (1, 2.0243290220056256)]

In [22]:
recommend(myMat, 2, simMeas = ecludSim)

the 1 and 0 similarity is : 1.000000
the 1 and 3 similarity is : 0.309017
the 1 and 4 similarity is : 0.333333
the 2 and 0 similarity is : 1.000000
the 2 and 3 similarity is : 0.500000
the 2 and 4 similarity is : 0.000000


[(2, 3.0), (1, 2.8266504712098603)]

In [23]:
recommend(myMat, 2, simMeas = pearsSim)

the 1 and 0 similarity is : 1.000000
the 1 and 3 similarity is : 1.000000
the 1 and 4 similarity is : 1.000000
the 2 and 0 similarity is : 1.000000
the 2 and 3 similarity is : 1.000000
the 2 and 4 similarity is : 0.000000


[(2, 2.5), (1, 2.0)]

In [24]:
def loadExData2():
    return[[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
           [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
           [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
           [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
           [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
           [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
           [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
           [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
           [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
           [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
           [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]

In [25]:
U, Sigma, VT = la.svd(mat(loadExData2()))
Sigma

array([15.77075346, 11.40670395, 11.03044558,  4.84639758,  3.09292055,
        2.58097379,  1.00413543,  0.72817072,  0.43800353,  0.22082113,
        0.07367823])

In [26]:
Sig2 = Sigma ** 2
sum(Sig2)

541.9999999999993

In [27]:
sum(Sig2) * 0.9

487.7999999999994

In [28]:
sum(Sig2[:2])

378.82955951135784

In [29]:
sum(Sig2[:3])

500.50028912757904

In [44]:
def svdEst(dataMat, user, simMeas, item):
    n = shape(dataMat)[1]
    simTotal, ratSimTotal = 0.0, 0.0
    U, Sigma, VT = la.svd(dataMat)
    Sig4 = mat(eye(4) * Sigma[:4])
    xformedItems = dataMat.T * U[:,:4] * Sig4.I
    print('U',U[:,:4])
    print('Sig4.I', Sig4.I)
    print(xformedItems)
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item,:].T, xformedItems[j,:].T)
        print('the %d and %d similarity is : %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0: return 0
    else: return ratSimTotal / simTotal

In [45]:
myMat = mat(loadExData2())

In [46]:
recommend(myMat, 1, estMethod = svdEst)

U [[-0.02173672 -0.41043862 -0.29555566 -0.31993924]
 [-0.01664767 -0.40868796 -0.29149768 -0.15138979]
 [-0.03763173 -0.27302481  0.4269746  -0.10978297]
 [-0.3928286   0.03215633  0.00283676  0.02145489]
 [-0.68146521  0.05125169  0.00962441  0.02067521]
 [-0.01031581 -0.35826614  0.52908411  0.06256753]
 [-0.60364271 -0.00222591 -0.02262313 -0.02723249]
 [-0.02078959 -0.4841342  -0.34503998 -0.16062914]
 [-0.01290907 -0.35922701 -0.12620599  0.9008227 ]
 [-0.00900549 -0.30733798  0.47941858 -0.14085095]
 [-0.11812788  0.00805012  0.00186006 -0.00117811]]
Sig4.I [[0.06340851 0.         0.         0.        ]
 [0.         0.08766774 0.         0.        ]
 [0.         0.         0.09065817 0.        ]
 [0.         0.         0.         0.20633883]]
[[-0.45137416  0.03084799 -0.00290108  0.01189185]
 [-0.36239706  0.02584428 -0.00189127  0.01348796]
 [-0.46879252  0.03296133 -0.00281253  0.01656192]
 [-0.01007685 -0.34024331 -0.22728592  0.14546051]
 [-0.01567036 -0.38750193  0.6119799

[(4, 3.344714938469228), (7, 3.329402072452697), (9, 3.328100876390069)]

In [40]:
recommend(myMat, 1, estMethod = svdEst, simMeas = pearsSim)

[[-0.45137416  0.03084799 -0.00290108  0.01189185]
 [-0.36239706  0.02584428 -0.00189127  0.01348796]
 [-0.46879252  0.03296133 -0.00281253  0.01656192]
 [-0.01007685 -0.34024331 -0.22728592  0.14546051]
 [-0.01567036 -0.38750193  0.61197998 -0.17137451]
 [-0.01664563 -0.52000097 -0.3608907  -0.14984063]
 [-0.00474684 -0.18887149 -0.00924222  0.94228361]
 [-0.46712774  0.00389831  0.03349951 -0.02080674]
 [-0.47223188  0.02853952 -0.00504059  0.00160266]
 [-0.01591788 -0.39205093  0.55707516  0.04356321]
 [-0.0552444  -0.52034959 -0.36330956 -0.19023805]]
the 0 and 3 similarity is : 0.341942
the 0 and 5 similarity is : 0.124132
the 0 and 10 similarity is : 0.116698
[[-0.45137416  0.03084799 -0.00290108  0.01189185]
 [-0.36239706  0.02584428 -0.00189127  0.01348796]
 [-0.46879252  0.03296133 -0.00281253  0.01656192]
 [-0.01007685 -0.34024331 -0.22728592  0.14546051]
 [-0.01567036 -0.38750193  0.61197998 -0.17137451]
 [-0.01664563 -0.52000097 -0.3608907  -0.14984063]
 [-0.00474684 -0.188

[(4, 3.346952186702173), (9, 3.33537965732747), (6, 3.3071930278130366)]

In [61]:
#image compression
def printMat(inMat, thresh = 0.8):
    for i in range(32):
        for k in range(32):
            if float(inMat[i, k]) > thresh:
                print(1,end = '')
            else:
                print(0,end = '')
        print('')
        
        
def imgCompress(numSV = 3, thresh = 0.8):
    myl = []
    for line in open('0_5.txt').readlines():
        newRow = []
        for i in range(32):
            newRow.append(int(line[i]))
        myl.append(newRow)
    myMat = mat(myl)
    print('****original matrix****')
    printMat(myMat, thresh)
    U, Sigma, VT = la.svd(myMat)
    SigRecon = mat(zeros((numSV, numSV)))
    for k in range(numSV):
        SigRecon[k,k] = Sigma[k]
    reconMat = U[:,:numSV] * SigRecon * VT[:numSV,:]
    print('****reconstructed matrix using %d singular values****' % numSV)
    printMat(reconMat, thresh)

In [62]:
imgCompress(2)

****original matrix****
00000000000000110000000000000000
00000000000011111100000000000000
00000000000111111110000000000000
00000000001111111111000000000000
00000000111111111111100000000000
00000001111111111111110000000000
00000000111111111111111000000000
00000000111111100001111100000000
00000001111111000001111100000000
00000011111100000000111100000000
00000011111100000000111110000000
00000011111100000000011110000000
00000011111100000000011110000000
00000001111110000000001111000000
00000011111110000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000001111100000000011111000000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000011111000000
00000000111110000000111111000000
00000000111111000001111110000000
00000000011111111111111110000000
00000000001111111111111110000000
00000000001111111111111110000000
0000000000011111111