In [1]:
## Find the most similar people

In [2]:
from recommendations import critics

In [3]:
# Returns the Pearson correlation coefficient for p1 and p2
from math import sqrt
def sim_pearson(prefs,p1,p2):
    """Return the Pearson correlation coefficient between p1 and p2.

    Only the items that appear in both prefs[p1] and prefs[p2] take part
    in the calculation.

    Args:
        prefs: nested mapping used as prefs[key][item] -> numeric rating.
        p1: first key of prefs to compare.
        p2: second key of prefs to compare.

    Returns:
        The correlation over the shared items, or 0 when there are no
        shared items or when the denominator is zero (at least one side
        has no variance over the shared items).

    Raises:
        KeyError: if prefs is a dict and p1 or p2 is not one of its keys.
    """
    # Items rated by both sides, in the iteration order of prefs[p1]
    shared = [item for item in prefs[p1] if item in prefs[p2]]

    # Nothing in common: no correlation can be computed
    n = len(shared)
    if n == 0:
        return 0

    ratings1 = [prefs[p1][item] for item in shared]
    ratings2 = [prefs[p2][item] for item in shared]

    # Plain sums, sums of squares and sum of products
    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    sum1Sq = sum(pow(r, 2) for r in ratings1)
    sum2Sq = sum(pow(r, 2) for r in ratings2)
    pSum = sum(a * b for a, b in zip(ratings1, ratings2))

    num = pSum - (sum1 * sum2 / n)

    # Each term below is n times a variance, so it is never negative in
    # exact arithmetic. Floating-point rounding can still produce a tiny
    # negative value for (near-)constant ratings, which would make sqrt()
    # raise ValueError (or, if both go negative, yield a meaningless
    # ratio). Clamp at zero so those cases take the "no variance" path.
    var1 = max(0.0, sum1Sq - pow(sum1, 2) / n)
    var2 = max(0.0, sum2Sq - pow(sum2, 2) / n)
    den = sqrt(var1 * var2)
    if den == 0:
        return 0

    return num / den

In [4]:
sim_pearson(critics,"Lisa Rose","Gene Seymour")

0.39605901719066977

In [5]:
def topMatches(prefs,person,n=5,similarity = sim_pearson):
    """Return up to n (score, other) pairs for person, best score first.

    Every key of prefs other than person is scored with
    similarity(prefs, person, other). The pairs are ordered by descending
    tuple comparison, i.e. by score and then by the other key.
    """
    ascending = sorted(
        (similarity(prefs,person,candidate), candidate)
        for candidate in prefs
        if candidate != person
    )
    # Flip to descending order, then keep the leading n entries
    return ascending[::-1][0:n]

# Similarity between two people

In [6]:
topMatches(critics,"Toby",n = 3)

[(0.9912407071619299, 'Lisa Rose'),
 (0.9244734516419049, 'Mick LaSalle'),
 (0.8934051474415647, 'Claudia Puig')]

In [7]:
## Recommend items

In [8]:
def getRecommendations(prefs,person,similarity = sim_pearson):
    """Rank the items person has not rated by a similarity-weighted average.

    For every other key of prefs whose similarity to person is positive,
    each of its items that person lacks (or has rated 0) contributes
    rating * similarity to that item's total. Each total is then divided
    by the sum of the contributing similarities.

    The two intermediate dicts are printed before the result is returned.

    Returns:
        A list of (predicted_rating, item) tuples, highest first.
    """
    weightedSums = {}
    weightSums = {}
    for other in prefs:
        # Never compare person with itself
        if other == person:
            continue
        score = similarity(prefs,person,other)
        # Ignore others with zero or negative similarity
        if score <= 0:
            continue
        for item, rating in prefs[other].items():
            # Skip anything person has already given a non-zero rating
            if item in prefs[person] and prefs[person][item] != 0:
                continue
            weightedSums[item] = weightedSums.get(item, 0.00) + rating * score
            weightSums[item] = weightSums.get(item, 0.00) + score

    print(weightSums)
    print("-------------------------------------------------------------")
    print(weightedSums)

    # Normalise each weighted total by the similarity mass behind it
    rankings = sorted(
        (total/weightSums[item], item) for item, total in weightedSums.items()
    )
    rankings.reverse()
    return rankings
                
getRecommendations(critics,"Toby")

{'Just My Luck': 3.1903657320769114, 'The Night Listener': 3.853214712436781, 'Lady in the Water': 2.9598095649952167}
-------------------------------------------------------------
{'Just My Luck': 8.074754105841562, 'The Night Listener': 12.89975185847269, 'Lady in the Water': 8.383808341404684}


[(3.3477895267131013, 'The Night Listener'),
 (2.8325499182641614, 'Lady in the Water'),
 (2.5309807037655645, 'Just My Luck')]

In [15]:
def transformPrefs(prefs):
    """Swap the two key levels of a nested dict.

    Given prefs[person][item] = rating, return a new dict where
    result[item][person] = rating. The input is not modified.
    """
    flipped = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            # Create the inner dict the first time an item is seen
            flipped.setdefault(item, {})[person] = rating
    return flipped

In [17]:
movies = transformPrefs(critics)

In [20]:
movies

{'Just My Luck': {'Claudia Puig': 3.0,
  'Gene Seymour': 1.5,
  'Lisa Rose': 3.0,
  'Mick LaSalle': 2.0},
 'Lady in the Water': {'Gene Seymour': 3.0,
  'Jack Matthews': 3.0,
  'Lisa Rose': 2.5,
  'Michael Phillips': 2.5,
  'Mick LaSalle': 3.0},
 'Snakes on a Plane': {'Claudia Puig': 3.5,
  'Gene Seymour': 3.5,
  'Jack Matthews': 4.0,
  'Lisa Rose': 3.5,
  'Michael Phillips': 3.0,
  'Mick LaSalle': 4.0,
  'Toby': 4.5},
 'Superman Returns': {'Claudia Puig': 4.0,
  'Gene Seymour': 5.0,
  'Jack Matthews': 5.0,
  'Lisa Rose': 3.5,
  'Michael Phillips': 3.5,
  'Mick LaSalle': 3.0,
  'Toby': 4.0},
 'The Night Listener': {'Claudia Puig': 4.5,
  'Gene Seymour': 3.0,
  'Jack Matthews': 3.0,
  'Lisa Rose': 3.0,
  'Michael Phillips': 4.0,
  'Mick LaSalle': 3.0},
 'You, Me and Dupree': {'Claudia Puig': 2.5,
  'Gene Seymour': 3.5,
  'Jack Matthews': 3.5,
  'Lisa Rose': 2.5,
  'Mick LaSalle': 2.0,
  'Toby': 1.0}}

In [18]:
topMatches(movies,'Superman Returns')

[(0.6579516949597695, 'You, Me and Dupree'),
 (0.4879500364742689, 'Lady in the Water'),
 (0.11180339887498941, 'Snakes on a Plane'),
 (-0.1798471947990544, 'The Night Listener'),
 (-0.42289003161103106, 'Just My Luck')]

In [19]:
def topMatches(prefs,person,n=5,similarity = sim_pearson):
    """Return up to n (score, other) pairs for person, best score first.

    Each key of prefs except person is scored with
    similarity(prefs, person, other); pairs are ordered by descending
    tuple comparison.
    """
    # NOTE(review): this cell redefines topMatches with the same body as
    # the earlier In [5] cell; one of the two definitions is redundant.
    scores = []
    for other in prefs:
        if other != person:
            scores.append((similarity(prefs,person,other), other))

    scores.sort()
    best_first = list(reversed(scores))
    return best_first[0:n]

# Similarity between two objects

In [30]:
def calculateSimilarItems(prefs,n = 10):
    """Build a dict mapping every item to its most similar other items.

    prefs is first inverted with transformPrefs so that it is keyed by
    item; each item is then scored against all others with topMatches
    using sim_pearson.

    Args:
        prefs: nested mapping used as prefs[person][item] -> rating.
        n: maximum number of matches kept per item.

    Returns:
        A dict {item: [(score, other_item), ...]} with each list ordered
        best match first.
    """
    result = {}
    # Invert the data so that it is keyed by item
    itemPrefs = transformPrefs(prefs)
    total = len(itemPrefs)
    for c, item in enumerate(itemPrefs, 1):
        # Status line once every 100 items. The previous `if c % 100:`
        # test was inverted and printed on every item that was NOT a
        # multiple of 100.
        if c % 100 == 0:
            print("%d / %d" % (c, total))
        result[item] = topMatches(itemPrefs,item,n=n,similarity = sim_pearson)
    return result

In [31]:
calculateSimilarItems(critics)

1 / 6
2 / 6
3 / 6
4 / 6
5 / 6
6 / 6


{'Just My Luck': [(0.5555555555555556, 'The Night Listener'),
  (-0.3333333333333333, 'Snakes on a Plane'),
  (-0.42289003161103106, 'Superman Returns'),
  (-0.4856618642571827, 'You, Me and Dupree'),
  (-0.9449111825230676, 'Lady in the Water')],
 'Lady in the Water': [(0.7637626158259785, 'Snakes on a Plane'),
  (0.4879500364742689, 'Superman Returns'),
  (0.3333333333333333, 'You, Me and Dupree'),
  (-0.6123724356957927, 'The Night Listener'),
  (-0.9449111825230676, 'Just My Luck')],
 'Snakes on a Plane': [(0.7637626158259785, 'Lady in the Water'),
  (0.11180339887498941, 'Superman Returns'),
  (-0.3333333333333333, 'Just My Luck'),
  (-0.5663521139548527, 'The Night Listener'),
  (-0.6454972243679047, 'You, Me and Dupree')],
 'Superman Returns': [(0.6579516949597695, 'You, Me and Dupree'),
  (0.4879500364742689, 'Lady in the Water'),
  (0.11180339887498941, 'Snakes on a Plane'),
  (-0.1798471947990544, 'The Night Listener'),
  (-0.42289003161103106, 'Just My Luck')],
 'The Night L