In [1]:
#欧氏距离可以作为衡量两个用户相似度的指标，但它也有一些缺点，因此皮尔逊相关系数常用于推荐引擎

In [7]:
import json
import numpy as np

# Define a function that computes the Pearson correlation coefficient between user1 and user2
def pearson_score(dataset, user1, user2):
    """Compute the Pearson correlation coefficient between two users' ratings.

    Only items present in both users' rating dicts contribute to the score.

    Parameters
    ----------
    dataset : dict
        Mapping of user name -> {item name: numeric rating}.
    user1, user2 : hashable
        Keys of ``dataset`` identifying the two users to compare.

    Returns
    -------
    int or float
        ``0`` when the users share no rated items or when either user's
        ratings over the shared items have zero variance (the coefficient
        is undefined there); otherwise the correlation as a float.

    Raises
    ------
    TypeError
        If ``user1`` or ``user2`` is not a key of ``dataset``.
    """
    if user1 not in dataset:
        raise TypeError('User {} not present in the dataset'.format(user1))
    if user2 not in dataset:
        raise TypeError('User {} not present in the dataset'.format(user2))

    ratings1 = dataset[user1]
    ratings2 = dataset[user2]

    # Items rated by both users, in user1's iteration order.
    rated_by_both = [item for item in ratings1 if item in ratings2]
    num_ratings = len(rated_by_both)
    if num_ratings == 0:
        return 0

    x = np.array([ratings1[item] for item in rated_by_both], dtype=float)
    y = np.array([ratings2[item] for item in rated_by_both], dtype=float)

    user1_sum = np.sum(x)
    user2_sum = np.sum(y)

    # Compute the Pearson correlation coefficient from the raw sums:
    # sxy is the co-deviation, sxx and syy are the sums of squared deviations.
    sxy = np.sum(x * y) - user1_sum * user2_sum / num_ratings
    sxx = np.sum(np.square(x)) - np.square(user1_sum) / num_ratings
    syy = np.sum(np.square(y)) - np.square(user2_sum) / num_ratings

    # Zero variance on either side would make the denominator zero.
    if sxx * syy == 0:
        return 0
    return sxy / np.sqrt(sxx * syy)
    
            
            
# NOTE: absolute, machine-specific path; point it at your local copy of
# movie_ratings.json before running this cell.
data_file=r'F:\程序员\python\Python机器学习经典实例\Chapter05\movie_ratings.json'

# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale default encoding.
with open(data_file, encoding='utf-8') as f:
    data=json.load(f)
print(data)

# The two users whose rating similarity is reported below.
user1='John Carson'
user2='Michelle Peterson'

print('pearson_score:{:.3f}'.format(pearson_score(data,user1,user2)))
            

{'John Carson': {'Inception': 2.5, 'Pulp Fiction': 3.5, 'Anger Management': 3.0, 'Fracture': 3.5, 'Serendipity': 2.5, 'Jerry Maguire': 3.0}, 'Michelle Peterson': {'Inception': 3.0, 'Pulp Fiction': 3.5, 'Anger Management': 1.5, 'Fracture': 5.0, 'Jerry Maguire': 3.0, 'Serendipity': 3.5}, 'William Reynolds': {'Inception': 2.5, 'Pulp Fiction': 3.0, 'Fracture': 3.5, 'Jerry Maguire': 4.0}, 'Jillian Hobart': {'Pulp Fiction': 3.5, 'Anger Management': 3.0, 'Jerry Maguire': 4.5, 'Fracture': 4.0, 'Serendipity': 2.5}, 'Melissa Jones': {'Inception': 3.0, 'Pulp Fiction': 4.0, 'Anger Management': 2.0, 'Fracture': 3.0, 'Jerry Maguire': 3.0, 'Serendipity': 2.0}, 'Alex Roberts': {'Inception': 3.0, 'Pulp Fiction': 4.0, 'Jerry Maguire': 3.0, 'Fracture': 5.0, 'Serendipity': 3.5}, 'Michael Henry': {'Pulp Fiction': 4.5, 'Serendipity': 1.0, 'Fracture': 4.0}}
pearson_score:0.396
