In [19]:
import math
import csv
import numpy

# Read rating data from a CSV file
def ReadFile(filename = "<csv_file_location>"):
    """Load user/product ratings from a CSV file into a nested dict.

    Every non-empty row must have at least three columns, read in this
    order: user, product, rate. Extra columns are ignored. Completely
    empty rows (e.g. a trailing blank line) are skipped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict mapping user -> {product: rate}, where user and product are the
        raw strings from the file and rate is a float. If the same
        (user, product) pair appears more than once, the last row wins.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-empty row has fewer than three columns.
        ValueError: If the third column cannot be parsed as a float.
    """
    mentions = dict()
    # `with` guarantees the file is closed even when a row fails to parse;
    # newline='' lets the csv module handle line endings itself.
    with open(filename, newline='') as f:
        for line in csv.reader(f):
            if not line:
                # csv.reader yields [] for blank lines; nothing to record.
                continue
            user = line[0]
            product = line[1]
            rate = float(line[2])
            mentions.setdefault(user, dict())[product] = rate
    return mentions

# Cosine measure of two sparse vectors
def distCosine(vecA, vecB):
    """Return the cosine similarity of two sparse vectors.

    Despite the "dist" in the name, the value is a similarity: larger means
    more alike (1.0 for vectors pointing the same way, 0.0 when they share
    no non-zero dimension).

    Args:
        vecA: dict mapping dimension key -> numeric value.
        vecB: dict mapping dimension key -> numeric value.

    Returns:
        dot(vecA, vecB) / |vecA| / |vecB| as a float, or 0.0 when either
        vector has zero length (empty dict or all-zero values), for which
        the cosine is undefined.
    """
    def dotProduct(vecA, vecB):
        # Only dimensions present in both vectors contribute to the sum.
        d = 0.0
        for dim in vecA:
            if dim in vecB:
                d += vecA[dim]*vecB[dim]
        return d

    normA = math.sqrt(dotProduct(vecA, vecA))
    normB = math.sqrt(dotProduct(vecB, vecB))
    if normA == 0.0 or normB == 0.0:
        # Avoid ZeroDivisionError: a zero vector is similar to nothing.
        return 0.0
    return dotProduct(vecA, vecB) / normA / normB

# The collaborative filtering itself (user-based)
# Reference: https://habr.com/ru/post/150399/

def makeRecommendation(userID, userRates, nBestUsers, nBestProducts):
    """Recommend products to a user from the ratings of similar users.

    The nBestUsers users with the highest cosine similarity to userID are
    selected and printed. Among them, only users with a strictly positive
    similarity contribute: for every product such a user rated and userID
    did not, the score is the similarity-weighted sum of the ratings divided
    by the total similarity of the contributing users. The nBestProducts
    highest-scoring products are printed and returned.

    Args:
        userID: Key of the target user in userRates.
        userRates: dict mapping user -> {product: rate}.
        nBestUsers: Maximum number of most similar users to consider.
        nBestProducts: Maximum number of products to return.

    Returns:
        List of (product, score) tuples sorted by score in descending order.
        Empty when no positively similar user rated anything new.

    Raises:
        KeyError: If userID is not present in userRates.
    """
    targetRates = userRates[userID]

    matches = [
        (u, distCosine(targetRates, userRates[u]))
        for u in userRates
        if u != userID
    ]
    bestMatches = sorted(matches, key=lambda m: m[1], reverse=True)[:nBestUsers]

    print(f'Most correlated with {userID} users:')
    for user, coeff in bestMatches:
        print(f'  UserID: {user}  Coeff: {coeff}')

    # Keep only positively correlated users and normalize by the weights of
    # exactly those users. Summing over the unfiltered list would let
    # negative similarities shrink (or zero out) the normalizer.
    positiveMatches = {user: coeff for user, coeff in bestMatches if coeff > 0.0}
    sim_all = sum(positiveMatches.values())

    sim = dict()
    for relatedUser, coeff in positiveMatches.items():
        for product, rate in userRates[relatedUser].items():
            # Recommend only what the target user has not rated yet.
            if product not in targetRates:
                sim[product] = sim.get(product, 0.0) + rate * coeff

    # sim is non-empty only if positiveMatches is, in which case sim_all > 0.
    for product in sim:
        sim[product] /= sim_all

    bestProducts = sorted(sim.items(), key=lambda p: p[1], reverse=True)[:nBestProducts]

    print('Most correlated products:')
    for product, score in bestProducts:
        print(f'  ProductID: {product}  CorrelationCoeff: {score}')
    return [(product, score) for product, score in bestProducts]


In [20]:
# Top 5 products for 'ivan', based on his 5 most similar users.
rec = makeRecommendation(
    'ivan',
    ReadFile(filename="/Volumes/data/test.csv"),
    nBestUsers=5,
    nBestProducts=5,
)


Most correlated with ivan users:
  UserID: alex  Coeff: 0.5163977794943223
  UserID: david  Coeff: 0.06666666666666667
  UserID: bob  Coeff: 0.0
Most correlated products:
  ProductID: 5  CorrelationCoeff: 3.5426463259379775
  ProductID: 2  CorrelationCoeff: 2.656984744453483
  ProductID: 3  CorrelationCoeff: 0.4573536740620226
  ProductID: 4  CorrelationCoeff: 0.34301525554651696
  ProductID: 7  CorrelationCoeff: 0.11433841851550565
