In [26]:
import codecs
import csv
from math import sqrt

import numpy as np

In [69]:
class Recommender:
    """User-based k-nearest-neighbour recommender over a ratings dictionary.

    ``self.data`` maps each user name to a ``{item: rating}`` dict.  It is
    either handed to the constructor or filled in by :meth:`loadMovieDB`.
    """

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Configure the recommender.

        Args:
            data: ``{user: {item: rating}}`` dict.  Any non-dict value (for
                example ``''``) results in an empty store.
            k: number of nearest neighbours consulted by :meth:`recommend`.
            metric: ``'pearson'`` or ``'cosine'``; selects ``self.fn``.
            n: maximum number of recommendations returned.
        """
        self.k = k
        self.n = n
        self.metric = metric
        # NOTE(review): ``self.fn`` is recorded but computeNearestNeighbor
        # always ranks with cosineSimilarity; that behaviour is kept so that
        # existing results do not change.  ``None`` for an unknown metric.
        self.fn = {
            'pearson': self.pearson,
            'cosine': self.cosineSimilarity,
        }.get(metric)
        # Always define self.data so the other methods never hit an
        # AttributeError when no dict was supplied.
        self.data = data if isinstance(data, dict) else {}

    def loadMovieDB(self, path=''):
        """Replace ``self.data`` with the ratings in ``path + "Movie_Ratings.csv"``.

        Expected layout: the first non-blank row is a header whose first cell
        is ignored and whose remaining cells are user names; every later row
        is a movie title followed by one integer rating per user column, with
        a blank cell meaning "not rated".

        Args:
            path: prefix prepended verbatim to the file name, so a directory
                must end with its separator.

        Raises:
            OSError: the file cannot be opened.
            ValueError: a non-blank rating cell is not an integer.
        """
        self.data = {}
        # The csv module handles quoted fields (titles containing commas) and
        # any line-ending style; newline='' is what it expects from open().
        with open(path + "Movie_Ratings.csv", 'r', encoding='utf8',
                  newline='') as f:
            rows = [row for row in csv.reader(f) if row]
        if not rows:
            return

        header, movie_rows = rows[0], rows[1:]
        # Remember each user's real column index instead of re-deriving it
        # with list.index() for every cell.
        user_columns = [(col, name) for col, name in enumerate(header)
                        if col > 0 and name != '']
        for _, name in user_columns:
            self.data[name] = {}

        for row in movie_rows:
            title = row[0]
            for col, name in user_columns:
                # Rows shorter than the header simply have no rating there.
                cell = row[col].strip() if col < len(row) else ''
                if cell:
                    self.data[name][title] = int(cell)

    def pearson(self, rating1, rating2):
        """Pearson correlation of two rating dicts over the items they share.

        Returns:
            The correlation, or 0 when there are no shared items or the
            denominator is zero (one side has no variance on them).
        """
        sum_xy = sum_x = sum_y = sum_x2 = sum_y2 = 0
        n = 0
        for key, x in rating1.items():
            if key in rating2:
                y = rating2[key]
                n += 1
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += x ** 2
                sum_y2 += y ** 2

        if n == 0:
            return 0
        # Clamp at zero: rounding can push a variance term slightly negative,
        # which would make sqrt() raise ValueError.
        var_x = max(sum_x2 - sum_x ** 2 / n, 0)
        var_y = max(sum_y2 - sum_y ** 2 / n, 0)
        denominator = sqrt(var_x) * sqrt(var_y)
        if denominator == 0:
            return 0
        return (sum_xy - (sum_x * sum_y) / n) / denominator

    def cosineSimilarity(self, rating1, rating2):
        """Cosine similarity of two rating dicts, using only shared items.

        Returns:
            The similarity, or 0 when the denominator is zero (for example
            when no items are shared).
        """
        sum_xy = 0
        sum_x2 = 0
        sum_y2 = 0
        for key, x in rating1.items():
            if key in rating2:
                y = rating2[key]
                # Plain multiplication: the ratings are scalars, so np.dot
                # added nothing except numpy scalar types in the result.
                sum_xy += x * y
                sum_x2 += x ** 2
                sum_y2 += y ** 2
        denominator = sqrt(sum_x2) * sqrt(sum_y2)
        if denominator == 0:
            return 0
        return sum_xy / denominator

    def computeNearestNeighbor(self, user):
        """Return ``[(other_user, similarity), ...]``, most similar first.

        Every user in ``self.data`` except ``user`` is scored with
        :meth:`cosineSimilarity`.

        Raises:
            KeyError: ``user`` is not in ``self.data`` while other users are.
        """
        distances = [
            (other, self.cosineSimilarity(self.data[user], ratings))
            for other, ratings in self.data.items()
            if other != user
        ]
        # Highest similarity first; ties keep their original order.
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Return up to ``self.n`` ``(item, score)`` pairs, best score first.

        The ``self.k`` most similar users each contribute their ratings for
        items ``user`` has not rated, weighted by their share of the summed
        similarity of those neighbours.

        Returns:
            A list of ``(item, score)`` tuples; empty when there are no
            neighbours or their total similarity is zero.

        Raises:
            KeyError: ``user`` is not in ``self.data``.
        """
        nearest = self.computeNearestNeighbor(user)
        userRatings = self.data[user]

        # Never index past the neighbours that actually exist.
        neighbours = nearest[:max(self.k, 0)]

        # Explicit accumulation keeps the float result independent of the
        # summation strategy used by the built-in sum().
        totalDistance = 0.0
        for _, similarity in neighbours:
            totalDistance += similarity
        if totalDistance == 0:
            # No neighbour shares anything with the user; nothing to weight.
            return []

        recommendations = {}
        for name, similarity in neighbours:
            weight = similarity / totalDistance
            for movie, rating in self.data[name].items():
                if movie not in userRatings:
                    recommendations[movie] = (recommendations.get(movie, 0.0)
                                              + rating * weight)

        ranked = sorted(recommendations.items(),
                        key=lambda pair: pair[1], reverse=True)
        return ranked[:self.n]

In [70]:
# Start from an empty ratings store (a non-dict `data` is not used as the
# store) and consult the 3 nearest neighbours when recommending.
r = Recommender(data='', k=3)

In [71]:
# Backslashes are doubled so the Windows path contains no invalid escape
# sequences (`\I`, `\A`, `\B`); the string value is unchanged.  The trailing
# separator is required because loadMovieDB appends the file name directly
# to `path`.
r.loadMovieDB(path="D:\\Improve Skills\\A Prorammers Gui to Data Mining\\BX-Dump\\")

In [73]:
# Recommendations for the user 'Heather'; left as a bare expression so the
# returned list of (movie, score) pairs is displayed as the cell's output.
r.recommend('Heather')

[('Braveheart', 2.6696674603094266),
 ('Old School', 2.6693662830333866),
 ('Pulp Fiction', 2.6693662830333866),
 ('Alien', 2.3331854894747237),
 ('Blade Runner', 1.6692951058834922)]