In [1]:
from surprise import KNNBasic,Dataset,Reader
from collections import defaultdict
from operator import itemgetter
import heapq as hq
import os
import csv

In [2]:
#Load movie.csv and ratings.csv to return a dataset
def load_dataset():
    ratings_dataset = 0
    reader = Reader(line_format = 'user item rating timestamp', sep = ',', skip_lines=1)
    ratings_dataset = Dataset.load_from_file('ml-dataset-latest/ratings.csv',reader=reader)

    #Retrieve movie's title using it's ID
    movie_titles = {}
    with open('ml-dataset-latest/movies.csv', newline = '', encoding = 'ISO-8859-1') as movie_csv:
        reader_movie = csv.reader(movie_csv)
        next(reader_movie)
        for row in reader_movie:
            movie_id = int(row[0])
            movie_name = row[1]
            movie_titles[movie_id] = movie_name

    return(ratings_dataset,movie_titles)

dataset,movie_titles = load_dataset()

training_set = dataset.build_full_trainset()

In [3]:
similarity_matrix = KNNBasic(simp_options={
    'name' : 'cosine',
    'user_based' : False
    })\
    .fit(training_set)\
    .compute_similarities()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


In [4]:
test_subject = '500'

k = 20

In [5]:
test_subject_iid = training_set.to_inner_uid(test_subject)

test_subject_raitings = training_set.ur[test_subject_iid]
k_neighbors = hq.nlargest(k, test_subject_raitings, key=lambda t : t[1])

In [6]:
candidates = defaultdict(float)
for itemID, rating in k_neighbors:
    try:
        similarities = similarity_matrix[itemID]
        for innerID, score in enumerate(similarities):
            candidates[innerID] += score * (rating / 5.0)
    except:
        continue

In [7]:
def getMovieName(movieID):
    if int(movieID) in movie_titles:
        return movie_titles[int(movieID)]
    else:
        return ""

In [8]:
watched = {}
for itemID, rating in training_set.ur[test_subject_iid]:
    watched[itemID] = 1

recommendations = []

position = 0
for itemID, rating_sum in sorted(candidates.items(), key = itemgetter(1), reverse=True):
    if not itemID in watched:
        recommendations.append(getMovieName(training_set.to_raw_iid(itemID)))
        position += 1
        if (position > 10) : break
for rec in recommendations:
    print("Movie: ", rec)

Movie:  Natural Born Killers (1994)
Movie:  Bad Boys (1995)
Movie:  Grifters, The (1990)
Movie:  Blown Away (1994)
Movie:  Flight of the Navigator (1986)
Movie:  Dark Knight Rises, The (2012)
Movie:  Erin Brockovich (2000)
Movie:  Black Cauldron, The (1985)
Movie:  Toys (1992)
Movie:  Bedknobs and Broomsticks (1971)
Movie:  Tombstone (1993)
