## Movie Recommendation System

This project develops a simple movie recommendation system based on data taken from the MovieLens website. The recommendations are produced by two algorithms (KNN and SVD), which are shown below.

In [None]:
# Imports
import os
import pandas

from surprise import Dataset, KNNBasic, Reader, accuracy, SVD
from surprise.model_selection import cross_validate, PredefinedKFold

### Loading data

Below, the data is read in order to create the training and test datasets,
a set of user ids, and a set of item ids. The dataset used is ml-100k.

In [2]:
# Read the MovieLens item catalogue. Each line is a pipe-separated record and
# only its first two fields (item id and title) are kept.
# The `with` block guarantees the handle is closed even if reading fails.
# The stock ml-100k `u.item` file is ISO-8859-1 encoded, so the encoding is
# given explicitly instead of relying on the platform default.
with open('ml-100k/u.item', 'r', encoding='ISO-8859-1') as items_stream:
    raw_lines = items_stream.read().split('\n')

# One `[item_id, title]` pair per line. Note that splitting on '\n' leaves a
# final one-element row (`['']`) when the file ends with a newline.
item_data = [line.split('|')[:2] for line in raw_lines]

### Loading personalized dataset that contains 4 columns:

the user id, the item id, the rating and the timestamp of the evaluation.

In [3]:
# Ratings table; the columns used below are `user_id` and `item_id`.
database = pandas.read_csv('ml-100k/data.csv')
user_set = set(database.user_id)
item_set = set(database.item_id)

# Collect the items rated by each user in a single grouped pass instead of
# running one full-table `query` per user.
watched_by_user = database.groupby('user_id')['item_id'].apply(set)

# For every user, the catalogue items that user has not rated yet.
not_watch = {
    user: item_set - watched
    for user, watched in watched_by_user.items()
}

In [4]:
# Directory that holds the ml-100k files, and the reader preset of the same name.
files_dir = os.path.expanduser('ml-100k/')
reader = Reader('ml-100k')

# A single predefined fold: `u1.base` for training, `u1.test` for testing.
train_file = os.path.join(files_dir, 'u1.base')
test_file = os.path.join(files_dir, 'u1.test')
folds_files = [(train_file, test_file)]

# Iterator over the predefined folds, and the dataset built from them.
pkf = PredefinedKFold()
data = Dataset.load_from_folds(folds_files, reader=reader)

### Configuring KNN and SVD algorithms

Configuring the KNN algorithm to use cosine similarity between users, taking at most
5 neighbors into account for aggregation (`k=5`) and requiring at least 2 (`min_k=2`).

Using default configuration of SVD.

In [5]:
# Fixed seed so the stochastic SVD training gives the same model (and the same
# recommendations) on every Restart & Run All.
SVD_RANDOM_STATE = 42

sim_options = {
    'name': 'cosine',  # Using cosine distance
    'user_based': True  # compute similarities between users
}

# KNN: up to 5 neighbours are aggregated, with a minimum of 2.
algorithm_knn = KNNBasic(sim_options=sim_options, k=5, min_k=2)
# SVD: default hyper-parameters, seeded for reproducibility.
algorithm_svd = SVD(random_state=SVD_RANDOM_STATE)

for trainset, testset in pkf.split(data):
    # Fit both algorithms on the training split ...
    algorithm_knn.fit(trainset)
    algorithm_svd.fit(trainset)

    # ... and predict the ratings of the held-out test split.
    predictions_knn = algorithm_knn.test(testset)
    predictions_svd = algorithm_svd.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


### Define methods to get the top 5 movies recommended to a user, based on similarity with the preferences of other users.

In [6]:
def get_top_5_knn(uid: int):
    """Predict the top 5 not-yet-rated movies for a user with the KNN algorithm.

    :param uid: user id (an int, or a string holding an int).
    :returns: A list of at most 5 ``(item_id, estimated_rating)`` tuples,
        ordered from the highest to the lowest estimated rating.
    :raises KeyError: if the user id is not a key of ``not_watch``.
    """
    user_id = int(uid)
    candidates = not_watch[user_id]

    # Surprise keeps raw ids read from files as strings, so the user id must be
    # converted just like the item id. An int uid would be treated as an
    # unknown user and every item would get the same default estimate.
    raw_uid = str(user_id)
    scored = [
        (item, algorithm_knn.predict(uid=raw_uid, iid=str(item)).est)
        for item in candidates
    ]

    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:5]


def get_top_5_movies_knn(uid: int):
    """Get the titles of the top 5 movies predicted for a user by the KNN algorithm.

    :param uid: user id.
    :returns: A list with the titles of the top 5 predicted movies, best first.
    :raises KeyError: if a predicted item id has no entry in ``item_data``.
    """
    # Look titles up by item id rather than by list position. Ids start at 1
    # while list indexes start at 0, so positional indexing returns the title
    # of the *next* movie. Rows without a title (e.g. the one left by a
    # trailing newline) are skipped.
    titles = {row[0]: row[1] for row in item_data if len(row) >= 2}
    top_5 = get_top_5_knn(uid)
    return [titles[str(item_id)] for item_id, _ in top_5]

In [7]:
def get_top_5_svd(uid: int):
    """Predict the top 5 not-yet-rated movies for a user with the SVD algorithm.

    :param uid: user id (an int, or a string holding an int).
    :returns: A list of at most 5 ``(item_id, estimated_rating)`` tuples,
        ordered from the highest to the lowest estimated rating.
    :raises KeyError: if the user id is not a key of ``not_watch``.
    """
    user_id = int(uid)
    candidates = not_watch[user_id]

    # Surprise keeps raw ids read from files as strings, so the user id must be
    # converted just like the item id. An int uid would be treated as an
    # unknown user and the user-specific part of the estimate would be lost.
    raw_uid = str(user_id)
    scored = [
        (item, algorithm_svd.predict(uid=raw_uid, iid=str(item)).est)
        for item in candidates
    ]

    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:5]


def get_top_5_movies_svd(uid: int):
    """Get the titles of the top 5 movies predicted for a user by the SVD algorithm.

    :param uid: user id.
    :returns: A list with the titles of the top 5 predicted movies, best first.
    :raises KeyError: if a predicted item id has no entry in ``item_data``.
    """
    # Look titles up by item id rather than by list position. Ids start at 1
    # while list indexes start at 0, so positional indexing returns the title
    # of the *next* movie. Rows without a title (e.g. the one left by a
    # trailing newline) are skipped.
    titles = {row[0]: row[1] for row in item_data if len(row) >= 2}
    top_5 = get_top_5_svd(uid)
    return [titles[str(item_id)] for item_id, _ in top_5]

In [8]:
def get_top_5_neighbors(uid: int):
    """Get the 5 users most similar to the given user, based on their preferences.

    :param uid: user id (an int, or a string holding an int).
    :returns: A list with the raw ids of the 5 nearest neighbors of the user.
    :raises ValueError: raised by Surprise's ``to_inner_uid`` when the user is
        not part of the training set.
    """
    trainset = algorithm_knn.trainset

    # Raw ids of a dataset loaded from files are strings, so normalise the id
    # before asking for its inner id. An int would never be found.
    inner_uid = trainset.to_inner_uid(str(int(uid)))
    neighbors = algorithm_knn.get_neighbors(iid=inner_uid, k=5)

    # Translate the inner ids returned by the algorithm back to raw user ids.
    return [trainset.to_raw_uid(inner_id) for inner_id in neighbors]