In [1]:
import pandas as pd
import numpy as np


from surprise import KNNWithZScore,KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader

from surprise.model_selection import train_test_split

In [2]:
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [3]:
movies_with_ratings = movies.join(ratings.set_index('movieId'), on='movieId').reset_index(drop=True)
movies_with_ratings.dropna(inplace=True)

In [4]:
dataset = pd.DataFrame({
    'uid': movies_with_ratings.userId,
    'iid': movies_with_ratings.movieId,
    'ratings': movies_with_ratings.rating
})

In [5]:
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(dataset, reader)

In [6]:
algo = KNNWithMeans(k=50, sim_options={'name': 'cosine', 'user_based': False})

In [7]:
trainset, testset = train_test_split(data, test_size=.30)

In [8]:
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x2479ea3d320>

In [9]:
test_pred = algo.test(testset)

In [10]:
accuracy.rmse(test_pred, verbose=True)

RMSE: 0.9069


0.9068847109009078

In [11]:
uid=5

def personal_predistions(uid):
    df = pd.DataFrame(columns=['userId', 'movieId', 'title', 'rating_pred'])
    uid_movies = movies_with_ratings[(movies_with_ratings.userId==uid)].movieId.values
    iid_movies = [i for i in movies_with_ratings.movieId.unique() if i not in uid_movies]
    for i in iid_movies:
        if trainset.knows_item(i):
            if len(movies[(movies.movieId==i)])!=0:
                title = ''.join(movies[(movies.movieId==i)].title.values)
                prediction = algo.predict(uid=uid, iid=i)
                df = df.append({'userId': uid, 'movieId': i, 'title':title, 'rating_pred': prediction.est}, ignore_index=True)
    return df.sort_values('rating_pred', ascending=False).head(10)

personal_predistions(uid)

Unnamed: 0,userId,movieId,title,rating_pred
4694,5,7071,"Woman Under the Influence, A (1974)",5.0
487,5,626,"Thin Line Between Love and Hate, A (1996)",5.0
3223,5,4429,Moby Dick (1956),5.0
4060,5,5890,Elling (2001),5.0
4623,5,6983,Jane Eyre (1944),5.0
4011,5,5786,Paid in Full (2002),5.0
984,5,1341,Burnt Offerings (1976),5.0
2563,5,3496,Madame Sousatzka (1988),5.0
3995,5,5746,Galaxy of Terror (Quest) (1981),5.0
5148,5,8477,"Jetée, La (1962)",5.0


In [13]:
algo.predict(uid=5, iid=626)

Prediction(uid=5, iid=626, r_ui=None, est=5.0, details={'actual_k': 1, 'was_impossible': False})