In [None]:
conda install -c conda-forge/label/cf202003 scikit-surprise

In [None]:
import pandas as pd
from surprise import Dataset
from surprise import Reader

In [None]:
# Toy ratings table: users A-E rate items 1 and 2 (user E has only rated
# item 1). Each position across the three lists describes one rating.
ratings_dict = dict(
    item=[1, 2, 1, 2, 1, 2, 1, 2, 1],
    user=['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D', 'E'],
    rating=[1, 2, 2, 4, 2.5, 4, 4.5, 5, 3],
)

# One row per rating, columns in the same order as the dictionary keys.
df = pd.DataFrame(data=ratings_dict, columns=["item", "user", "rating"])

In [None]:
df

In [None]:
# Reader configured with the (min, max) bounds of the rating scale: 1 to 5.
reader = Reader(rating_scale=(1, 5))

# Wrap the DataFrame as a Surprise dataset. The columns are selected
# explicitly in user, item, rating order before being handed over.
data = Dataset.load_from_df(df[["user", "item", "rating"]], reader)

In [None]:
from surprise import KNNWithMeans

In [None]:
# Similarity settings passed to the algorithm: use the "cosine" measure.
sim_options = dict(name="cosine")

# KNNWithMeans instance configured with k=3 and the settings above.
algo = KNNWithMeans(sim_options=sim_options, k=3)

In [None]:
# Build a trainset from the whole of `data` (no split is made in this cell).
trainingSet = data.build_full_trainset()

# Fit the algorithm on that trainset; as the cell's last expression, the
# value returned by fit() is what the notebook displays.
algo.fit(trainingSet)

In [None]:
# Ask the fitted model for user 'E''s rating of item 2 (raw ids as they
# appear in the DataFrame), then show the estimated value.
prediction = algo.predict(uid='E', iid=2)
prediction.est

# **MovieLens**

In [None]:
import numpy as np

import sklearn


In [None]:
# Location of the ratings CSV used for the rest of the notebook.
RATINGS_URL = "https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv"

# Download and parse the file, then preview the first rows.
ratings = pd.read_csv(RATINGS_URL)
ratings.head()

In [None]:
# Location of the movies CSV; its movieId and title columns are used later
# to turn ids into titles.
MOVIES_URL = "https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv"

# Download and parse the file, then preview the first rows.
movies = pd.read_csv(MOVIES_URL)
movies.head()

In [None]:
movies.tail()

In [None]:
# Total number of rating rows in the table.
n_ratings = ratings.shape[0]
n_ratings

In [None]:
# Number of distinct movieId values that appear in the ratings table
# (dropna=False keeps the count identical to len(unique())).
n_movies = ratings['movieId'].nunique(dropna=False)
n_movies

In [None]:
# Number of distinct userId values that appear in the ratings table
# (dropna=False keeps the count identical to len(unique())).
n_users = ratings['userId'].nunique(dropna=False)
n_users

In [None]:

# Average rating per movie: a one-column ('rating') DataFrame indexed by movieId.
mean_rating = ratings.groupby(by='movieId').agg({'rating': 'mean'})

In [None]:
mean_rating.head()

In [None]:
# movieId (index label) of the first movie with the largest mean rating.
highest_rated = mean_rating.loc[:, 'rating'].idxmax()

In [None]:
# Show the metadata row(s) of the highest-rated movie.
movies[movies['movieId'].eq(highest_rated)]

In [None]:
# movieId (index label) of the first movie with the smallest mean rating,
# followed by its metadata row(s).
lowest_rated = mean_rating.loc[:, 'rating'].idxmin()
movies[movies['movieId'].eq(lowest_rated)]

In [None]:
# Every individual rating row recorded for the highest-rated movie.
ratings.loc[ratings['movieId'].eq(highest_rated)]

In [None]:
# Every individual rating row recorded for the lowest-rated movie.
ratings.loc[ratings['movieId'].eq(lowest_rated)]

In [None]:
# Dimensions of the rating matrix built below: N distinct users and
# M distinct movies (dropna=False matches the count of len(unique())).
N = ratings['userId'].nunique(dropna=False)
M = ratings['movieId'].nunique(dropna=False)

In [None]:
from scipy.sparse import csr_matrix

In [None]:
# Sorted unique raw ids of each column plus, for every ratings row, the
# position of that row's id inside the sorted array. A single np.unique call
# per column replaces four separate calls and a per-row dictionary lookup.
user_ids, user_positions = np.unique(ratings["userId"], return_inverse=True)
movie_ids, movie_positions = np.unique(ratings["movieId"], return_inverse=True)

# Map Ids to indices
user_mapper = {raw_id: idx for idx, raw_id in enumerate(user_ids)}
movie_mapper = {raw_id: idx for idx, raw_id in enumerate(movie_ids)}

# Map indices to IDs
user_inv_mapper = dict(enumerate(user_ids))
movie_inv_mapper = dict(enumerate(movie_ids))

# Matrix coordinates of every rating, kept as plain lists of Python ints.
user_index = user_positions.tolist()
movie_index = movie_positions.tolist()

# Sparse rating matrix with one row per movie and one column per user; the
# shape is taken from the same unique-id arrays the mappers were built from.
X = csr_matrix(
    (ratings["rating"], (movie_index, user_index)),
    shape=(len(movie_ids), len(user_ids)),
)

In [None]:
X.shape

In [None]:
from sklearn.neighbors import NearestNeighbors

In [None]:
def find_similar_movies(movie_id, X, k, metric="cosine"):
    """Return the raw ids of the movies most similar to ``movie_id``.

    Similarity is measured between the rows of ``X`` with a brute-force
    nearest-neighbour search. The module-level ``movie_mapper`` and
    ``movie_inv_mapper`` dictionaries translate between raw movie ids and
    row positions of ``X``.

    Args:
        movie_id: Raw id of the query movie; must be a key of ``movie_mapper``.
        X: Matrix whose rows are indexed by ``movie_mapper`` values.
        k: Maximum number of similar movies to return.
        metric: Distance metric passed to ``NearestNeighbors``.

    Returns:
        A list of at most ``k`` raw movie ids, nearest first. The query movie
        itself is never included. Fewer than ``k`` ids are returned when ``X``
        does not contain enough other rows.

    Raises:
        KeyError: If ``movie_id`` is not present in ``movie_mapper``.
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    movie_ind = movie_mapper[movie_id]
    if k == 0:
        return []

    # Ask for one extra neighbour because the query row is part of the fitted
    # data, but never for more rows than X holds (scikit-learn rejects that).
    n_neighbors = min(k + 1, X.shape[0])

    # The index is rebuilt on every call; X is not modified.
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)

    # reshape keeps the query two-dimensional for both sparse and dense X.
    movie_vec = X[movie_ind].reshape(1, -1)
    neighbour = kNN.kneighbors(movie_vec, return_distance=False)

    # Drop the query row by its index rather than by position: when another
    # movie sits at the same distance (e.g. an identical rating vector), the
    # query is not guaranteed to be the first neighbour returned.
    neighbour_ids = [
        movie_inv_mapper[row]
        for row in neighbour.ravel().tolist()
        if row != movie_ind
    ]
    return neighbour_ids[:k]

In [None]:
# Lookup table from movieId to title, built from the movies frame.
movie_titles = {
    mid: title for mid, title in zip(movies['movieId'], movies['title'])
}

# Movie used as the query for the recommendations.
movie_id = 3

# Ids of the ten movies closest to the query, and the query's own title.
similar_ids = find_similar_movies(movie_id, X, k=10)
movie_title = movie_titles[movie_id]

# Print the query title followed by one recommended title per line.
print(f"Since you watched {movie_title}")
for i in similar_ids:
    print(movie_titles[i])