# Recommender on small data set

In [73]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

In [74]:
# Read the movies and ratings tables of the small data set.
# The directory is named once so both reads stay in sync if the data moves.
DATA_DIR = "../data/Large-data/ml-latest-small"

# Only the columns used later in the notebook are loaded.
df_movies = pd.read_csv(f"{DATA_DIR}/movies.csv", usecols=["movieId", "title"])
df_ratings = pd.read_csv(f"{DATA_DIR}/ratings.csv", usecols=["movieId", "userId", "rating"])

# Preview the first rows of the movies table.
df_movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [75]:
# Preview the first five rows of the ratings table.
df_ratings.head(n=5)

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [76]:
# Attach each rating's movie title by looking up its movieId in df_movies,
# giving one frame with userId, movieId, rating and title.
df_ratings_movies = df_ratings.join(
    df_movies.set_index("movieId"),
    on="movieId",
)

# Build a title-by-user table of ratings. pivot_table aggregates with its
# default (mean) when several ratings share the same title/user pair, and
# every title/user pair without a rating is filled with 0.
ratings_table = (
    df_ratings_movies
    .pivot_table(values="rating", index="title", columns="userId")
    .fillna(0)
)
ratings_table

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
eXistenZ (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0
xXx (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,2.0
xXx: State of the Union (2005),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5
¡Three Amigos! (1986),4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [77]:
# Re-encode the title-by-user ratings as a compressed sparse row matrix;
# row order matches ratings_table.index.
mat_movies_users = csr_matrix(ratings_table.to_numpy())
mat_movies_users

<9719x610 sparse matrix of type '<class 'numpy.float64'>'
	with 100832 stored elements in Compressed Sparse Row format>

In [78]:
# Fit a brute-force nearest-neighbour index over the movie rows,
# comparing rating vectors by cosine distance.
model_knn = NearestNeighbors(algorithm="brute", metric="cosine")
model_knn.fit(mat_movies_users)

In [79]:
# Recommender(movie name, number of movies to recommend) => list of movies recommended
def recommender(movie_name, n_recommendations):
    """Recommend movies whose rating vectors are nearest to the given movie.

    The free-text ``movie_name`` is fuzzy-matched against the titles in
    ``ratings_table.index``; the best match is used as the query row for
    ``model_knn``. The matched title and a separator line are printed so the
    caller can see which movie the recommendations are based on.

    Parameters
    ----------
    movie_name : str
        Full or partial movie title to look up.
    n_recommendations : int
        Maximum number of titles to return. Fewer are returned when the
        fitted matrix does not contain enough other movies.

    Returns
    -------
    list
        Recommended titles, nearest first, never containing the matched
        movie itself.

    Raises
    ------
    ValueError
        If ``n_recommendations`` is negative or there are no titles to
        match against.
    """
    if n_recommendations < 0:
        raise ValueError(
            f"n_recommendations must be non-negative, got {n_recommendations}"
        )

    # finds the full title based on name given; extractOne yields
    # (title, score) or None when there is nothing to choose from
    match = process.extractOne(movie_name, ratings_table.index)
    if match is None:
        raise ValueError("No movie titles available to match against")
    title = match[0]

    # finds the row position of the matched movie
    idx = np.where(ratings_table.index == title)[0][0]
    print(f"Recommendations based on: {title}")
    print("-"*80)

    # Ask for one extra neighbour because the query movie is normally among
    # its own neighbours, but never for more rows than the model was fitted on.
    n_neighbors = min(n_recommendations + 1, mat_movies_users.shape[0])
    _, indices = model_knn.kneighbors(mat_movies_users[idx], n_neighbors=n_neighbors)

    # Drop the query movie by row index rather than by position: with tied
    # distances it is not guaranteed to be the first neighbour returned.
    neighbor_rows = indices.flatten()
    neighbor_rows = neighbor_rows[neighbor_rows != idx][:n_recommendations]

    return [ratings_table.index[row] for row in neighbor_rows]


# Ask for ten recommendations from a partial, lower-case title.
test = recommender("star wars: episode V", 10)

# Print one recommended title per line.
for recommended_title in test:
    print(recommended_title)


Recommendations based on: Star Wars: Episode V - The Empire Strikes Back (1980)
8002
--------------------------------------------------------------------------------
Star Wars: Episode IV - A New Hope (1977)
Star Wars: Episode VI - Return of the Jedi (1983)
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
Matrix, The (1999)
Indiana Jones and the Last Crusade (1989)
Terminator, The (1984)
Back to the Future (1985)
Aliens (1986)
Alien (1979)
Star Wars: Episode I - The Phantom Menace (1999)
