# Recommender on small data set

In [13]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

In [14]:
# Load the small movies and ratings CSV files, keeping only the columns used below.
df_movies = pd.read_csv(
    "../data/Large-data/ml-latest-small/movies.csv",
    usecols=["movieId", "title"],
)
df_ratings = pd.read_csv(
    "../data/Large-data/ml-latest-small/ratings.csv",
    usecols=["movieId", "userId", "rating"],
)

# Preview the first few movie rows.
df_movies.head(n=5)

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [15]:
# Preview the first few rating rows.
df_ratings.head(n=5)

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [16]:
# Attach the title to every rating row by looking up its movieId in the movies frame.
df_ratings_movies = df_ratings.join(
    df_movies.set_index("movieId"),
    on="movieId",
)

# Build a title x userId table of ratings; movie/user pairs without a rating become 0.
ratings_table = df_ratings_movies.pivot_table(
    values="rating",
    index="title",
    columns="userId",
).fillna(0)
ratings_table

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
eXistenZ (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0
xXx (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,2.0
xXx: State of the Union (2005),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5
¡Three Amigos! (1986),4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Discussed how to solve the assignment with classmates; parts of this code are derived from that discussion.
# recommender(movie name, ratings table, number of movies to recommend) => list of recommended movie titles
def recommender(movie_name, data, n_recommendations):
    """Recommend movies whose rating pattern is closest to a given movie.

    ``movie_name`` is fuzzy-matched against ``data.index`` to find the full
    title. A brute-force cosine nearest-neighbour model is then fitted on the
    rows of ``data`` and queried with the matched movie's row.

    Args:
        movie_name: Free-text (possibly partial) name of the movie to base
            the recommendations on.
        data: DataFrame with movie titles as its index and one row of
            ratings per title (as built for ``ratings_table`` in this
            notebook).
        n_recommendations: Maximum number of titles to return.

    Returns:
        A list of at most ``n_recommendations`` titles, ordered as returned
        by the neighbour search (closest first), excluding the matched
        movie's own row. Fewer titles are returned when ``data`` does not
        have enough other rows.

    Raises:
        ValueError: If ``data`` has no rows to match against.
    """
    if len(data.index) == 0:
        raise ValueError("data contains no movie titles to match against")

    # Find the full title that best matches the name given.
    title = process.extractOne(movie_name, data.index)[0]

    # Convert to a sparse matrix (one row per movie).
    mat_movies_users = csr_matrix(data.values)

    # Train the model.
    model = NearestNeighbors(metric="cosine", algorithm="brute")
    model.fit(mat_movies_users)

    # Find the row position of the matched movie.
    idx = np.where(data.index == title)[0][0]
    print(f"Recommendations based on: {title}")
    print(idx)
    print("-"*80)

    # Ask for one extra neighbour because the query row is normally its own
    # nearest neighbour, but never for more neighbours than there are rows.
    n_neighbors = min(n_recommendations + 1, mat_movies_users.shape[0])
    _, indices = model.kneighbors(mat_movies_users[idx], n_neighbors=n_neighbors)

    # Drop the query row by position rather than assuming it comes first:
    # rows with tied distances may be returned ahead of it.
    neighbour_rows = [row for row in indices.flatten() if row != idx]
    return [data.index[row] for row in neighbour_rows[:n_recommendations]]


# Example: ask for ten recommendations from a partial, lower-case title.
test = recommender(
    movie_name="fellowship of the ring",
    data=ratings_table,
    n_recommendations=10,
)

# Show one recommended title per line.
for title in test:
    print(title)


Recommendations based on: Lord of the Rings: The Fellowship of the Ring, The (2001)
5204
--------------------------------------------------------------------------------
Lord of the Rings: The Return of the King, The (2003)
Lord of the Rings: The Two Towers, The (2002)
Pirates of the Caribbean: The Curse of the Black Pearl (2003)
Matrix, The (1999)
Shrek (2001)
Fight Club (1999)
Memento (2000)
Spider-Man (2002)
Gladiator (2000)
Batman Begins (2005)
