In [38]:
import pandas as pd
from scipy.sparse import csr_matrix #create sparce matrix of the reshaped dataframe
from sklearn.neighbors import NearestNeighbors 
from fuzzywuzzy import process # to improve search

In [39]:
movies='movies.csv'
ratings='ratings.csv'

df_movies=pd.read_csv(movies,  dtype={'movieId':'int32','title':'str'})
df_ratings=pd.read_csv(ratings, usecols=['userId','movieId','rating'],dtype={'userId':'int32','movieId':'int32','rating':'float32'})

In [40]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [41]:
df_movies.shape

(9742, 3)

In [42]:
df_movies.head()


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [43]:
#Spare Matrix
#building the KNN model    
movies_users=df_ratings.pivot(index='movieId', columns='userId',values='rating').fillna(0) #reshaping datafarme
movies_users

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,4.0,0.0,...,0.0,4.0,3.5,0.0,0.0,0.0,0.0,0.0,4.0,5.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161944,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162542,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
mat_movies_users=csr_matrix(movies_users.values)
mat_movies_users

<9066x671 sparse matrix of type '<class 'numpy.float32'>'
	with 100004 stored elements in Compressed Sparse Row format>

In [45]:
model_knn= NearestNeighbors(metric='euclidean', algorithm='brute', n_neighbors=20)
# euclidean =>look for euclidean distance
# brute => itertate every cell
#

In [46]:
model_knn.fit(mat_movies_users)

NearestNeighbors(algorithm='brute', metric='euclidean', n_neighbors=20)

In [47]:
def movieRecommender(movie_user_like, data,model, n_recommendations ):
    model.fit(data)
    index=process.extractOne(movie_user_like, df_movies['title'])[2] #index
    print('Searching for recommending similar movies.....')
    print("Top 5 similar movies to "+movie_user_like+" are:\n")
    print(index)
    distances, indices=model.kneighbors(data[index], n_neighbors=n_recommendations)
    for i in indices:
        print(df_movies['title'][i].where(i!=index))
    


In [48]:
movieRecommender('Vampire in Brooklyn', mat_movies_users, model_knn,10)

Searching for recommending similar movies.....
Top 5 similar movies to Vampire in Brooklyn are:

82
82                                                   NaN
1557                               Rocketeer, The (1991)
1223                            Thousand Acres, A (1997)
1533    Fanny and Alexander (Fanny och Alexander) (1982)
2423                         Zed & Two Noughts, A (1985)
1546                            Mighty Ducks, The (1992)
1535                               Tender Mercies (1983)
3561                              Better Than Sex (2000)
1161            Shall We Dance? (Shall We Dansu?) (1996)
2297                                   Dead Again (1991)
Name: title, dtype: object
