In [2]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

In [3]:
movies='Data/movies_small.csv'
ratings='Data/ratings_small.csv'

df_movies=pd.read_csv(movies, usecols=['movieId','title'], dtype={'movieId':'int32','title':'str'})
df_ratings=pd.read_csv(ratings, usecols=['userId','movieId','rating'],dtype={'userId':'int32','movieId':'int32','rating':'float32'})

display(df_movies[:2], df_ratings[:2])

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)


Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0


In [14]:
#Spare Matrix
#         Users
#        [4,4,5] A
#Movies  [3,3,4] B ==   Cos(A,B) => 0.95 
#       [3,2,1]

movies_users=df_ratings.pivot(index='movieId', columns='userId',values='rating').fillna(0)
mat_movies_users=csr_matrix(movies_users.values)

mat_movies_users.shape

(9724, 610)

In [5]:
# Euclidean Distance
# Manhattan Distance
# Minkowski Distance 
# Cosine Similarity
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20)

In [32]:
model_knn.fit(mat_movies_users)

df_ratings

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0
...,...,...,...
100831,610,166534,4.0
100832,610,168248,5.0
100833,610,168250,5.0
100834,610,168252,5.0


In [33]:
model_knn.fit(mat_movies_users)

idx=process.extractOne('iron man', df_movies['title'])[2]

print('Movie Selected: ',df_movies['title'][idx], 'Index: ',idx)
print('Searching for recommendations.....')

distances, indices=model_knn.kneighbors(mat_movies_users[idx], n_neighbors=20)
for i in indices:
    print(df_movies['title'][i].where(i!=idx))

Movie Selected:  Iron Man (2008) Index:  6743
Searching for recommendations.....


ValueError: Expected 2D array, got scalar array instead:
array=Iron Man (2008).
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [7]:
# Recommender(movie_name) => List of Movies recommended

def recommender(movie_name, data,model, n_recommendations ):
    model.fit(data)
    idx=process.extractOne(movie_name, df_movies['title'])[2]
    print('Movie Selected: ',df_movies['title'][idx], 'Index: ',idx)
    print('Searching for recommendations.....')
    distances, indices=model.kneighbors(data[idx], n_neighbors=n_recommendations)
    for i in indices:
        print(df_movies['title'][i].where(i!=idx))
    
recommender('iron man', mat_movies_users, model_knn,20)
    

Movie Selected:  Iron Man (2008) Index:  6743
Searching for recommendations.....
6743                                            NaN
7197                                  Garage (2007)
7195                        Merry Madagascar (2009)
7354                             A-Team, The (2010)
6726                         Superhero Movie (2008)
7137                         Thirst (Bakjwi) (2009)
7026                                 Scorpio (1973)
7571                                 Win Win (2011)
3880                  Look Who's Talking Now (1993)
6388    After the Wedding (Efter brylluppet) (2006)
7601                       Idiots and Angels (2008)
6755                Nina's Heavenly Delights (2006)
7022                                   Earth (2007)
7338                          Blue Valentine (2010)
4421                         What's Up, Doc? (1972)
6195                             Silent Hill (2006)
5885                                   Crash (2004)
3740                              M