# Recommendation system training using k nearest neighbour

In [12]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

movies = "../data/movie_lens_small/movies.csv"
ratings = "../data/movie_lens_small/ratings.csv"

df_movies = pd.read_csv(movies, usecols=["movieId", "title"], dtype={"movieId":"int32", "title":"str"})
df_ratings = pd.read_csv(ratings, usecols=["userId", "movieId", "rating"], dtype={"userId":"int32", "movieId":"int32", "rating":"float32"})
df_ratings.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int32 
 1   title    9742 non-null   object
dtypes: int32(1), object(1)
memory usage: 114.3+ KB


In [7]:
## Spare Matrix
        
#         Users
#        [4,4,5] A
#movies  [3,3,4] B == Cos(A,B) => 0.95
#        [3,2,1]

movies_users = df_ratings.pivot(index="movieId", columns="userId", values="rating").fillna(0)
mat_movies_users = csr_matrix(movies_users.values)


In [20]:
print(movies_users)

userId   1    2    3    4    5    6    7    8    9    10   ...  601  602  603  \
movieId                                                    ...                  
1        4.0  0.0  0.0  0.0  4.0  0.0  4.5  0.0  0.0  0.0  ...  4.0  0.0  4.0   
2        0.0  0.0  0.0  0.0  0.0  4.0  0.0  4.0  0.0  0.0  ...  0.0  4.0  0.0   
3        4.0  0.0  0.0  0.0  0.0  5.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
4        0.0  0.0  0.0  0.0  0.0  3.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
5        0.0  0.0  0.0  0.0  0.0  5.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
...      ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
193581   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
193583   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
193585   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
193587   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
193609   0.0  0.0  0.0  0.0 

In [8]:
# Euclidean Distance
# Manhattan Distance
# Minkowski Distance
# Cosine Similarity

model_knn = NearestNeighbors(metric="cosine", algorithm="brute", n_neighbors=20)

In [9]:
model_knn.fit(mat_movies_users)

In [14]:
# Recommender(movie_name) => List of Movies recommended 

def recommender(movie_name, data, model, n_recommendations) :
    model.fit(data)
    idx = process.extractOne(movie_name, df_movies["title"])[2]
    print("Movie Selected:" ,df_movies["title"][idx], "Index: ",idx )
    print("Searching for recommendations.....")
    distances, indices=model.kneighbors(data[idx], n_neighbors=n_recommendations)
    for i in indices:
        print(df_movies["title"][i].where(i!=idx))

recommender("batman", mat_movies_users, model_knn,20)


Movie Selected: Batman Forever (1995) Index:  126
Searching for recommendations.....
126                                                  NaN
509                                        Batman (1989)
337                                     True Lies (1994)
302                    Ace Ventura: Pet Detective (1994)
378                                   Cliffhanger (1993)
508                            Dances with Wolves (1990)
138                    Die Hard: With a Vengeance (1995)
275                                      Stargate (1994)
506                                       Aladdin (1992)
307                      Clear and Present Danger (1994)
217    Interview with the Vampire: The Vampire Chroni...
9                                       GoldenEye (1995)
123                                     Apollo 13 (1995)
418                                 Jurassic Park (1993)
287                        Star Trek: Generations (1994)
398                                 Fugitive, The (1993)
197