## KNN model for movie recommendations

Based on https://github.com/jisilvia/kNN_Recommender_System/blob/main/kNN_Recommender_System.ipynb

In [12]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import pickle

### Import dataset

In [13]:
# Read the ratings table and discard the two identifier columns in one chain;
# they are not used as features anywhere below.
movies_ds = (
    pd.read_csv('../recommendation_dataset.csv')
    .drop(columns=['user_id', 'movie_id'])
)
movies_ds.head()

Unnamed: 0,rating,title,action,adventure,animation,comedy,drama,fantasy,filmnoir,horror,imax,musical,mystery,other,romance,scifi,thriller,war,western
0,4.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
1,4.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
2,4.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
3,3.5,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
4,5.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0


In [14]:
# Preview the first five rows of the loaded frame (one row per individual rating).
movies_ds.head(n=5)

Unnamed: 0,rating,title,action,adventure,animation,comedy,drama,fantasy,filmnoir,horror,imax,musical,mystery,other,romance,scifi,thriller,war,western
0,4.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
1,4.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
2,4.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
3,3.5,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0
4,5.0,Toy Story (1995),0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0


### Obtaining a unique list of movie titles with an average rating

In [15]:
# Collapse the per-rating rows into one row per title.
# 1. `transform('mean')` broadcasts each title's mean rating onto all of its rows,
#    so the value survives the de-duplication that follows.
# 2. `drop_duplicates(subset='title')` keeps the first row of every title.
# 3. The individual `rating` column is no longer meaningful and is removed.
# NOTE: the row index is not reset here; later cells look titles up positionally
# with `.iloc`, which does not depend on the index labels.
movies_ds = (
    movies_ds
    .assign(average_rating=movies_ds.groupby('title')['rating'].transform('mean'))
    .drop_duplicates(subset='title')
    .drop(columns=['rating'])
)

# Save the de-duplicated table; index=False keeps the row index out of the file.
movies_ds.to_csv('../similar_movies_ds.csv', index=False)

# The preview must be the last expression of the cell, otherwise Jupyter discards it.
movies_ds.head()

In [16]:
# Feature matrix: every remaining column except the title
# (the genre indicator columns plus `average_rating`).
X = movies_ds.drop(columns=['title'])

# 11 neighbours are requested because the recommendation loop further down skips
# the first hit, leaving 10 recommendations to print.
knn = NearestNeighbors(n_neighbors=11, algorithm='ball_tree', metric='euclidean')
knn.fit(X)

In [17]:
# Example query: the feature row of "Toy Story (1995)", with the title column
# removed so that its columns line up with the matrix the model was fitted on.
toy_story = movies_ds.loc[movies_ds['title'] == 'Toy Story (1995)'].drop(columns=['title'])
toy_story

Unnamed: 0,action,adventure,animation,comedy,drama,fantasy,filmnoir,horror,imax,musical,mystery,other,romance,scifi,thriller,war,western,average_rating
0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,4.046875


In [18]:
# `kneighbors` returns two 2-D arrays (one row per query row); flatten them once
# instead of on every loop iteration.
distances, indices = knn.kneighbors(toy_story)
neighbor_distances = distances.flatten()
neighbor_positions = indices.flatten()

print('Recommendations for "Toy Story (1995)":\n')
# The first neighbour is skipped, as before: presumably it is the query movie
# itself at distance 0, because the query row is part of the fitted data
# (NOTE(review): a different movie with identical features could tie for first place).
# Ranks are numbered from 1; the previous `i+1` label made the list start at 2.
for rank, (position, distance) in enumerate(
    zip(neighbor_positions[1:], neighbor_distances[1:]), start=1
):
    # `position` is a row position in the fitted matrix, hence `.iloc` rather than `.loc`.
    print('{0}: {1}, with a distance of {2}.'.format(rank, movies_ds['title'].iloc[position], distance))

Recommendations for "Toy Story (1995)":

2: Monsters, Inc. (2001), with a distance of 0.22869318181818166.
3: Toy Story 2 (1999), with a distance of 0.3167613636363633.
4: Antz (1998), with a distance of 0.546875.
5: Emperor's New Groove, The (2000), with a distance of 0.796875.
6: Shrek the Third (2007), with a distance of 0.796875.
7: Triplets of Belleville, The (Les triplettes de Belleville) (2003), with a distance of 0.953125.
8: Just Visiting (2001), with a distance of 1.0010980299775842.
9: March of the Wooden Soldiers (a.k.a. Babes in Toyland) (1934), with a distance of 1.0010980299775842.
10: American Tail, An (1986), with a distance of 1.0010980299775842.
11: Lord of the Rings, The (1978), with a distance of 1.0010980299775842.


In [19]:
# Persist the fitted model next to the notebook so it can be reloaded later.
# Pickle files must only ever be loaded from trusted sources.
with open('knn_model.pkl', 'wb') as model_file:
    pickle.dump(knn, model_file)