## Warm up recommender system

In [44]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process



In [36]:
movies = '/Users/saragunnars/Skola/GitHub/Machine-learning-Sara-Gunnars/Data/ml-latest-small/movies.csv'
ratings = '/Users/saragunnars/Skola/GitHub/Machine-learning-Sara-Gunnars/Data/ml-latest-small/ratings.csv'

df_movies = pd.read_csv(movies, usecols = ['movieId', 'title'], dtype = {'movieId':'int32', 'title':'str'})
df_ratings = pd.read_csv(ratings, usecols = ['userId','movieId', 'rating'], dtype = {'userId':  'int32','movieId':'int32', 'rating':'float32'})


In [37]:
df_movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [38]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


### Sparse Matrix

In [39]:
#created pivot table from our ratings dataframe
#fillna(0) fill NaN values with 0
movies_users = df_ratings.pivot(index = 'movieId', columns = 'userId', values = 'rating').fillna(0)
movies_users

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
#create  matrix
mat_movies_users = csr_matrix(movies_users.values)

### Creating KNN model
Ways of calculation distance between points:
- Euclidean distance
- Manhattan  distance
- Minkowski distance
- Cosine similarity (measures similarity between two vectors)

In [42]:
# metric to use for distance computation, algorithm used to compute nearest neighbor and numbers of neighbors to use.
model_knn = NearestNeighbors(metric = 'cosine', algorithm= 'brute', n_neighbors= 20)

In [43]:
# training dataset
model_knn.fit(mat_movies_users)

### Recommender (movie_name) --> list of movies recommended to us

In [60]:
#Model inputs name of movie and then compares it's rating to the rest of the datasets ratings.
#It then presents the 20 movies and their respective index nr with the most similar ratings.
def recommender(movie_name, data, model, n_recommendations):
    model.fit(data)
    idx = process.extractOne(movie_name, df_movies['title'])[2] #the index info we extract from our comparison
    print('Movie Selected: ',df_movies['title'][idx], 'Index: ', idx)
    print('Searching for recommendations.....')
    distances, indices = model.kneighbors(data[idx], n_neighbors = n_recommendations)
    # sort results in order of similarity
    for i in indices:
        print(df_movies['title'][i].where(i!=idx)) #print movie title where i is not == idx

recommender('heat', mat_movies_users, model_knn, 20)


Movie Selected:  Heat (1995) Index:  5
Searching for recommendations.....
5                                                     NaN
592                                      Rock, The (1996)
31              Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
254     Léon: The Professional (a.k.a. The Professiona...
15                                          Casino (1995)
520                                          Fargo (1996)
398                                  Fugitive, The (1993)
43                            Seven (a.k.a. Se7en) (1995)
84                                    Broken Arrow (1996)
136                                      Desperado (1995)
615                  Independence Day (a.k.a. ID4) (1996)
97                                      Braveheart (1995)
46                             Usual Suspects, The (1995)
546                            Mission: Impossible (1996)
99             Rumble in the Bronx (Hont faan kui) (1995)
507                     Terminator 2: Judgment Day (1991