## Recomendation

In [2]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process



In [3]:
movies = "data/ml-latest-small/movies.csv"
ratings = "data/ml-latest-small/ratings.csv"

df_movies = pd.read_csv(movies, usecols=["movieId", "title"], dtype={"movieId":"int32", "title":"str"})
df_ratings = pd.read_csv(ratings, usecols=["userId", "movieId", "rating"], dtype={"userId":"int32", "movieId": "int32", "rating":"float32"})

In [4]:
df_ratings.head(10)

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0
5,1,70,3.0
6,1,101,5.0
7,1,110,4.0
8,1,151,5.0
9,1,157,5.0


In [5]:
df_movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [6]:
df_movies.describe()

Unnamed: 0,movieId
count,9742.0
mean,42200.353623
std,52160.494854
min,1.0
25%,3248.25
50%,7300.0
75%,76232.0
max,193609.0


In [7]:
# sparse Matrix

movie_users = df_ratings.pivot(index="movieId", columns="userId", values="rating").fillna(0)
mat_movies_users = csr_matrix(movie_users.values)

In [8]:
# Euclidean dist L1
# Manhattan dist L2
# Minkowski dist
# Cosine similarity

model_knn = NearestNeighbors(metric="cosine", algorithm="brute", n_neighbors=20)

In [9]:
model_knn.fit(mat_movies_users) # Här använder vi redan .fit

In [11]:
# Recomender(movie_name) => List of movies recommended 

def recommender(movie_name, data, model, n_recommendations):
    # model.fit(data) Om vi använder .fit en gång till så får vi större chans till felaktigt anpassning av modellen. Så vi använder helt enkelt .fit från cellen ovan
    idx = process.extractOne(movie_name, df_movies["title"])[2]
    print("Movie Selected: ", df_movies["title"][idx], "Index: ", idx)
    print("Searching for recommendations....")
    distances, indices=model.kneighbors(data[idx], n_neighbors=n_recommendations)
    for i in indices:
        print(df_movies["title"][i].where(i!=idx))

recommender("toy story 2", mat_movies_users, model_knn,20)

Movie Selected:  Toy Story 2 (1999) Index:  2355
Searching for recommendations....
2355                                                  NaN
4253                                   Life Stinks (1991)
3856                                   Windtalkers (2002)
2322                                 Irma la Douce (1963)
3901                          K-19: The Widowmaker (2002)
1963                                    Idle Hands (1999)
2801                                Boys and Girls (2000)
3685                                   Waydowntown (2000)
2883                               Bless the Child (2000)
3812                       Full Moon in Blue Water (1988)
3647                                Charlotte Gray (2001)
3488    Into the Arms of Strangers: Stories of the Kin...
4330                                         Manic (2001)
3697                                         Heidi (1937)
3659                             Russia House, The (1990)
3540                                       Band