## Filtrado por Contenido

In [1]:
import numpy as np
import pandas as pd
from collections import OrderedDict
import math 
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df_movies = pd.read_csv("ml-latest-small/movies.csv",sep=",")

In [3]:
df_movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
df_movies = pd.concat([df_movies, df_movies.genres.str.get_dummies(sep='|')], axis=1)

In [5]:
df_movies.head()

Unnamed: 0,movieId,title,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),Adventure|Children|Fantasy,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),Comedy|Romance,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II (1995),Comedy,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
categorias = df_movies.columns[3:]
df_movies.loc[1]

movieId                                        2
title                             Jumanji (1995)
genres                Adventure|Children|Fantasy
(no genres listed)                             0
Action                                         0
Adventure                                      1
Animation                                      0
Children                                       1
Comedy                                         0
Crime                                          0
Documentary                                    0
Drama                                          0
Fantasy                                        1
Film-Noir                                      0
Horror                                         0
IMAX                                           0
Musical                                        0
Mystery                                        0
Romance                                        0
Sci-Fi                                         0
Thriller            

In [7]:
user_preferences = OrderedDict(zip(categorias, []))

user_preferences['Action'] = 1
user_preferences['Adventure'] = 1
user_preferences['Animation'] = 1
user_preferences["Children's"] = 1
user_preferences["Comedy"] = 1
user_preferences['Crime'] = 1
user_preferences['Documentary'] = 1
user_preferences['Drama'] = 1
user_preferences['Fantasy'] = 1
user_preferences['Film-Noir'] = 1
user_preferences['Horror'] = 5
user_preferences['Musical'] = 1
user_preferences['Mystery'] = 1
user_preferences['Romance'] = 1
user_preferences['Sci-Fi'] = 1
user_preferences['War'] = 1
user_preferences['Thriller'] = 1
user_preferences['Western'] = 1

In [8]:
def dot_product(vector_1, vector_2):
    return sum([ i*j for i,j in zip(vector_1, vector_2)])

def cosine_measure(v1, v2): 
    return cosine_similarity(np.asmatrix(v1),np.asmatrix(v2))

def get_movie_score(movie_features, user_preferences):
    return cosine_measure(movie_features, user_preferences)[0][0]

In [9]:
jumanji_features = df_movies.loc[1][categorias]
jumanji_features

(no genres listed)    0
Action                0
Adventure             1
Animation             0
Children              1
Comedy                0
Crime                 0
Documentary           0
Drama                 0
Fantasy               1
Film-Noir             0
Horror                0
IMAX                  0
Musical               0
Mystery               0
Romance               0
Sci-Fi                0
Thriller              0
War                   0
Western               0
Name: 1, dtype: object

In [10]:
"""
jumanji_user_predicted_score = cosine_similarity(
    np.asmatrix(jumanji_features.values),
    np.asmatrix(jumanji_features.values))
jumanji_user_predicted_score
"""
jumanji_features.values

array([0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=object)

In [21]:
def get_movie_recommendations(user_preferences, n_recommendations):
    #metemos una columna al dataset movies_df con la puntuacion calculada para el usuario
    for i in range(len(df_movies.index)):
        df_movies.loc[i,'score'] = get_movie_score(df_movies.loc[i][categorias].values,jumanji_features.values)
    return df_movies.sort_values(by=['score'], ascending=False)['title'][:n_recommendations]

get_movie_recommendations(user_preferences, 20)

6389                          Bridge to Terabithia (2007)
1514            Darby O'Gill and the Little People (1959)
6751     Chronicles of Narnia: Prince Caspian, The (2008)
8230                Percy Jackson: Sea of Monsters (2013)
9565                            Gulliver's Travels (1996)
6655          Water Horse: Legend of the Deep, The (2007)
767                       Escape to Witch Mountain (1975)
9336                                 Pete's Dragon (2016)
6629                           Golden Compass, The (2007)
3574    Harry Potter and the Sorcerer's Stone (a.k.a. ...
6075    Chronicles of Narnia: The Lion, the Witch and ...
53                     Indian in the Cupboard, The (1995)
7426                           Alice in Wonderland (1933)
8719                   The Cave of the Golden Rose (1991)
7478    Chronicles of Narnia: The Voyage of the Dawn T...
1556                                  Return to Oz (1985)
9294               Alice Through the Looking Glass (2016)
109           