## Filtrado por Contenido

In [1]:
import numpy as np
import pandas as pd
from collections import OrderedDict

In [2]:
df_movies = pd.read_csv("ml-latest-small/movies.csv",sep=",")

In [3]:
df_movies.head(12)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [4]:
df_movies = pd.concat([df_movies, df_movies.genres.str.get_dummies(sep='|')], axis=1)

In [5]:
df_movies.head()

Unnamed: 0,movieId,title,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),Adventure|Children|Fantasy,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),Comedy|Romance,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II (1995),Comedy,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
categorias = df_movies.columns[3:]
df_movies.loc[0]

movieId                                                         1
title                                            Toy Story (1995)
genres                Adventure|Animation|Children|Comedy|Fantasy
(no genres listed)                                              0
Action                                                          0
Adventure                                                       1
Animation                                                       1
Children                                                        1
Comedy                                                          1
Crime                                                           0
Documentary                                                     0
Drama                                                           0
Fantasy                                                         1
Film-Noir                                                       0
Horror                                                          0
IMAX      

In [7]:
user_preferences = OrderedDict(zip(categorias, []))

user_preferences['Action'] = 1
user_preferences['Adventure'] = 1
user_preferences['Animation'] = 1
user_preferences["Children's"] = 1
user_preferences["Comedy"] = 1
user_preferences['Crime'] = 5
user_preferences['Documentary'] = 1
user_preferences['Drama'] = 1
user_preferences['Fantasy'] = 1
user_preferences['Film-Noir'] = 1
user_preferences['Horror'] = 5
user_preferences['Musical'] = 1
user_preferences['Mystery'] = 5
user_preferences['Romance'] = 1
user_preferences['Sci-Fi'] = 1
user_preferences['War'] = 1
user_preferences['Thriller'] = 5
user_preferences['Western'] = 1

In [8]:
def dot_product(vector_1, vector_2):
    return sum([ i*j for i,j in zip(vector_1, vector_2)])

def get_movie_score(movie_features, user_preferences):
    return dot_product(movie_features, user_preferences)

In [9]:
toy_story_features = df_movies.loc[0][categorias]
toy_story_features

(no genres listed)    0
Action                0
Adventure             1
Animation             1
Children              1
Comedy                1
Crime                 0
Documentary           0
Drama                 0
Fantasy               1
Film-Noir             0
Horror                0
IMAX                  0
Musical               0
Mystery               0
Romance               0
Sci-Fi                0
Thriller              0
War                   0
Western               0
Name: 0, dtype: object

In [10]:
toy_story_user_predicted_score = dot_product(toy_story_features, user_preferences.values())
toy_story_user_predicted_score

9

In [11]:
def get_movie_recommendations(user_preferences, n_recommendations):
    #metemos una columna al dataset movies_df con la puntuacion calculada para el usuario
    df_movies['score'] = df_movies[categorias].apply(get_movie_score, 
                                                           args=([user_preferences.values()]), axis=1)
    return df_movies.sort_values(by=['score'], ascending=False)['title'][:n_recommendations]

get_movie_recommendations(user_preferences, 5)

7550                     Mars Needs Moms (2011)
5819                              Robots (2005)
7455                            Megamind (2010)
7441                              Rubber (2010)
7820    Journey 2: The Mysterious Island (2012)
Name: title, dtype: object

In [12]:
def get_movie_recommendations_scores(user_preferences, n_recommendations):
    #metemos una columna al dataset movies_df con la puntuacion calculada para el usuario
    df_movies['score'] = df_movies[categorias].apply(get_movie_score, 
                                                           args=([user_preferences.values()]), axis=1)
    titles = df_movies.sort_values(by=['score'], ascending=False)
    return titles['title'][:n_recommendations],titles['score'][:n_recommendations]


In [13]:
movies, scores = get_movie_recommendations_scores(user_preferences, 32)

In [14]:
scores = list(map(lambda x: 1 if x>=15 else 0, scores))

In [15]:
#METRICAS

In [16]:
# Mean Reciprocal Rank(MRR)
'''
De un listado de recomendación ordenado, MRR considera a la posición del primer elemento relevante que fue recomendado.
Sólo considera el primer elemento e ignora el resto de los aciertos.
'''

'\nDe un listado de recomendación ordenado, MRR considera a la posición del primer elemento relevante que fue recomendado.\nSólo considera el primer elemento e ignora el resto de los aciertos.\n'

In [17]:
def mean_reciprocal_rank(rs):    
    rs = (np.asarray(r).nonzero()[0] for r in rs)
    return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])

In [18]:
mean_reciprocal_rank(scores)

0.34375

In [19]:
# Precision at K
'''
Toma la efectividad acumulada hasta un k definido
'''

'\nToma la efectividad acumulada hasta un k definido\n'

In [20]:
def precision_at_k(r, k):    
    assert k >= 1
    r = np.asarray(r)[:k] != 0
    if r.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(r)

In [21]:
precision_at_k(scores,32)

0.34375

In [22]:
# Mean Average Precision
'''
Average Precision (AP) es el promedio de los valores de precisión por sobre todo los rangos donde se encuentran items relevantes.
(M)ap, obtiene el rendimeinto de recomendacion para todos los usuarios y se saca el promedio para cada uno.
'''

'\nAverage Precision (AP) es el promedio de los valores de precisión por sobre todo los rangos donde se encuentran items relevantes.\n(M)ap, obtiene el rendimeinto de recomendacion para todos los usuarios y se saca el promedio para cada uno.\n'

In [23]:
def average_precision(r):    
    r = np.asarray(r) != 0
    out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]
    if not out:
        return 0.
    return np.mean(out)

In [24]:
def mean_average_precision(rs):    
    return np.mean([average_precision(r) for r in rs])

In [25]:
mean_average_precision([scores])

1.0