# Introdução a sistemas de recomendação

### Bibliotecas básicas

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
import matplotlib.pyplot as plt
%matplotlib inline

### Carregamento dos dados

In [2]:
# Load the movie catalogue and relabel its columns with Portuguese names.
caminho_filmes = './dataset/movies.csv'
colunas_filmes = ['filmeId', 'titulo', 'generos']

filmes = pd.read_csv(caminho_filmes)
filmes.columns = colunas_filmes  # positional relabel: the CSV must have exactly these three columns

# Quick sanity check: (rows, columns), followed by a preview of the first rows.
print(filmes.shape)
filmes.head()

(9742, 3)


Unnamed: 0,filmeId,titulo,generos
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
# Load the ratings table and relabel its columns with Portuguese names.
caminho_notas = './dataset/ratings.csv'
colunas_notas = ['usuarioId', 'filmeId', 'nota', 'momento']

notas = pd.read_csv(caminho_notas)
notas.columns = colunas_notas  # positional relabel: the CSV must have exactly these four columns

# Quick sanity check: (rows, columns), followed by a preview of the first rows.
print(notas.shape)
notas.head()

(100836, 4)


Unnamed: 0,usuarioId,filmeId,nota,momento
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


### Resumo das notas

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column of the ratings table.
notas.describe()

Unnamed: 0,usuarioId,filmeId,nota,momento
count,100836.0,100836.0,100836.0,100836.0
mean,326.127564,19435.295718,3.501557,1205946000.0
std,182.618491,35530.987199,1.042529,216261000.0
min,1.0,1.0,0.5,828124600.0
25%,177.0,1199.0,3.0,1019124000.0
50%,325.0,2991.0,3.5,1186087000.0
75%,477.0,8122.0,4.0,1435994000.0
max,610.0,193609.0,5.0,1537799000.0


### Primeira tentativa de recomendação: indicando os mais populares pelo total de votos

In [5]:
# Index the catalogue by movie id so the per-movie vote counts below line up
# with the right rows. The guard keeps this cell re-runnable: once 'filmeId'
# is the index it is no longer a column, and an unguarded second set_index
# call would raise KeyError.
if filmes.index.name != 'filmeId':
    filmes = filmes.set_index('filmeId')

# Number of ratings each movie received, as a Series keyed by filmeId.
total_de_votos = notas['filmeId'].value_counts()

# Column assignment aligns on the index. The saved output shows this column as
# float, presumably because movies without any rating end up as NaN -- confirm.
filmes['total_de_votos'] = total_de_votos

# First recommendation attempt: the ten movies with the most ratings.
filmes.sort_values('total_de_votos', ascending=False).head(10)

Unnamed: 0_level_0,titulo,generos,total_de_votos
filmeId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
356,Forrest Gump (1994),Comedy|Drama|Romance|War,329.0
318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0
296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,307.0
593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,279.0
2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,278.0
260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,251.0
480,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller,238.0
110,Braveheart (1995),Action|Drama|War,237.0
589,Terminator 2: Judgment Day (1991),Action|Sci-Fi,224.0
527,Schindler's List (1993),Drama|War,220.0


### Segunda tentativa de recomendação: indicando os filmes com maior média de notas, considerando apenas os que têm pelo menos 50 votos

In [6]:
# Minimum number of ratings a movie needs before its average is used for ranking.
MINIMO_DE_VOTOS = 50

# Pick the 'nota' column before aggregating so that only the ratings are
# averaged, instead of averaging every column and discarding all but one.
notas_medias = notas.groupby('filmeId')['nota'].mean()

# Column assignment aligns on the index (assumes filmes is indexed by filmeId).
filmes['nota_media'] = notas_medias

# Second recommendation attempt: best average rating among sufficiently-rated movies.
(
    filmes
    .query('total_de_votos >= @MINIMO_DE_VOTOS')
    .sort_values('nota_media', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,titulo,generos,total_de_votos,nota_media
filmeId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022
858,"Godfather, The (1972)",Crime|Drama,192.0,4.289062
2959,Fight Club (1999),Action|Crime|Drama|Thriller,218.0,4.272936
1276,Cool Hand Luke (1967),Drama,57.0,4.27193
750,Dr. Strangelove or: How I Learned to Stop Worr...,Comedy|War,97.0,4.268041
904,Rear Window (1954),Mystery|Thriller,84.0,4.261905
1221,"Godfather: Part II, The (1974)",Crime|Drama,129.0,4.25969
48516,"Departed, The (2006)",Crime|Drama|Thriller,107.0,4.252336
1213,Goodfellas (1990),Crime|Drama,126.0,4.25
912,Casablanca (1942),Drama|Romance,100.0,4.24


### Recomendação baseada em similaridade de gênero

In [7]:
# Ids of the movies the example user has already watched.
eu_assisti = [
    1, 21, 19, 10,
    11, 7, 2,
]

# Catalogue rows for those movies, in the order listed (filmes is indexed by filmeId).
filmes.loc[eu_assisti, :]

Unnamed: 0_level_0,titulo,generos,total_de_votos,nota_media
filmeId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215.0,3.92093
21,Get Shorty (1995),Comedy|Crime|Thriller,89.0,3.494382
19,Ace Ventura: When Nature Calls (1995),Comedy,88.0,2.727273
10,GoldenEye (1995),Action|Adventure|Thriller,132.0,3.496212
11,"American President, The (1995)",Comedy|Drama|Romance,70.0,3.671429
7,Sabrina (1995),Comedy|Romance,54.0,3.185185
2,Jumanji (1995),Adventure|Children|Fantasy,110.0,3.431818


In [8]:
# Candidates: movies with at least 50 ratings whose genre string is exactly
# "Adventure|Children|Fantasy" (an exact match on the whole string, not a
# per-genre overlap).
tem_votos_suficientes = filmes['total_de_votos'] >= 50
mesmo_genero = filmes['generos'] == "Adventure|Children|Fantasy"
aventura_infantil_fantasia = filmes[tem_votos_suficientes & mesmo_genero]

# Drop what was already watched (ids absent from the candidates are ignored)
# and rank the remainder by average rating, best first.
(
    aventura_infantil_fantasia
    .drop(eu_assisti, errors='ignore')
    .sort_values('nota_media', ascending=False)
)

Unnamed: 0_level_0,titulo,generos,total_de_votos,nota_media
filmeId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4896,Harry Potter and the Sorcerer's Stone (a.k.a. ...,Adventure|Children|Fantasy,107.0,3.761682
41566,"Chronicles of Narnia: The Lion, the Witch and ...",Adventure|Children|Fantasy,62.0,3.443548
