# Instalando Dependências
- Pandas
- Numpy
- Scikit Learn

In [1]:
# %pip installs into the environment of the running kernel; requests is included
# because the notebook imports it to download the dataset.
%pip install pandas numpy scikit-learn requests



In [2]:
import numpy as np
import pandas as pd
import requests
import zipfile
from sklearn.model_selection import train_test_split # Avaliar Precisão do Modelo
from sklearn.metrics.pairwise import cosine_similarity # Funções Avaliar Similaridade

# Baixando CSV MovieLens

In [10]:
# MovieLens "latest small" dataset archive
link = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"

# Stream the ZIP in chunks. The context manager releases the connection when done,
# the timeout keeps the cell from hanging forever on a stalled server, and
# raise_for_status() stops here on an HTTP error instead of saving an error page
# that would only fail later as a corrupt ZIP.
with requests.get(link, stream=True, timeout=30) as response:
  response.raise_for_status()
  # "wb" = write/binary
  with open("ml-latest-small.zip", "wb") as file:
    for chunk in response.iter_content(chunk_size=1024):
      if chunk:
        file.write(chunk)

# Extract the archive into the current working directory
with zipfile.ZipFile("ml-latest-small.zip", "r") as zip_ref:
  zip_ref.extractall()

# Load only the columns used later in the notebook
avaliacoes = pd.read_csv(
    "ml-latest-small/ratings.csv",
    usecols=["userId", "movieId", "rating"]
)

filmes = pd.read_csv(
    "ml-latest-small/movies.csv",
    usecols=["movieId", "title"]
)

print("DataFrames carregados:")
print(filmes, avaliacoes)

DataFrames carregados:
      movieId                                      title
0           1                           Toy Story (1995)
1           2                             Jumanji (1995)
2           3                    Grumpier Old Men (1995)
3           4                   Waiting to Exhale (1995)
4           5         Father of the Bride Part II (1995)
...       ...                                        ...
9737   193581  Black Butler: Book of the Atlantic (2017)
9738   193583               No Game No Life: Zero (2017)
9739   193585                               Flint (2017)
9740   193587        Bungo Stray Dogs: Dead Apple (2018)
9741   193609        Andrew Dice Clay: Dice Rules (1991)

[9742 rows x 2 columns]         userId  movieId  rating
0            1        1     4.0
1            1        3     4.0
2            1        6     4.0
3            1       47     5.0
4            1       50     5.0
...        ...      ...     ...
100831     610   166534     4.0
100832     6

# Processando Dados

In [16]:
# Attach each movie's title to its ratings (join on the shared movieId column)
data = avaliacoes.merge(filmes, on="movieId")

# User x title rating matrix: one row per userId, one column per title.
# pivot_table averages the ratings when a (user, title) pair occurs more than once;
# pairs with no rating at all are filled with 0.
usuario_filme_matrix = data.pivot_table(
    values="rating",
    index="userId",
    columns="title",
).fillna(0)

In [22]:
# Plain-text dump of the merged ratings + titles frame
print(data)

        userId  movieId  rating                           title
0            1        1     4.0                Toy Story (1995)
1            1        3     4.0         Grumpier Old Men (1995)
2            1        6     4.0                     Heat (1995)
3            1       47     5.0     Seven (a.k.a. Se7en) (1995)
4            1       50     5.0      Usual Suspects, The (1995)
...        ...      ...     ...                             ...
100831     610   166534     4.0                    Split (2017)
100832     610   168248     5.0   John Wick: Chapter Two (2017)
100833     610   168250     5.0                  Get Out (2017)
100834     610   168252     5.0                    Logan (2017)
100835     610   170875     3.0  The Fate of the Furious (2017)

[100836 rows x 4 columns]


In [23]:
# Bare last expression: the notebook renders the merged frame with its rich display
data

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,1,3,4.0,Grumpier Old Men (1995)
2,1,6,4.0,Heat (1995)
3,1,47,5.0,Seven (a.k.a. Se7en) (1995)
4,1,50,5.0,"Usual Suspects, The (1995)"
...,...,...,...,...
100831,610,166534,4.0,Split (2017)
100832,610,168248,5.0,John Wick: Chapter Two (2017)
100833,610,168250,5.0,Get Out (2017)
100834,610,168252,5.0,Logan (2017)


In [21]:
# Plain-text dump of the user x title rating matrix
print(usuario_filme_matrix)

title   '71 (2014)  ...  À nous la liberté (Freedom for Us) (1931)
userId              ...                                           
1              0.0  ...                                        0.0
2              0.0  ...                                        0.0
3              0.0  ...                                        0.0
4              0.0  ...                                        0.0
5              0.0  ...                                        0.0
...            ...  ...                                        ...
606            0.0  ...                                        0.0
607            0.0  ...                                        0.0
608            0.0  ...                                        0.0
609            0.0  ...                                        0.0
610            4.0  ...                                        0.0

[610 rows x 9719 columns]


In [24]:
# Bare last expression: the notebook renders the user x title matrix with its rich display
usuario_filme_matrix

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...All the Marbles (1981),...And Justice for All (1979),00 Schneider - Jagd auf Nihil Baxter (1994),1-900 (06) (1994),10 (1979),10 Cent Pistol (2015),10 Cloverfield Lane (2016),10 Items or Less (2006),10 Things I Hate About You (1999),10 Years (2011),"10,000 BC (2008)",100 Girls (2000),100 Streets (2016),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),101 Dalmatians II: Patch's London Adventure (2003),101 Reykjavik (101 Reykjavík) (2000),102 Dalmatians (2000),10th & Wolf (2006),"10th Kingdom, The (2000)","10th Victim, The (La decima vittima) (1965)","11'09""01 - September 11 (2002)",11:14 (2003),"11th Hour, The (2007)",12 Angry Men (1957),12 Angry Men (1997),12 Chairs (1971),12 Chairs (1976),12 Rounds (2009),12 Years a Slave (2013),...,Zathura (2005),Zatoichi and the Chest of Gold (Zatôichi senryô-kubi) (Zatôichi 6) (1964),Zazie dans le métro (1960),Zebraman (2004),"Zed & Two Noughts, A (1985)",Zeitgeist: Addendum (2008),Zeitgeist: Moving Forward (2011),Zeitgeist: The Movie (2007),Zelary (2003),Zelig (1983),Zero Dark Thirty (2012),Zero Effect (1998),"Zero Theorem, The (2013)",Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Zipper (2015),Zodiac (2007),Zombeavers (2014),Zombie (a.k.a. Zombie 2: The Dead Are Among Us) (Zombi 2) (1979),Zombie Strippers! (2008),Zombieland (2009),Zone 39 (1997),"Zone, The (La Zona) (2007)",Zookeeper (2011),Zoolander (2001),Zoolander 2 (2016),Zoom (2006),Zoom (2015),Zootopia (2016),Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! 
(1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,3.5,0.0,0.0,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Calculando Similaridade de Cosseno entre Usuários
#### **Fonte:** https://www.ibm.com/br-pt/think/topics/cosine-similarity

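Comumente utilizada para medir a similaridade entre dois vetores (por exemplo, os vetores de avaliações dos usuários Bob e Tom), resulta em valores entre -1 e 1. Como as avaliações usadas aqui são não negativas, na prática os valores ficam entre 0 e 1.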

- **Uma pontuação de similaridade de cosseno de 1** indica que os vetores estão apontando exatamente na mesma direção.
- **Uma pontuação de similaridade de cosseno de 0** indica que os vetores são ortogonais, o que significa que eles não têm similaridade direcional.
- **Uma pontuação de similaridade de cosseno de -1** indica que os vetores apontam em direções exatamente opostas.

<br>

##### **Fórmula:**

$$
\text{similaridade\_cosseno}(A, B) = \frac{A \cdot B}{||A|| \times ||B||}
$$

$$
\text{similaridade\_cosseno}(A, B) = \frac{\sum_{i=1}^{n} A_i B_i}{\sqrt{\sum_{i=1}^{n} A_i^2} \times \sqrt{\sum_{i=1}^{n} B_i^2}}
$$

- **A · B** é o produto escalar dos vetores A e B
- **||A||** é a magnitude (comprimento) do vetor A
- **||B||** é a magnitude do vetor B

In [30]:
# Pairwise cosine similarity between the rows (users) of the user x title matrix;
# entry [i, j] compares the i-th and j-th users in the matrix's row order.
similaridade_usuarios = cosine_similarity(usuario_filme_matrix)
print(similaridade_usuarios)

[[1.         0.02728287 0.05972026 ... 0.29109737 0.09357193 0.14532081]
 [0.02728287 1.         0.         ... 0.04621095 0.0275654  0.10242675]
 [0.05972026 0.         1.         ... 0.02112846 0.         0.03211875]
 ...
 [0.29109737 0.04621095 0.02112846 ... 1.         0.12199271 0.32205486]
 [0.09357193 0.0275654  0.         ... 0.12199271 1.         0.05322546]
 [0.14532081 0.10242675 0.03211875 ... 0.32205486 0.05322546 1.        ]]


# Achando Usuários com Gostos Parecidos

In [35]:
def achar_usuarios_similares(usuarioId, similaridadeUsuario, top_n = 5):
  """Return the ids of the `top_n` users most similar to `usuarioId`.

  Reads the module-level `usuario_filme_matrix` to translate between user ids
  and row positions of the similarity matrix.

  Args:
    usuarioId: Label of the target user in `usuario_filme_matrix.index`.
    similaridadeUsuario: Square user x user similarity matrix whose rows and
      columns follow the row order of `usuario_filme_matrix`.
    top_n: Maximum number of similar users to return.

  Returns:
    A pandas Index with up to `top_n` user ids, most similar first. The target
    user is never included.

  Raises:
    KeyError: If `usuarioId` is not present in `usuario_filme_matrix.index`.
  """
  usuario_index = usuario_filme_matrix.index.get_loc(usuarioId)
  usuario_similaridades = np.asarray(similaridadeUsuario[usuario_index])
  # argsort is ascending, so [::-1] flips it to most-similar-first
  ordem = np.argsort(usuario_similaridades)[::-1]
  # Drop the user's own position explicitly. Skipping the first entry is only
  # correct when the user ranks first, which fails on ties (another user with
  # the same similarity) or when the user's row is all zeros.
  ordem = ordem[ordem != usuario_index]
  return usuario_filme_matrix.index[ordem[:max(top_n, 0)]]

# Look up the users most similar to user 4 (top_n is left at its default of 5)
similar_usuarios = achar_usuarios_similares(4, similaridade_usuarios)
print(f"Usuario similares para o Quarto Usuario: {similar_usuarios}")

Usuario similares para o Quarto Usuario: Index([391, 603, 156, 275, 597], dtype='int64', name='userId')


# Recomendando filmes

In [39]:
def gerar_recomendacoes(usuarioId, similaridadeUsuario, usuario_filme_matrix, top_n = 5, excluir_avaliados = False):
  """Recommend the titles with the highest mean rating among similar users.

  Args:
    usuarioId: Label of the target user in `usuario_filme_matrix.index`.
    similaridadeUsuario: User x user similarity matrix, passed through to
      `achar_usuarios_similares`.
    usuario_filme_matrix: User x title rating DataFrame. The mean is taken
      over every cell of the neighbours' rows, so if unrated cells are stored
      as 0 they pull the average down.
    top_n: Number of titles to return. It is not forwarded to the neighbour
      search, which uses its own default.
    excluir_avaliados: When True, titles the target user has already rated
      (value > 0 in their row) are removed before ranking. Defaults to False,
      which keeps the original behaviour.

  Returns:
    A pandas Series indexed by title holding the mean neighbour rating,
    sorted in descending order and truncated to `top_n` entries.
  """
  similar_usuarios = achar_usuarios_similares(usuarioId, similaridadeUsuario)
  media_avaliacoes = usuario_filme_matrix.loc[similar_usuarios].mean()
  if excluir_avaliados:
    # Both Series are indexed by the matrix columns, so the mask lines up by title
    ja_avaliados = usuario_filme_matrix.loc[usuarioId] > 0
    media_avaliacoes = media_avaliacoes[~ja_avaliados]
  return media_avaliacoes.sort_values(ascending=False).head(top_n)

# Top recommendations for user 4 (top_n is left at its default of 5)
recomendacoes = gerar_recomendacoes(4, similaridade_usuarios, usuario_filme_matrix)
print(recomendacoes)

title
Godfather, The (1972)    5.0
Rear Window (1954)       5.0
Groundhog Day (1993)     4.8
Graduate, The (1967)     4.8
Apocalypse Now (1979)    4.8
dtype: float64


# Avaliando o Modelo

In [45]:
def avaliando_modelo(usuarioId, similaridadeUsuario, usuario_filme_matrix, avaliacoes_atuais, top_n = 5):
  """Compute precision, recall and F1 of the recommendations for one user.

  A title counts as relevant when its value in `avaliacoes_atuais` is above 0.

  NOTE(review): the notebook passes the user's row of the same matrix used to
  build the recommendations, so this is not a held-out evaluation. Confirm
  whether a train/test split was intended.

  Args:
    usuarioId: Label of the user being evaluated.
    similaridadeUsuario: User x user similarity matrix, passed through to
      `gerar_recomendacoes`.
    usuario_filme_matrix: User x title rating DataFrame, passed through to
      `gerar_recomendacoes`.
    avaliacoes_atuais: Series indexed by title with the user's actual ratings;
      values of 0 or below are treated as "not rated".
    top_n: Number of recommendations requested; also the precision denominator.

  Returns:
    Tuple `(precisao, recall, f1_score)` of floats. Each metric is 0.0 when
    its denominator would be zero.
  """
  recomendacoes = gerar_recomendacoes(usuarioId, similaridadeUsuario, usuario_filme_matrix, top_n)
  # Compare only against titles the user actually rated. The full index also
  # holds the unrated titles, so intersecting with it matches every
  # recommendation and pins precision at 1.0.
  filmes_relevantes = avaliacoes_atuais[avaliacoes_atuais > 0].index
  filmes_em_comum = recomendacoes.index.intersection(filmes_relevantes)
  acertos = len(filmes_em_comum)
  # Precision: share of the top_n recommendations that are relevant
  precisao = acertos / top_n if top_n > 0 else 0.0
  # Recall: share of the relevant titles that were recommended
  recall = acertos / len(filmes_relevantes) if len(filmes_relevantes) > 0 else 0.0
  # F1: harmonic mean of precision and recall; 0.0 when there are no hits
  if precisao + recall == 0:
    f1_score = 0.0
  else:
    f1_score = 2 * (precisao * recall) / (precisao + recall)
  return precisao, recall, f1_score

# Row of the user x title matrix for user 4: one entry per title column
avaliacoes_atuais = usuario_filme_matrix.loc[4]
precisao, recall, f1_score = avaliando_modelo(4, similaridade_usuarios, usuario_filme_matrix, avaliacoes_atuais)

# Report the three metrics for user 4
print(f"Precisão Usuario 4: {precisao}")
print(f"Recall Usuario 4: {recall}")
print(f"F1 Score Usuario 4: {f1_score}")

Precisão Usuario 4: 1.0
Recall Usuario 4: 0.023148148148148147
F1 Score Usuario 4: 0.04524886877828054
