### Importando Dataset

In [1]:
import pandas as pd

In [2]:
# Source CSV, fetched straight from the project's GitHub repository.
DATASET_URL = 'https://raw.githubusercontent.com/GuiMesc/movie-recommendation-system-KNN/main/imdb_top_1000.csv'

# Keep the raw frame under its own name; later cells derive from it.
original_dataset = pd.read_csv(DATASET_URL)
original_dataset.head()

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


### Pré-Processamento

#### Removendo colunas que não serão utilizadas

In [3]:
# Columns that play no part in the recommendation.
unused_columns = [
    "Poster_Link", "Overview", "Director",
    "Star1", "Star2", "Star3", "Star4",
    "Gross", "Released_Year", "Certificate", "Runtime", "No_of_Votes",
]

# The result is stored in a separate variable so the original dataset stays untouched.
modified_dataset = original_dataset.drop(columns=unused_columns)
modified_dataset.head()

Unnamed: 0,Series_Title,Genre,IMDB_Rating,Meta_score
0,The Shawshank Redemption,Drama,9.3,80.0
1,The Godfather,"Crime, Drama",9.2,100.0
2,The Dark Knight,"Action, Crime, Drama",9.0,84.0
3,The Godfather: Part II,"Crime, Drama",9.0,90.0
4,12 Angry Men,"Crime, Drama",9.0,96.0


#### Renomeando as colunas restantes

In [4]:
# Mapping from the original English column names to the Portuguese names
# used in the rest of the notebook.
rename = dict(
    Series_Title="Titulo_do_Filme",
    Genre="Genero",
    IMDB_Rating="Avaliacao_IMDB",
    Meta_score="Pontuacao_Metacritic",
)
modified_dataset = modified_dataset.rename(columns=rename)
modified_dataset.head()

Unnamed: 0,Titulo_do_Filme,Genero,Avaliacao_IMDB,Pontuacao_Metacritic
0,The Shawshank Redemption,Drama,9.3,80.0
1,The Godfather,"Crime, Drama",9.2,100.0
2,The Dark Knight,"Action, Crime, Drama",9.0,84.0
3,The Godfather: Part II,"Crime, Drama",9.0,90.0
4,12 Angry Men,"Crime, Drama",9.0,96.0


#### Transformando coluna "Pontuacao_Metacritic" e criando coluna única "Avaliacao"

In [5]:
# Put the Metacritic score on a 0-10 scale so it is comparable with the IMDB rating.
# The scaled column is derived from the untouched original dataset rather than from
# itself: overwriting a column with `column / 10` divides again on every re-run of
# this cell and silently corrupts "Avaliacao". Assignment aligns on the row index,
# which modified_dataset inherits from original_dataset.
modified_dataset["Pontuacao_Metacritic"] = original_dataset["Meta_score"] / 10

# Single combined rating: plain average of the two 0-10 scores.
# NOTE(review): if Meta_score has missing values, "Avaliacao" is NaN for those rows,
# and such titles would presumably be absent from the pivot table built later -
# confirm whether that is intended.
modified_dataset["Avaliacao"] = (
    modified_dataset["Pontuacao_Metacritic"] + modified_dataset["Avaliacao_IMDB"]
) / 2
modified_dataset.head()

Unnamed: 0,Titulo_do_Filme,Genero,Avaliacao_IMDB,Pontuacao_Metacritic,Avaliacao
0,The Shawshank Redemption,Drama,9.3,8.0,8.65
1,The Godfather,"Crime, Drama",9.2,10.0,9.6
2,The Dark Knight,"Action, Crime, Drama",9.0,8.4,8.7
3,The Godfather: Part II,"Crime, Drama",9.0,9.0,9.0
4,12 Angry Men,"Crime, Drama",9.0,9.6,9.3


#### Removendo colunas e organizando a ordem de apresentação

In [6]:
# Keep only the columns used from here on, in presentation order.
# Plain column selection raises KeyError for a missing or misspelled name,
# whereas reindex(columns=...) would silently create an all-NaN column.
modified_dataset = modified_dataset[["Titulo_do_Filme", "Genero", "Avaliacao"]]
modified_dataset.head()

Unnamed: 0,Titulo_do_Filme,Genero,Avaliacao
0,The Shawshank Redemption,Drama,8.65
1,The Godfather,"Crime, Drama",9.6
2,The Dark Knight,"Action, Crime, Drama",8.7
3,The Godfather: Part II,"Crime, Drama",9.0
4,12 Angry Men,"Crime, Drama",9.3


#### Criando a pivot table para classificação

In [7]:
# Lift the row limit for displayed frames (None means no truncation).
pd.set_option("display.max_rows", None)

# One row per movie title, one column per genre string; each cell holds the
# combined rating of that title under that genre (NaN where there is none).
movies_pivot = modified_dataset.pivot_table(
    index="Titulo_do_Filme",
    columns="Genero",
    values="Avaliacao",
)
movies_pivot.head()

Genero,"Action, Adventure","Action, Adventure, Comedy","Action, Adventure, Drama","Action, Adventure, Family","Action, Adventure, Fantasy","Action, Adventure, History","Action, Adventure, Horror","Action, Adventure, Mystery","Action, Adventure, Romance","Action, Adventure, Sci-Fi",...,Horror,"Horror, Mystery, Sci-Fi","Horror, Mystery, Thriller","Horror, Sci-Fi","Horror, Thriller","Mystery, Romance, Thriller","Mystery, Sci-Fi, Thriller","Mystery, Thriller",Thriller,Western
Titulo_do_Filme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
(500) Days of Summer,,,,,,,,,,,...,,,,,,,,,,
12 Angry Men,,,,,,,,,,,...,,,,,,,,,,
12 Years a Slave,,,,,,,,,,,...,,,,,,,,,,
1917,,,,,,,,,,,...,,,,,,,,,,
2001: A Space Odyssey,,,,,,,,,,,...,,,,,,,,,,


In [8]:
# Title/genre combinations with no rating become 0 so the table is fully numeric.
movies_pivot = movies_pivot.fillna(0)
movies_pivot.head()

Genero,"Action, Adventure","Action, Adventure, Comedy","Action, Adventure, Drama","Action, Adventure, Family","Action, Adventure, Fantasy","Action, Adventure, History","Action, Adventure, Horror","Action, Adventure, Mystery","Action, Adventure, Romance","Action, Adventure, Sci-Fi",...,Horror,"Horror, Mystery, Sci-Fi","Horror, Mystery, Thriller","Horror, Sci-Fi","Horror, Thriller","Mystery, Romance, Thriller","Mystery, Sci-Fi, Thriller","Mystery, Thriller",Thriller,Western
Titulo_do_Filme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
(500) Days of Summer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12 Angry Men,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12 Years a Slave,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001: A Space Odyssey,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Criando matriz esparsa a partir da tabela pivot

In [9]:
from scipy.sparse import csr_matrix

# Convert the pivot table to a plain array, then store it in compressed
# sparse row (CSR) format.
dense_ratings = movies_pivot.to_numpy()
movies_sparse = csr_matrix(dense_ratings)
type(movies_sparse)

scipy.sparse._csr.csr_matrix

### Treinando o Modelo

In [10]:
from sklearn.neighbors import NearestNeighbors

# Brute-force nearest-neighbour search over the sparse rating matrix.
# n_neighbors=5 spells out the library default, so each query returns five titles.
model = NearestNeighbors(n_neighbors=5, algorithm='brute')
model.fit(movies_sparse)

#### Recomendações de Filmes

In [11]:
recomendation_list = []
# Strip stray whitespace so an accidental leading/trailing space does not cause a miss.
movie_name = input("Informe o nome do filme: ").strip()

if movie_name not in movies_pivot.index:
    # Without this guard an unknown title yields an empty (1, 0) query array and
    # kneighbors fails with a shape error that says nothing about the real cause.
    print(f"Filme '{movie_name}' não encontrado no dataset. Verifique o título e tente novamente.")
else:
    # Rating vector of the requested movie, shaped (1, n_genres) as kneighbors expects.
    query_vector = movies_pivot.loc[[movie_name]].to_numpy()
    distances, sugestions = model.kneighbors(query_vector)

    # sugestions holds row positions in movies_pivot; map each row back to titles.
    # The queried movie itself is part of the result (distance 0).
    recomendation_list = [movies_pivot.index[row].tolist() for row in sugestions]

    print(f"Boas recomendações para você que assistiu '{movie_name}' são: \n{recomendation_list}\n")
    print("Distâncias entre os filmes recomendados: ")
    print(distances)

Informe o nome do filme: Pulp Fiction
Boas recomendações para você que assistiu 'Pulp Fiction' são: 
[['Pulp Fiction', '12 Angry Men', 'The Godfather: Part II', 'Taxi Driver', 'The Godfather']]

Distâncias entre os filmes recomendados: 
[[0.   0.15 0.15 0.3  0.45]]


#### Verificando as Informações do Filme

In [12]:
# Change the title below to inspect a movie's genre and rating and check whether
# they match (or are close to) those of the recommended movies.
title_mask = modified_dataset['Titulo_do_Filme'].eq('Pulp Fiction')
modified_dataset.loc[title_mask]

Unnamed: 0,Titulo_do_Filme,Genero,Avaliacao
6,Pulp Fiction,"Crime, Drama",9.15
