In [2]:
import numpy as np
import pandas as pd
import json

In [3]:
# Read the movies CSV into a DataFrame; the path is relative to the notebook's working directory.
CAMINHO_FILMES = './shared/tmdb_5000_movies.csv'
filmes = pd.read_csv(CAMINHO_FILMES)

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Keep only the first row for each distinct original_title so every title appears once.
filmes.drop_duplicates(subset=['original_title'], keep='first', inplace=True)

# Replace missing values in the text columns with empty strings.
for coluna in ('overview', 'genres', 'tagline'):
    filmes[coluna] = filmes[coluna].fillna('')

def json_to_text(json_string):
    """Turn a JSON-encoded list of ``{"name": ...}`` objects into a space-separated string.

    Args:
        json_string: JSON text expected to decode to a list of objects that
            each carry a ``"name"`` key.

    Returns:
        The ``"name"`` values joined by single spaces. Returns ``''`` when the
        input is not something ``json.loads`` accepts (e.g. NaN or None), is
        not valid JSON, or does not decode to a list. List entries that are
        not objects or have no ``"name"`` key are skipped.

    Note:
        Plain text is not valid JSON, so passing an already-converted value
        through this function a second time yields ``''``.
    """
    try:
        itens = json.loads(json_string)
    except (TypeError, ValueError):
        # TypeError: non-string input such as NaN/None.
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass of it).
        return ''

    if not isinstance(itens, list):
        return ''

    return ' '.join(
        str(item['name'])
        for item in itens
        if isinstance(item, dict) and 'name' in item
    )


In [6]:
# Convert each keywords cell from JSON text to a space-separated string of names.
# Caution: re-running this cell feeds plain text (no longer JSON) back into
# json_to_text, which in general blanks the column; reload the data first.
filmes['keywords'] = filmes['keywords'].map(json_to_text)

In [7]:
# Convert each genres cell from JSON text to a space-separated string of names.
# Caution: re-running this cell feeds plain text (no longer JSON) back into
# json_to_text, which in general blanks the column; reload the data first.
filmes['genres'] = filmes['genres'].map(json_to_text)
display(filmes.head(20))

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6 bri...,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466
3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106
4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124
5,258000000,Fantasy Action Adventure,http://www.sonypictures.com/movies/spider-man3/,559,dual identity amnesia sandstorm love of one's ...,en,Spider-Man 3,The seemingly invincible Spider-Man goes up ag...,115.699814,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-01,890871626,139.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,The battle within.,Spider-Man 3,5.9,3576
6,260000000,Animation Family,http://disney.go.com/disneypictures/tangled/,38757,hostage magic horse fairy tale musical princes...,en,Tangled,When the kingdom's most wanted-and most charmi...,48.681969,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2010-11-24,591794936,100.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,They're taking adventure to new lengths.,Tangled,7.4,3330
7,280000000,Action Adventure Science Fiction,http://marvel.com/movies/movie/193/avengers_ag...,99861,marvel comic sequel superhero based on comic b...,en,Avengers: Age of Ultron,When Tony Stark tries to jumpstart a dormant p...,134.279229,"[{""name"": ""Marvel Studios"", ""id"": 420}, {""name...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2015-04-22,1405403694,141.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A New Age Has Come.,Avengers: Age of Ultron,7.3,6767
8,250000000,Adventure Fantasy Family,http://harrypotter.warnerbros.com/harrypottera...,767,witch magic broom school of witchcraft wizardr...,en,Harry Potter and the Half-Blood Prince,"As Harry begins his sixth year at Hogwarts, he...",98.885637,"[{""name"": ""Warner Bros."", ""id"": 6194}, {""name""...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2009-07-07,933959197,153.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Dark Secrets Revealed,Harry Potter and the Half-Blood Prince,7.4,5293
9,250000000,Action Adventure Fantasy,http://www.batmanvsupermandawnofjustice.com/,209112,dc comics vigilante superhero based on comic b...,en,Batman v Superman: Dawn of Justice,Fearing the actions of a god-like Super Hero l...,155.790452,"[{""name"": ""DC Comics"", ""id"": 429}, {""name"": ""A...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2016-03-23,873260194,151.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Justice or revenge,Batman v Superman: Dawn of Justice,5.7,7004


In [8]:
# Build one text field per film by joining, in this order, the tagline,
# overview, genres and keywords with single spaces between them.
colunas_texto = ['tagline', 'overview', 'genres', 'keywords']

infos = filmes[colunas_texto[0]]
for coluna in colunas_texto[1:]:
    infos = infos + ' ' + filmes[coluna]

filmes['infos'] = infos

In [13]:
import difflib

def filme_mais_semelhante(nome_filme, titulos=None, cutoff=0.6):
    """Return the known film title that most closely matches ``nome_filme``.

    An exact match is returned as-is. Otherwise the comparison ignores letter
    case and surrounding whitespace, so a query such as ``"AVATAR"`` still
    finds ``"Avatar"``.

    Args:
        nome_filme: Title typed by the user.
        titulos: Optional iterable of candidate titles. Defaults to the
            ``original_title`` column of the module-level ``filmes`` frame.
        cutoff: Minimum ``difflib`` similarity ratio (0 to 1) a candidate
            must reach to be accepted.

    Returns:
        The best-matching title in its original spelling, or the string
        ``"Nenhum filme encontrado."`` when no candidate reaches ``cutoff``.
        Callers must check for that sentinel before using the result as a key.
    """
    if titulos is None:
        titulos = filmes['original_title'].tolist()
    else:
        # Materialise the candidates so membership tests look at values
        # (``in`` on a pandas Series would test the index instead).
        titulos = list(titulos)

    # An exact hit always wins, regardless of how other titles fold.
    if nome_filme in titulos:
        return nome_filme

    # Map the case-folded form of every title to its original spelling;
    # non-string entries (e.g. NaN) are skipped because difflib cannot compare them.
    por_chave = {}
    for titulo in titulos:
        if isinstance(titulo, str):
            por_chave.setdefault(titulo.casefold(), titulo)

    consulta = nome_filme.strip().casefold()
    candidatos = difflib.get_close_matches(consulta, list(por_chave), n=1, cutoff=cutoff)
    if candidatos:
        return por_chave[candidatos[0]]
    return "Nenhum filme encontrado."

# Example usage: ask the user for a title and look up the closest match.
# NOTE(review): lista_filmes is not referenced by any of the visible cells.
lista_filmes = [
    "O Poderoso Chefão",
    "Star Wars",
    "Matrix",
    "Senhor dos Anéis",
    "Jurassic Park",
]

nome_digitado = input("Digite o nome do filme que você procura: ")
filme_encontrado = filme_mais_semelhante(nome_digitado)

print("Filme mais semelhante encontrado:", filme_encontrado)

Filme mais semelhante encontrado: Thor


In [18]:
# Vectorise the combined text of every film with TF-IDF, ignoring English stop words.
vec = TfidfVectorizer(stop_words='english')
Tfidf = vec.fit_transform(filmes['infos'].apply(lambda x: np.str_(x)))

# Pairwise cosine similarity between all films, labelled by title on both axes.
cosine_sim = cosine_similarity(Tfidf)
sim_filmes = pd.DataFrame(cosine_sim, columns=filmes['original_title'], index=filmes['original_title'])

# filme_encontrado holds a "not found" message instead of a title when the
# lookup failed, so make sure it is a real column before indexing with it.
if filme_encontrado in sim_filmes.columns:
    # Most similar films first; the searched film itself ranks at the top with score 1.0.
    recommendations = pd.DataFrame(sim_filmes[filme_encontrado].sort_values(ascending=False))
    display(recommendations.head(20))
else:
    print("Nenhum filme encontrado; não há recomendações para exibir.")

Unnamed: 0_level_0,Thor
original_title,Unnamed: 1_level_1
Thor,1.0
Thor: The Dark World,0.513994
The Avengers,0.215422
Captain America: Civil War,0.196928
Iron Man 2,0.19392
Avengers: Age of Ultron,0.191436
Ant-Man,0.174812
Iron Man 3,0.173517
Captain America: The First Avenger,0.172039
Iron Man,0.16553


'\nrecommendations = pd.DataFrame(sim_filmes[nome_digitado].sort_values(ascending=False)) :)\ntop_100_recommendations = recommendations.head(100)\ndisplay(top_100_recommendations.head(60))\n\n    ([:60])'