### Importando as bibliotecas

In [9]:
import django
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "MovieRecomendation.settings")
django.setup()

In [10]:
from recommendations.utils import get_all_movies, search_movies, search_movie_by_id, get_genre_mapping

In [11]:
movies = get_all_movies()['results']

In [12]:
print(movies)

[{'adult': False, 'backdrop_path': '/1XDDXPXGiI8id7MrUxK36ke7gkX.jpg', 'genre_ids': [28, 12, 16, 35, 10751], 'id': 1011985, 'original_language': 'en', 'original_title': 'Kung Fu Panda 4', 'overview': 'Po is gearing up to become the spiritual leader of his Valley of Peace, but also needs someone to take his place as Dragon Warrior. As such, he will train a new kung fu practitioner for the spot and will encounter a villain called the Chameleon who conjures villains from the past.', 'popularity': 4795.12, 'poster_path': '/kDp1vUBnMpe8ak4rjgl3cLELqjU.jpg', 'release_date': '2024-03-02', 'title': 'Kung Fu Panda 4', 'video': False, 'vote_average': 6.975, 'vote_count': 335}, {'adult': False, 'backdrop_path': '/zAepSrO99owYwQqi0QG2AS0dHXw.jpg', 'genre_ids': [28, 14], 'id': 634492, 'original_language': 'en', 'original_title': 'Madame Web', 'overview': 'Forced to confront revelations about her past, paramedic Cassandra Webb forges a relationship with three young women destined for powerful future

In [13]:
query = 'Inception'
search_results = search_movies(query)

In [14]:
print(search_results)

{'page': 1, 'results': [{'adult': False, 'backdrop_path': '/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg', 'genre_ids': [28, 878, 12], 'id': 27205, 'original_language': 'en', 'original_title': 'Inception', 'overview': 'Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: "inception", the implantation of another person\'s idea into a target\'s subconscious.', 'popularity': 113.85, 'poster_path': '/edv5CZvWj09upOsy2Y6IwDhK8bt.jpg', 'release_date': '2010-07-15', 'title': 'Inception', 'video': False, 'vote_average': 8.367, 'vote_count': 35445}, {'adult': False, 'backdrop_path': '/JeGkRdNsOuMrgwBdtB0hp763MU.jpg', 'genre_ids': [18, 53], 'id': 613092, 'original_language': 'es', 'original_title': 'El crack cero', 'overview': 'Madrid, Spain, 1975; shortly after the end of the Franco dictatorship. Six months after the mysterious death of his lover, a prestigious tailo

In [15]:
movie_id = 27205
search_result_id = search_movie_by_id(movie_id)

In [16]:
print(search_result_id)

{'adult': False, 'backdrop_path': '/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg', 'belongs_to_collection': None, 'budget': 160000000, 'genres': [{'id': 28, 'name': 'Action'}, {'id': 878, 'name': 'Science Fiction'}, {'id': 12, 'name': 'Adventure'}], 'homepage': 'https://www.warnerbros.com/movies/inception', 'id': 27205, 'imdb_id': 'tt1375666', 'original_language': 'en', 'original_title': 'Inception', 'overview': 'Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: "inception", the implantation of another person\'s idea into a target\'s subconscious.', 'popularity': 113.85, 'poster_path': '/edv5CZvWj09upOsy2Y6IwDhK8bt.jpg', 'production_companies': [{'id': 923, 'logo_path': '/8M99Dkt23MjQMTTWukq4m5XsEuo.png', 'name': 'Legendary Pictures', 'origin_country': 'US'}, {'id': 9996, 'logo_path': '/3tvBqYsBhxWeHlu62SIJ1el93O7.png', 'name': 'Syncopy', 'origin_country'

In [17]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [18]:
genres = [movie['genre_ids'] for movie in search_results['results']]
votes = [[movie['vote_average']] for movie in search_results['results']]

mlb = MultiLabelBinarizer()
genres_encoded = mlb.fit_transform(genres)

scaler = StandardScaler()
votes_scaled = scaler.fit_transform(votes)

features = np.hstack((genres_encoded, votes_scaled))

In [19]:
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=5, n_jobs=-1)
model_knn.fit(features)
distances, indices = model_knn.kneighbors([features[0]], n_neighbors=5)
similar_movies_titles = [search_results['results'][i]['title'] for i in indices.flatten()]
print(similar_movies_titles)

['Inception', 'Inception: The Cobol Job', 'Bikini Inception', 'The Crack: Inception', 'Inception: Music from the Motion Picture']


In [20]:
genre_mapping = get_genre_mapping()
print(genre_mapping)
unique_genre_names = sorted(set(genre_mapping[genre_id] for movie in movies for genre_id in movie['genre_ids'] if genre_id in genre_mapping))

{28: 'Action', 12: 'Adventure', 16: 'Animation', 35: 'Comedy', 80: 'Crime', 99: 'Documentary', 18: 'Drama', 10751: 'Family', 14: 'Fantasy', 36: 'History', 27: 'Horror', 10402: 'Music', 9648: 'Mystery', 10749: 'Romance', 878: 'Science Fiction', 10770: 'TV Movie', 53: 'Thriller', 10752: 'War', 37: 'Western'}


In [21]:
def genres_vector(movie):
    vector = np.zeros(len(unique_genre_names))
    genre_ids = []
    if 'genres' in movie:
        genre_ids = [genre['id'] for genre in movie['genres']]
    elif 'genre_ids' in movie:
        genre_ids = movie['genre_ids']
    
    for genre_id in genre_ids:
        if genre_id in genre_mapping:
            genre_name = genre_mapping[genre_id]
            genre_index = unique_genre_names.index(genre_name)
            vector[genre_index] = 1
    return vector

In [22]:
tfidf = TfidfVectorizer(stop_words='english')
overviews = tfidf.fit_transform([movie['overview'] for movie in movies])

In [23]:
def prepare_movie_data(movies, vectorizer=None):
    if vectorizer is None:
        vectorizer = TfidfVectorizer(stop_words='english')
        overviews = [movie.get('overview', '') for movie in movies]  
        tfidf_matrix = vectorizer.fit_transform(overviews)
    else:
        tfidf_matrix = vectorizer.transform([movies.get('overview', '')])
    return tfidf_matrix, vectorizer

In [24]:
movies = get_all_movies()['results']  

all_movies_data, vectorizer = prepare_movie_data(movies)

movie_id = 27205
target_movie = search_movie_by_id(movie_id)

In [25]:
target_movie_data, _ = prepare_movie_data(target_movie, vectorizer)

similarities = cosine_similarity(target_movie_data, all_movies_data)

most_similar_indices = np.argsort(similarities[0])[::-1][1:11]

In [26]:
for index in most_similar_indices:
    print(movies[index]['title'])

Argylle
Poor Things
Wonka
Dune: Part Two
Air Force One Down
Road House
Madame Web
Sri Asih
No Way Up
Bullet Train Down


In [27]:
genre_mapping = get_genre_mapping()
movies = get_all_movies()['results']

In [28]:
unique_genre_names = sorted(set(genre_mapping[genre_id] for movie in movies for genre_id in movie['genre_ids'] if genre_id in genre_mapping))

def genres_vector(movie):
    vector = np.zeros(len(unique_genre_names))
    genre_ids = []
    if 'genres' in movie:
        genre_ids = [genre['id'] for genre in movie['genres']]
    elif 'genre_ids' in movie:
        genre_ids = movie['genre_ids']
    
    for genre_id in genre_ids:
        if genre_id in genre_mapping:
            genre_name = genre_mapping[genre_id]
            genre_index = unique_genre_names.index(genre_name)
            vector[genre_index] = 1
    return vector

In [29]:
def weighted_genres_vector(movie):
    vector = np.zeros(len(unique_genre_names))
    genre_ids = []
    
    if 'genres' in movie:
        genre_ids = [genre['id'] for genre in movie['genres']]
    elif 'genre_ids' in movie:
        genre_ids = movie['genre_ids']
    
    for index, genre_id in enumerate(genre_ids):
        if genre_id in genre_mapping:
            genre_name = genre_mapping[genre_id]
            genre_index = unique_genre_names.index(genre_name)
            weight = 2 if index == 0 else 1
            vector[genre_index] = weight
    return vector


In [30]:
all_genres_vectors = np.array([genres_vector(movie) for movie in movies])

In [31]:
movie_id = 27205
target_movie = search_movie_by_id(movie_id)
target_movie_vector = genres_vector(target_movie)

In [32]:
similarities = np.dot(all_genres_vectors, target_movie_vector) / (np.linalg.norm(all_genres_vectors, axis=1) * np.linalg.norm(target_movie_vector))

most_similar_indices_genres = np.argsort(similarities)[::-1][1:11]
print(most_similar_indices_genres)

[12  3 10  2 14 15  5  9  0  7]


In [33]:
for index in most_similar_indices_genres:
    print(movies[index]['title'])

Dune: Part Two
Sri Asih
Argylle
Damsel
Code 8 Part II
Vikings: Battle of Heirs
Bullet Train Down
Migration
Kung Fu Panda 4
Road House


In [34]:
all_genres_vectors = np.array([genres_vector(movie) for movie in movies])

In [35]:
movie_id = 27205
target_movie = search_movie_by_id(movie_id)
target_movie_vector = weighted_genres_vector(target_movie)

In [36]:
similarities = np.dot(all_genres_vectors, target_movie_vector) / (np.linalg.norm(all_genres_vectors, axis=1) * np.linalg.norm(target_movie_vector))

most_similar_indices_genres = np.argsort(similarities)[::-1][1:11]
print(most_similar_indices_genres)

[15  5  3 14  2 10  6  1 16 12]


In [37]:
for index in most_similar_indices_genres:
    print(movies[index]['title'])

Vikings: Battle of Heirs
Bullet Train Down
Sri Asih
Code 8 Part II
Damsel
Argylle
Air Force One Down
Madame Web
The Pig, the Snake and the Pigeon
Dune: Part Two


In [38]:
def find_first_genre_index(genre_vector):
    first_genre_indices = np.where(genre_vector == 1)[0]
    if len(first_genre_indices) > 0:
        return first_genre_indices[0]
    return None

target_first_genre_index = find_first_genre_index(target_movie_vector)


def has_same_first_genre(target_genre_index, movie_vector):
    return movie_vector[target_genre_index] == 1

exact_matches_indices = [i for i, vector in enumerate(all_genres_vectors) if has_same_first_genre(target_first_genre_index, vector)]

for index in exact_matches_indices:
    print(movies[index]['title'])

Kung Fu Panda 4
Damsel
Sri Asih
Godzilla x Kong: The New Empire
Migration
Argylle
Dune: Part Two


In [39]:
def find_second_genre_index(genre_vector):
    genre_indices = np.where(genre_vector == 1)[0]
    if len(genre_indices) > 1:
        return genre_indices[1]  
    return None

target_second_genre_index = find_second_genre_index(target_movie_vector)
def has_same_genre(target_genre_index, movie_vector):
    if target_genre_index is None:
        return False 
    return movie_vector[target_genre_index] == 1

exact_matches_indices_limited = exact_matches_indices[:10]

for index in exact_matches_indices_limited:
    print(movies[index]['title'])


Kung Fu Panda 4
Damsel
Sri Asih
Godzilla x Kong: The New Empire
Migration
Argylle
Dune: Part Two


In [40]:
def get_genre_id_by_name(genre_name, genre_mapping):
    name_to_id_mapping = {name: id for id, name in genre_mapping.items()}
    return name_to_id_mapping.get(genre_name)

def find_target_second_genre_id(target_movie_vector, unique_genre_names, genre_mapping):
    second_genre_index = find_second_genre_index(target_movie_vector)
    if second_genre_index is not None:
        second_genre_name = unique_genre_names[second_genre_index]
        return get_genre_id_by_name(second_genre_name, genre_mapping)
    return None

target_second_genre_id = find_target_second_genre_id(target_movie_vector, unique_genre_names, genre_mapping)


In [41]:
def contains_genre(genre_id, movie):
    return genre_id in movie.get('genre_ids', [])

filtered_movies_indices = [i for i, movie in enumerate(movies) if contains_genre(target_second_genre_id, movie)]

for index in filtered_movies_indices:
    print(movies[index]['title'])


Sri Asih
Godzilla x Kong: The New Empire
Dune: Part Two
Megamind vs. the Doom Syndicate
Code 8 Part II
Poor Things
