In [1]:
import pandas as pd
import numpy as np
import pickle
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors
from multiprocessing import Pool, cpu_count

In [12]:
rated_movies = pd.read_csv('filmes.csv')
movies = pd.read_csv('TMDB_all_movies.csv')
info_movies = pd.read_csv('TMDB_movie_dataset_v11.csv')

In [13]:
movies = pd.merge(movies, info_movies, on = 'id', how = 'inner')

In [14]:
movies.head()

Unnamed: 0,id,title_x,vote_average_x,vote_count_x,status_x,release_date_x,revenue_x,runtime_x,budget_x,imdb_id_x,...,original_title_y,overview_y,popularity_y,poster_path_y,tagline_y,genres_y,production_companies_y,production_countries_y,spoken_languages_y,keywords
0,2,Ariel,7.1,346.0,Released,1988-10-21,0.0,73.0,0.0,tt0094675,...,Ariel,After the coal mine he works at closes and his...,8.155,/ojDg0PGvs6R9xYFodRct2kdI6wC.jpg,,"Drama, Comedy, Romance",Villealfa Filmproductions,Finland,Finnish,"prison, underdog, helsinki, finland, factory w..."
1,3,Shadows in Paradise,7.293,409.0,Released,1986-10-17,0.0,74.0,0.0,tt0092149,...,Varjoja paratiisissa,"Nikander, a rubbish collector and would-be ent...",5.946,/nj01hspawPof0mJmlgfjuLyJuRN.jpg,,"Drama, Comedy, Romance",Villealfa Filmproductions,Finland,"English, Finnish, Swedish","helsinki, finland, salesclerk, garbage"
2,5,Four Rooms,5.862,2694.0,Released,1995-12-09,4257354.0,98.0,4000000.0,tt0113101,...,Four Rooms,It's Ted the Bellhop's first night on the job....,15.295,/75aHn1NOYXh4M7L5shoeQ6NGykP.jpg,Twelve outrageous guests. Four scandalous requ...,Comedy,"Miramax, A Band Apart",United States of America,English,"hotel, new year's eve, witch, bet, sperm, hote..."
3,6,Judgment Night,6.5,351.0,Released,1993-10-15,12136938.0,109.0,21000000.0,tt0107286,...,Judgment Night,"While racing to a boxing match, Frank, Mike, J...",13.564,/3rvvpS9YPM5HB2f4HYiNiJVtdam.jpg,Don't move. Don't whisper. Don't even breathe.,"Action, Crime, Thriller","Largo Entertainment, JVC",United States of America,English,"drug dealer, chicago, illinois, escape, one ni..."
4,8,Life in Loops (A Megacities RMX),7.5,27.0,Released,2006-01-01,0.0,80.0,42000.0,tt0825671,...,Life in Loops (A Megacities RMX),Timo Novotny labels his new project an experim...,1.587,/x7Sz339F2oC8mBf0DHCQpKizXaL.jpg,A Megacities remix.,Documentary,inLoops,Austria,"English, Hindi, Japanese, Russian, Spanish",megacities


In [15]:
movies = movies[['id','title_x','director','runtime_x','release_date_x' ,'adult','genres_y','keywords','overview_y','poster_path_y' ]]

In [16]:
movies.head()

Unnamed: 0,id,title_x,director,runtime_x,release_date_x,adult,genres_y,keywords,overview_y,poster_path_y
0,2,Ariel,Aki Kaurismäki,73.0,1988-10-21,False,"Drama, Comedy, Romance","prison, underdog, helsinki, finland, factory w...",After the coal mine he works at closes and his...,/ojDg0PGvs6R9xYFodRct2kdI6wC.jpg
1,3,Shadows in Paradise,Aki Kaurismäki,74.0,1986-10-17,False,"Drama, Comedy, Romance","helsinki, finland, salesclerk, garbage","Nikander, a rubbish collector and would-be ent...",/nj01hspawPof0mJmlgfjuLyJuRN.jpg
2,5,Four Rooms,"Quentin Tarantino, Robert Rodriguez, Allison A...",98.0,1995-12-09,False,Comedy,"hotel, new year's eve, witch, bet, sperm, hote...",It's Ted the Bellhop's first night on the job....,/75aHn1NOYXh4M7L5shoeQ6NGykP.jpg
3,6,Judgment Night,Stephen Hopkins,109.0,1993-10-15,False,"Action, Crime, Thriller","drug dealer, chicago, illinois, escape, one ni...","While racing to a boxing match, Frank, Mike, J...",/3rvvpS9YPM5HB2f4HYiNiJVtdam.jpg
4,8,Life in Loops (A Megacities RMX),Timo Novotny,80.0,2006-01-01,False,Documentary,megacities,Timo Novotny labels his new project an experim...,/x7Sz339F2oC8mBf0DHCQpKizXaL.jpg


In [17]:
movies.rename(columns = {'title_x': 'title', 'runtime_x': 'runtime', 'release_date_x': 'year' , 'overview_y': 'overview', 'genres_y': 'genres', 'poster_path_y' : 'poster_path' }, inplace = True)

In [18]:
movies['year'] = pd.to_datetime(movies['year'], errors='coerce').dt.year


In [19]:
def format_year(year):
    try:
        if pd.isnull(year):
            return ""
        return str(int(float(year)))
    except Exception:
        return str(year)
    
movies['year'] = movies['year'].apply(format_year)    

In [20]:
movies = movies.drop_duplicates(subset=['title', 'year']).reset_index(drop=True)

In [21]:
movies['title_year'] = movies['title'] + ' (' + movies['year'].astype(str) + ')'

In [22]:
movies.head()

Unnamed: 0,id,title,director,runtime,year,adult,genres,keywords,overview,poster_path,title_year
0,2,Ariel,Aki Kaurismäki,73.0,1988,False,"Drama, Comedy, Romance","prison, underdog, helsinki, finland, factory w...",After the coal mine he works at closes and his...,/ojDg0PGvs6R9xYFodRct2kdI6wC.jpg,Ariel (1988)
1,3,Shadows in Paradise,Aki Kaurismäki,74.0,1986,False,"Drama, Comedy, Romance","helsinki, finland, salesclerk, garbage","Nikander, a rubbish collector and would-be ent...",/nj01hspawPof0mJmlgfjuLyJuRN.jpg,Shadows in Paradise (1986)
2,5,Four Rooms,"Quentin Tarantino, Robert Rodriguez, Allison A...",98.0,1995,False,Comedy,"hotel, new year's eve, witch, bet, sperm, hote...",It's Ted the Bellhop's first night on the job....,/75aHn1NOYXh4M7L5shoeQ6NGykP.jpg,Four Rooms (1995)
3,6,Judgment Night,Stephen Hopkins,109.0,1993,False,"Action, Crime, Thriller","drug dealer, chicago, illinois, escape, one ni...","While racing to a boxing match, Frank, Mike, J...",/3rvvpS9YPM5HB2f4HYiNiJVtdam.jpg,Judgment Night (1993)
4,8,Life in Loops (A Megacities RMX),Timo Novotny,80.0,2006,False,Documentary,megacities,Timo Novotny labels his new project an experim...,/x7Sz339F2oC8mBf0DHCQpKizXaL.jpg,Life in Loops (A Megacities RMX) (2006)


In [23]:
rated_movies = rated_movies[['user_id', 'film_title', 'rating', 'film_year']]

In [24]:
rated_movies['film_year'] = rated_movies['film_year'].apply(format_year)    

In [25]:
rated_movies['title_year'] = rated_movies['film_title'] + ' (' + rated_movies['film_year'].astype(str) + ')'

In [26]:
rated_movies.head()

Unnamed: 0,user_id,film_title,rating,film_year,title_year
0,user_001,Scream 7,1.0,2026,Scream 7 (2026)
1,user_001,Materialists,,2025,Materialists (2025)
2,user_001,How to Train Your Dragon,6.0,2025,How to Train Your Dragon (2025)
3,user_001,Predator: Killer of Killers,8.0,2025,Predator: Killer of Killers (2025)
4,user_001,Echo Valley,6.0,2025,Echo Valley (2025)


In [27]:
movies = movies.fillna('')

movies['features'] = (
    movies['title'].astype(str) + ' ' +
    movies['director'].astype(str) + ' ' +
    movies['genres'].astype(str) + ' ' +
    movies['keywords'].astype(str) + ' ' +
    movies['overview'].astype(str) 
)

In [28]:
def clean_text(text):
    if pd.isnull(text):
        return ""
    # Remove pontuação e coloca em minúsculas
    text = text.lower()
    text = re.sub(r'[.,]', '', text)
    return text

movies['features'] = movies['features'].apply(clean_text)

In [29]:
movie_name = "Titanic"
resultado = movies[movies['title'] == movie_name]

In [30]:
resultado

Unnamed: 0,id,title,director,runtime,year,adult,genres,keywords,overview,poster_path,title_year,features
471,597,Titanic,James Cameron,194.0,1997.0,False,"Drama, Romance","epic, ship, drowning, panic, shipwreck, evacua...",101-year-old Rose DeWitt Bukater tells the sto...,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,Titanic (1997),titanic james cameron drama romance epic ship ...
6052,11021,Titanic,"Werner Klingler, Herbert Selpin",88.0,1943.0,False,"Action, Drama, History","sea, captain, passenger, cruise, iceberg, tita...",This little-known German film retells the true...,/Al7oIXQ4dZAofBTZWm6OiXS3MEa.jpg,Titanic (1943),titanic werner klingler herbert selpin action ...
10006,16535,Titanic,Jean Negulesco,98.0,1953.0,False,"Drama, Romance",titanic,"Unhappily married, Julia Sturges decides to go...",/rEPzO9I6LCk6Mxg1X4BsBk6oA3V.jpg,Titanic (1953),titanic jean negulesco drama romance titanic u...
231685,357517,Titanic,Lutz Büscher,0.0,1984.0,False,Drama,,,/yi73me6Jl3zDelS9pQK5jtMRhsc.jpg,Titanic (1984),titanic lutz büscher drama
309328,455679,Titanic,,0.0,,False,,,,,Titanic (),titanic
536374,760524,Titanic,,0.0,2018.0,False,,,"""Titanic"" is a Punjabi feature film. It is fam...",,Titanic (2018),"titanic ""titanic"" is a punjabi feature film..."
784795,1124589,Titanic,Kim Harrington,55.0,2023.0,False,Documentary,,This documentary explores the incredible histo...,/l7rAG4P16SNFdPmWfTC8ZQydOon.jpg,Titanic (2023),titanic kim harrington documentary this docum...


In [31]:
movies.head()

Unnamed: 0,id,title,director,runtime,year,adult,genres,keywords,overview,poster_path,title_year,features
0,2,Ariel,Aki Kaurismäki,73.0,1988,False,"Drama, Comedy, Romance","prison, underdog, helsinki, finland, factory w...",After the coal mine he works at closes and his...,/ojDg0PGvs6R9xYFodRct2kdI6wC.jpg,Ariel (1988),ariel aki kaurismäki drama comedy romance pris...
1,3,Shadows in Paradise,Aki Kaurismäki,74.0,1986,False,"Drama, Comedy, Romance","helsinki, finland, salesclerk, garbage","Nikander, a rubbish collector and would-be ent...",/nj01hspawPof0mJmlgfjuLyJuRN.jpg,Shadows in Paradise (1986),shadows in paradise aki kaurismäki drama comed...
2,5,Four Rooms,"Quentin Tarantino, Robert Rodriguez, Allison A...",98.0,1995,False,Comedy,"hotel, new year's eve, witch, bet, sperm, hote...",It's Ted the Bellhop's first night on the job....,/75aHn1NOYXh4M7L5shoeQ6NGykP.jpg,Four Rooms (1995),four rooms quentin tarantino robert rodriguez ...
3,6,Judgment Night,Stephen Hopkins,109.0,1993,False,"Action, Crime, Thriller","drug dealer, chicago, illinois, escape, one ni...","While racing to a boxing match, Frank, Mike, J...",/3rvvpS9YPM5HB2f4HYiNiJVtdam.jpg,Judgment Night (1993),judgment night stephen hopkins action crime th...
4,8,Life in Loops (A Megacities RMX),Timo Novotny,80.0,2006,False,Documentary,megacities,Timo Novotny labels his new project an experim...,/x7Sz339F2oC8mBf0DHCQpKizXaL.jpg,Life in Loops (A Megacities RMX) (2006),life in loops (a megacities rmx) timo novotny ...


In [32]:
vectorizer = CountVectorizer(token_pattern=r"(?u)\b\w+\b")
X = vectorizer.fit_transform(movies['features'])

knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(X)

In [33]:
def compute_distances(indices, X, knn, title_year, movies, n_recommendations):
    print(f"Debug: Computando distâncias para índices: {indices}")
    distances, indices = knn.kneighbors(X[indices], n_neighbors=n_recommendations+1)
    print(f"Debug: Distâncias calculadas: {distances}")
    return [movies.iloc[i]['title_year'] for i in indices.flatten() if movies.iloc[i]['title_year'] != title_year]

In [34]:
def get_recommendations_by_content(title_year, n_recommendations=80):
    if title_year not in movies['title_year'].values:
        return []
    idx = movies[movies['title_year'] == title_year].index[0]
    distances, indices = knn.kneighbors(X[idx], n_neighbors=n_recommendations+1)
    recommended_title_years = [movies.iloc[i]['title_year'] for i in indices.flatten() if movies.iloc[i]['title_year'] != title_year]
    return recommended_title_years[:n_recommendations]

In [35]:
def get_recommendations_by_content_parallel(title_year, n_recommendations=80):
    print(f"Debug: Iniciando para {title_year}")  # Verifica o título do filme
    if title_year not in movies['title_year'].values:
        print(f"Debug: {title_year} não encontrado em movies['title_year']")
        return []

    idx = movies[movies['title_year'] == title_year].index[0]
    print(f"Debug: Índice do filme {title_year}: {idx}")  # Verifica o índice do filme

    num_cores = cpu_count()
    print(f"Debug: Número de núcleos disponíveis: {num_cores}")  # Verifica núcleos disponíveis

    indices_split = np.array_split(range(X.shape[0]), num_cores)
    print(f"Debug: Divisão de índices: {indices_split}")  # Verifica a divisão dos índices

    try:
        print("Debug: Iniciando Pool")
        with Pool(num_cores) as pool:
            results = pool.starmap(
                compute_distances,
                [(indices, X, knn, title_year, movies, n_recommendations) for indices in indices_split]
            )
        print(f"Debug: Resultados obtidos: {results}")  # Verifica os resultados obtidos
    except Exception as e:
        print(f"Erro durante paralelismo: {e}")  # Captura erros no paralelismo
        return []

    recommended_title_years = [item for sublist in results for item in sublist]
    print(f"Debug: Recomendações finais: {recommended_title_years}")  # Verifica as recomendações finais
    return recommended_title_years[:n_recommendations]

In [36]:
def get_recommendations_by_ratings(title_year, n_recommendations=18):
    content_recs = get_recommendations_by_content(title_year, n_recommendations=140)
    if not content_recs:
        return []
    pivot = rated_movies.pivot_table(index='title_year', columns='user_id', values='rating').fillna(0)
    filtered_titles = [title_year] + [rec for rec in content_recs if rec in pivot.index]
    filtered_pivot = pivot.loc[filtered_titles]
    knn_ratings = NearestNeighbors(metric='cosine', algorithm='brute')
    knn_ratings.fit(filtered_pivot.values)
    idx = filtered_pivot.index.get_loc(title_year)
    distances, indices = knn_ratings.kneighbors([filtered_pivot.iloc[idx].values], n_neighbors=min(n_recommendations+1, len(filtered_titles)))
    recommended_title_years = [filtered_pivot.index[i] for i in indices.flatten() if filtered_pivot.index[i] != title_year]
    return recommended_title_years[:n_recommendations]

In [37]:
def get_recommendations_by_ratings_parallel(title_year, n_recommendations=18):
    content_recs = get_recommendations_by_content_parallel(title_year, n_recommendations=140)
    if not content_recs:
        return []

    pivot = rated_movies.pivot_table(index='title_year', columns='user_id', values='rating').fillna(0)
    filtered_titles = [title_year] + [rec for rec in content_recs if rec in pivot.index]
    if len(filtered_titles) <= 1:
        return []

    filtered_pivot = pivot.loc[filtered_titles]

    num_cores = cpu_count()
    indices_split = np.array_split(range(filtered_pivot.shape[0]), num_cores)

    with Pool(num_cores) as pool:
        results = pool.starmap(
            compute_distances,
            [(indices, X, knn, title_year, movies, n_recommendations) for indices in indices_split]
        )

    recommended_title_years = [item for sublist in results for item in sublist]
    return recommended_title_years[:n_recommendations]

In [None]:
content_recs = {}

# Itera sobre todos os filmes no DataFrame `movies`
total_movies = len(movies['title_year'])
for idx, title_year in enumerate(movies['title_year'], 1):
    try:
        # Obtém as recomendações por notas de usuários
        recommendations = get_recommendations_by_content_parallel(title_year, n_recommendations=80)
        content_recs [title_year] = recommendations
    except Exception as e:
        print(f"Erro ao calcular recomendações para {title_year}: {e}")
    print(f"Processado {idx} de {total_movies} filmes")
# Salva o dicionário em um arquivo pickle
with open('content_recs.pkl', 'wb') as f:
    pickle.dump(content_recs, f)

Using device: cuda


KeyboardInterrupt: 

In [38]:
rated_movies_with_ratings = rated_movies[rated_movies['rating'] > 0]
movies_with_ratings = movies[movies['title_year'].isin(rated_movies_with_ratings['title_year'])]

In [29]:
# Salvar recomendações por notas de usuários considerando conteúdo primeiro
rating_recs = {}

# Itera sobre todos os filmes no DataFrame `movies`
total_movies = len(movies_with_ratings['title_year'])
for idx, title_year in enumerate(movies_with_ratings['title_year'], 1):
    try:
        # Obtém as recomendações por notas de usuários
        recommendations = get_recommendations_by_ratings(title_year, n_recommendations=18)
        rating_recs[title_year] = recommendations
        print(f"Processado {idx} de {total_movies} filmes")
    except Exception as e:
        print(f"Erro ao calcular recomendações para {title_year}: {e}")
    

# Salva o dicionário em um arquivo pickle
with open('rating_recs.pkl', 'wb') as f:
    pickle.dump(rating_recs, f)

Processado 1 de 42538 filmes
Processado 2 de 42538 filmes
Processado 3 de 42538 filmes
Processado 4 de 42538 filmes
Processado 5 de 42538 filmes
Processado 6 de 42538 filmes
Processado 7 de 42538 filmes
Processado 8 de 42538 filmes
Processado 9 de 42538 filmes
Processado 10 de 42538 filmes
Processado 11 de 42538 filmes
Processado 12 de 42538 filmes
Processado 13 de 42538 filmes
Processado 14 de 42538 filmes
Processado 15 de 42538 filmes
Processado 16 de 42538 filmes
Processado 17 de 42538 filmes
Processado 18 de 42538 filmes
Processado 19 de 42538 filmes
Processado 20 de 42538 filmes
Processado 21 de 42538 filmes
Processado 22 de 42538 filmes
Processado 23 de 42538 filmes
Processado 24 de 42538 filmes
Processado 25 de 42538 filmes
Processado 26 de 42538 filmes
Processado 27 de 42538 filmes
Processado 28 de 42538 filmes
Processado 29 de 42538 filmes
Processado 30 de 42538 filmes
Processado 31 de 42538 filmes
Processado 32 de 42538 filmes
Processado 33 de 42538 filmes
Processado 34 de 42

In [20]:
print(get_recommendations_by_content('Indiana Jones and the Last Crusade'))

['God Disposes', 'Eternal', 'The Old Cowboy', 'My Father Iqbal', 'Father and Son', 'Hitler: Beast of Berlin', 'Morning Star', 'Ghouls', 'The Cord of Life', 'The Han River', 'Hope in the Holy Land: Delving Beneath the Surface of the Israeli-Palestinian Conflict', 'Blood on the Asphalt', 'The Day of the Crows', 'Heart Beats of Long Ago', 'Consolation', 'Wind Back', 'The Grassland Whisper', 'The Devil in Sofia', 'No Mill No Meal', 'Won in the Fifth', "Majub's Journey", 'Der rote Reiter', 'The Misadventure of a French Gentleman Without Pants at the Zandvoort Beach', 'Glory of Legend', 'Premutos: The Fallen Angel', '家族ケチャップ', 'Fokak Meny', 'An Apple from Paradise', 'The Line Will Break', 'Tempest', 'The Stepmother', 'The Fisher-Maid', 'Summer in the Golden Valley', 'Traveler', 'The New Man', 'Youth on the palm of the imp', 'The Intruder', 'The Eremites', 'Rebels Of The Cities', 'Blood Loss', 'Alarm', 'The Old Man and the Bird', 'The Color of the Sun', 'Lost on the Branch', 'Jim is Fond of G

In [29]:
print(get_recommendations_by_ratings('Titanic (1997)', n_recommendations=6))

['A Night to Remember (1958)', 'Titanic: The Legend Goes On... (2000)', 'United (2011)', 'And the Ship Sails On (1983)', 'The Fabulous Baron Munchausen (1962)', 'Titanic: The Musical (2023)']


In [None]:
print(get_recommendations_by_content_parallel('Titanic (1997)', n_recommendations=6))

Debug: Iniciando para Titanic (1997)
Debug: Índice do filme Titanic (1997): 471
Debug: Número de núcleos disponíveis: 16
Debug: Divisão de índices: [array([    0,     1,     2, ..., 67404, 67405, 67406]), array([ 67407,  67408,  67409, ..., 134811, 134812, 134813]), array([134814, 134815, 134816, ..., 202218, 202219, 202220]), array([202221, 202222, 202223, ..., 269625, 269626, 269627]), array([269628, 269629, 269630, ..., 337031, 337032, 337033]), array([337034, 337035, 337036, ..., 404437, 404438, 404439]), array([404440, 404441, 404442, ..., 471843, 471844, 471845]), array([471846, 471847, 471848, ..., 539249, 539250, 539251]), array([539252, 539253, 539254, ..., 606655, 606656, 606657]), array([606658, 606659, 606660, ..., 674061, 674062, 674063]), array([674064, 674065, 674066, ..., 741467, 741468, 741469]), array([741470, 741471, 741472, ..., 808873, 808874, 808875]), array([808876, 808877, 808878, ..., 876279, 876280, 876281]), array([876282, 876283, 876284, ..., 943685, 943686,

In [None]:
print(get_recommendations_by_ratings_parallel('Titanic (1997)', n_recommendations=6))

In [None]:


# Salva o DataFrame movies em um arquivo pickle
with open('movies_info.pkl', 'wb') as f:
    pickle.dump(movies, f)
# Salva o DataFrame rated_movies em um arquivo pickle
with open('rated_movies.pkl', 'wb') as f:
    pickle.dump(rated_movies, f)    

In [None]:
import time

# Seleciona um subconjunto de filmes para medir o tempo médio
subset_movies = movies['title_year'][:10]  # Exemplo com 10 filmes
start_time = time.time()

for title_year in subset_movies:
    try:
        recommendations = get_recommendations_by_content_parallel(title_year, n_recommendations=80)
    except Exception as e:
        print(f"Erro ao calcular recomendações para {title_year}: {e}")

end_time = time.time()

# Calcula o tempo médio por filme
average_time_per_movie = (end_time - start_time) / len(subset_movies)
print(f"Tempo médio por filme: {average_time_per_movie:.2f} segundos")

# Previsão para todos os filmes
total_movies = len(movies['title_year'])
predicted_total_time = average_time_per_movie * total_movies
print(f"Tempo total estimado para {total_movies} filmes: {predicted_total_time:.2f} segundos")

Debug: Iniciando para Ariel (1988)
Debug: Índice do filme Ariel (1988): 0
Debug: Número de núcleos disponíveis: 16
Debug: Divisão de índices: [array([    0,     1,     2, ..., 67404, 67405, 67406]), array([ 67407,  67408,  67409, ..., 134811, 134812, 134813]), array([134814, 134815, 134816, ..., 202218, 202219, 202220]), array([202221, 202222, 202223, ..., 269625, 269626, 269627]), array([269628, 269629, 269630, ..., 337031, 337032, 337033]), array([337034, 337035, 337036, ..., 404437, 404438, 404439]), array([404440, 404441, 404442, ..., 471843, 471844, 471845]), array([471846, 471847, 471848, ..., 539249, 539250, 539251]), array([539252, 539253, 539254, ..., 606655, 606656, 606657]), array([606658, 606659, 606660, ..., 674061, 674062, 674063]), array([674064, 674065, 674066, ..., 741467, 741468, 741469]), array([741470, 741471, 741472, ..., 808873, 808874, 808875]), array([808876, 808877, 808878, ..., 876279, 876280, 876281]), array([876282, 876283, 876284, ..., 943685, 943686, 94368

In [2]:
def view_pkl_content(file_path):
    try:
        with open(file_path, "rb") as f:
            data = pickle.load(f)
            print(f"Conteúdo de {file_path}:")
            print(data)
    except Exception as e:
        print(f"Erro ao carregar {file_path}: {e}")


In [11]:
# Carregar o arquivo pickle
with open('content_recs.pkl', 'rb') as f:
    data = pickle.load(f)

# Converter o conteúdo em um DataFrame e renomear a primeira coluna
df = pd.DataFrame.from_dict(data, orient='index')
df.index.name = 'title_year'  # Define o nome da primeira coluna como 'title_year'
df.reset_index(inplace=True)  # Move o índice para uma coluna regular
print(df.head())

Empty DataFrame
Columns: []
Index: []


In [10]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
Ariel (1988),He Was a Quiet Man (2007),Ordinary Man (2005),The Kaiser's Lackey (1951),People Out There (2012),,,,,,,,,,,,,,
Shadows in Paradise (1986),Final Cut (1980),Body Slam (1986),"Sex, Fear, and Hamburgers (2018)",Denmark (2019),Total Dhamaal (2019),,,,,,,,,,,,,
Four Rooms (1995),Golden Eighties (1986),Night of the Living Carrots (2011),The Palace (2023),Scene of the Crime (1986),Pelada (2010),Heavier Trip (2024),Hatching Pete (2009),,,,,,,,,,,
Judgment Night (1993),A Lonely Place to Die (2011),Casanova Gene (2018),Even – As You and I (1937),In the Blind Spot (2023),Io Capitano (2023),Strong Waters (2018),Organics (1999),,,,,,,,,,,
Star Wars (1977),The Empire Strikes Back (1980),Star Wars: The Clone Wars (2008),Conan the Destroyer (1984),Batman: Return of the Caped Crusaders (2016),The Indian Tomb (1959),The Otherworld (2016),Conscience (1968),,,,,,,,,,,
