In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


def load_data(
    movies_path="datas/movies.csv",
    ratings_path="datas/ratings_filtered_vf.csv",
    max_ratings=200000,
):
    """Read the movies and ratings CSV files into DataFrames.

    Parameters
    ----------
    movies_path : str or path-like
        Location of the movies CSV file.
    ratings_path : str or path-like
        Location of the ratings CSV file.
    max_ratings : int or None
        Maximum number of rating rows to read from the top of the file.
        ``None`` reads the whole file.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(movies, ratings)``.
    """
    movies = pd.read_csv(movies_path)
    # `nrows` stops parsing once the cap is reached, instead of loading the
    # entire file and then discarding everything past the first rows.
    ratings = pd.read_csv(ratings_path, nrows=max_ratings)
    return movies, ratings

def preprocess_data(movies, ratings):
    """Build a movie-by-user rating matrix.

    Parameters
    ----------
    movies : pandas.DataFrame
        Must contain the columns ``movieId`` and ``title``.
    ratings : pandas.DataFrame
        Must contain the columns ``userId``, ``movieId`` and ``rating``.

    Returns
    -------
    pandas.DataFrame
        One row per ``title`` and one column per ``userId``. Each cell holds
        the rating (averaged by ``pivot_table`` when several rows share the
        same title/user pair); missing pairs are filled with 0.
    """
    # Merge only the columns the pivot needs. This keeps the intermediate
    # frame small and prevents unrelated columns in `movies` (e.g. one that
    # is also called "rating") from being suffixed and hiding the real one.
    rated_titles = ratings[["userId", "movieId", "rating"]].merge(
        movies[["movieId", "title"]], on="movieId"
    )

    movie_user_matrix = rated_titles.pivot_table(
        index="title", columns="userId", values="rating"
    )

    # A user who has not rated a movie contributes 0 to that movie's vector.
    return movie_user_matrix.fillna(0)

def calculate_similarity(movie_user_matrix):
    """Compute pairwise cosine similarity between the rows of the matrix.

    Parameters
    ----------
    movie_user_matrix : pandas.DataFrame
        Matrix whose rows are compared with each other.

    Returns
    -------
    pandas.DataFrame
        Square frame labelled on both axes with the input's index.
    """
    labels = movie_user_matrix.index
    return pd.DataFrame(
        cosine_similarity(movie_user_matrix),
        index=labels,
        columns=labels,
    )

def recommend_movies_based_on_movie(movie_title, similarity_df, n_recommendations=5):
    """Return the titles most similar to ``movie_title``.

    Parameters
    ----------
    movie_title : str
        Title to look up; must be a column label of ``similarity_df``.
    similarity_df : pandas.DataFrame
        Similarity scores labelled by title on both axes.
    n_recommendations : int
        Maximum number of titles to return. Values <= 0 yield an empty list.

    Returns
    -------
    list
        Up to ``n_recommendations`` titles, ordered from most to least
        similar. The queried title itself is never included.

    Raises
    ------
    KeyError
        If ``movie_title`` is not a column of ``similarity_df``.
    """
    if movie_title not in similarity_df.columns:
        raise KeyError(f"Movie title not found in similarity matrix: {movie_title!r}")
    if n_recommendations <= 0:
        return []

    # Remove the queried movie by label rather than assuming it sorts first:
    # another movie can tie with it at similarity 1.0, in which case skipping
    # "position 0" may drop a real match and return the movie itself.
    scores = similarity_df[movie_title].drop(labels=movie_title, errors="ignore")

    # A stable sort keeps tied scores in their original order, so the result
    # is deterministic.
    ranked = scores.sort_values(ascending=False, kind="stable")

    return ranked.head(n_recommendations).index.tolist()

# Load both tables once; the following cells reuse `movies` and `ratings`.
movies, ratings = load_data()

In [2]:

# Build the title-by-userId rating matrix from the loaded tables.
movie_user_matrix = preprocess_data(movies, ratings)

# Preview the first rows; the frame is wide, so the display elides columns.
movie_user_matrix.head()

userId,1,2,3,9,10,13,15,16,17,18,...,1403,1405,1407,1409,1411,1413,1414,1416,1417,1420
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Alive (2020),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"'burbs, The (1989)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0


In [5]:

# Compute the title-by-title cosine similarity matrix used for recommendations.
similarity_df = calculate_similarity(movie_user_matrix)
# Preview the first rows; each title has similarity 1.0 with itself.
similarity_df.head()

title,"""Great Performances"" Cats (1998)",#Alive (2020),'71 (2014),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),(Untitled) (2009),*batteries not included (1987),...And Justice for All (1979),...,"tick, tick...BOOM! (2021)",xXx (2002),xXx: Return of Xander Cage (2017),xXx: State of the Union (2005),¡Three Amigos! (1986),¿Quién mató a Bambi? (2013),À nos amours (1983),Çöpçüler Kralı (1977),Ölümlü Dünya (2018),İtirazım Var (2014)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",1.0,0.0,0.0,0.0,0.0,0.0,0.111909,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Alive (2020),0.0,1.0,0.0,0.0,0.134256,0.0,0.065061,0.0,0.0,0.0,...,0.203814,0.087004,0.0,0.0,0.0,0.284517,0.0,0.0,0.0,0.0
'71 (2014),0.0,0.0,1.0,0.0,0.0,0.0,0.054142,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.536875,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"'burbs, The (1989)",0.0,0.134256,0.0,0.0,1.0,0.20067,0.130697,0.0,0.299969,0.061684,...,0.1783,0.169984,0.0,0.0,0.194899,0.319422,0.0,0.0,0.0,0.0


In [6]:

# Title to query; it must match a label of `similarity_df` exactly.
movie_title = "Toy Story (1995)"
recommendations = recommend_movies_based_on_movie(movie_title, similarity_df)

# Print a header line, then one recommended title per line.
print(f"Films similaires à '{movie_title}':")
for movie in recommendations:
    print(movie)

Films similaires à 'Toy Story (1995)':
Toy Story 2 (1999)
Star Wars: Episode IV - A New Hope (1977)
Forrest Gump (1994)
Jurassic Park (1993)
Shrek (2001)
