In [1]:
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer


In [2]:
def read_ratings(ratings_csv: str, data_dir: str = "/home/antoine/Ml_Ops_Movies_Reco/app/shared_volume/raw") -> pd.DataFrame:
    """
    Lit le fichier CSV contenant les évaluations des films.

    :param ratings_csv: Nom du fichier CSV contenant les évaluations.
    :param data_dir: Répertoire où se trouve le fichier CSV.
    :return: DataFrame contenant les évaluations.
    """
    data = pd.read_csv(os.path.join(data_dir, ratings_csv))
    print("Dataset ratings chargé")
    return data

def read_movies(movies_csv: str, data_dir: str = "/home/antoine/Ml_Ops_Movies_Reco/app/shared_volume/raw") -> pd.DataFrame:
    """
    Lit le fichier CSV contenant les informations sur les films.

    :param movies_csv: Nom du fichier CSV contenant les informations sur les films.
    :param data_dir: Répertoire où se trouve le fichier CSV.
    :return: DataFrame contenant les informations sur les films.
    """
    df = pd.read_csv(os.path.join(data_dir, movies_csv))
    print("Dataset movies chargé")
    return df

def read_links(links_csv: str, data_dir: str = "/home/antoine/Ml_Ops_Movies_Reco/app/shared_volume/raw") -> pd.DataFrame:
    """
    Lit le fichier CSV contenant les informations sur les liens des affiches scrappés.

    :param links_csv: Nom du fichier CSV contenant les liens des affiches.
    :param data_dir: Répertoire où se trouve le fichier CSV.
    :return: DataFrame contenant movieId et lien vers les affiches.
    """
    df = pd.read_csv(os.path.join(data_dir, links_csv))
    df = df[['movieId', 'cover_link']]
    print("Dataset links chargé")
    return df

In [3]:
# Chargement des données
ratings = read_ratings('ratings.csv')
movies = read_movies('movies.csv')
links = read_links('links2.csv')

Dataset ratings chargé
Dataset movies chargé
Dataset links chargé


In [4]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [5]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [6]:
links.head()

Unnamed: 0,movieId,cover_link
0,1,https://m.media-amazon.com/images/M/MV5BMDU2ZW...
1,2,https://m.media-amazon.com/images/M/MV5BZTk2Zm...
2,3,https://m.media-amazon.com/images/M/MV5BMDkwYT...
3,4,https://m.media-amazon.com/images/M/MV5BZWU4Nz...
4,5,https://m.media-amazon.com/images/M/MV5BOTMwNz...


In [8]:
new_df = ratings[['userId', 'movieId', 'rating']].merge(movies[['movieId', 'title']], on = 'movieId', how = 'left')

new_df = new_df.merge(links, on = 'movieId', how = 'left')

new_df.head()

Unnamed: 0,userId,movieId,rating,title,cover_link
0,1,2,3.5,Jumanji (1995),https://m.media-amazon.com/images/M/MV5BZTk2Zm...
1,1,29,3.5,"City of Lost Children, The (CitÃ© des enfants ...",https://m.media-amazon.com/images/M/MV5BYjBlOG...
2,1,32,3.5,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),https://m.media-amazon.com/images/M/MV5BN2Y2OW...
3,1,47,3.5,Seven (a.k.a. Se7en) (1995),https://m.media-amazon.com/images/M/MV5BOTUwOD...
4,1,50,3.5,"Usual Suspects, The (1995)",https://m.media-amazon.com/images/M/MV5BYTViNj...


In [18]:
# Filtrer le DataFrame pour l'utilisateur avec userId égal à 1 et trier par la colonne 'rating'
sorted_df = (new_df[new_df['userId'] == 1].sort_values(by='rating', ascending=False))['title'][:5]

sorted_df.head(10)

170                                        Freaks (1932)
131    Lord of the Rings: The Fellowship of the Ring,...
142        Lord of the Rings: The Two Towers, The (2002)
158    Lord of the Rings: The Return of the King, The...
30     Star Wars: Episode V - The Empire Strikes Back...
Name: title, dtype: object

In [19]:
def best_user_movies(user_id, n =5):
    new_df = ratings[['userId', 'movieId', 'rating']].merge(movies[['movieId', 'title']], on = 'movieId', how = 'left')
    new_df = new_df.merge(links, on = 'movieId', how = 'left')
    top_user_movies = (new_df[new_df['userId'] == user_id].sort_values(by='rating', ascending=False))[:n]
    top_title = list(top_user_movies['title'])
    top_cover = list(top_user_movies['cover_link'])
    return top_title, top_cover



In [20]:

top_title = best_user_movies(2)[0]
top_cover = best_user_movies(2)[1]

print(top_title)
print(top_cover)

['Time Machine, The (1960)', 'Star Wars: Episode V - The Empire Strikes Back (1980)', 'Friday the 13th (1980)', 'Grumpy Old Men (1993)', "Mr. Holland's Opus (1995)"]
['https://m.media-amazon.com/images/M/MV5BYzEwNGJiY2EtYTZmYi00NjZhLWEwZmUtMjc0MTJlODgzZmY4XkEyXkFqcGdeQXVyMDI2NDg0NQ@@._V1_QL75_UX190_CR0,4,190,281_.jpg', 'https://m.media-amazon.com/images/M/MV5BYmU1NDRjNDgtMzhiMi00NjZmLTg5NGItZDNiZjU5NTU4OTE0XkEyXkFqcGdeQXVyNzkwMjQ5NzM@._V1_QL75_UX190_CR0,7,190,281_.jpg', 'https://m.media-amazon.com/images/M/MV5BNWMxYTYzYWQtNGZmNy00MTg5LTk1N2MtNzQ5NjQxYjQ5NTJhXkEyXkFqcGdeQXVyMTQxNzMzNDI@._V1_QL75_UX190_CR0,1,190,281_.jpg', 'https://m.media-amazon.com/images/M/MV5BMzM1YjNiNDYtODRiOC00ZWM2LTk1MTItNjI5MjA5ZjEwYjAzXkEyXkFqcGc@._V1_QL75_UX190_CR0,2,190,281_.jpg', 'https://m.media-amazon.com/images/M/MV5BZGJkOTFkYjEtNzg5Zi00NWIyLTkyN2QtMDExNjk1NTg1NTQ1XkEyXkFqcGc@._V1_QL75_UX190_CR0,1,190,281_.jpg']


In [21]:
result = {
    str(f"best_user_movies_{i}") : {
        "user_best_tite": title,
        "user_best_cover":cover }
        for i, (title, cover) in enumerate(zip(top_title, top_cover))}

print(result)

{'best_user_movies0': {'user_best_tite': 'Time Machine, The (1960)', 'user_best_cover': 'https://m.media-amazon.com/images/M/MV5BYzEwNGJiY2EtYTZmYi00NjZhLWEwZmUtMjc0MTJlODgzZmY4XkEyXkFqcGdeQXVyMDI2NDg0NQ@@._V1_QL75_UX190_CR0,4,190,281_.jpg'}, 'best_user_movies1': {'user_best_tite': 'Star Wars: Episode V - The Empire Strikes Back (1980)', 'user_best_cover': 'https://m.media-amazon.com/images/M/MV5BYmU1NDRjNDgtMzhiMi00NjZmLTg5NGItZDNiZjU5NTU4OTE0XkEyXkFqcGdeQXVyNzkwMjQ5NzM@._V1_QL75_UX190_CR0,7,190,281_.jpg'}, 'best_user_movies2': {'user_best_tite': 'Friday the 13th (1980)', 'user_best_cover': 'https://m.media-amazon.com/images/M/MV5BNWMxYTYzYWQtNGZmNy00MTg5LTk1N2MtNzQ5NjQxYjQ5NTJhXkEyXkFqcGdeQXVyMTQxNzMzNDI@._V1_QL75_UX190_CR0,1,190,281_.jpg'}, 'best_user_movies3': {'user_best_tite': 'Grumpy Old Men (1993)', 'user_best_cover': 'https://m.media-amazon.com/images/M/MV5BMzM1YjNiNDYtODRiOC00ZWM2LTk1MTItNjI5MjA5ZjEwYjAzXkEyXkFqcGc@._V1_QL75_UX190_CR0,2,190,281_.jpg'}, 'best_user_movies4': {'