In [3]:
import os
import pandas as pd
import json
import pickle
from surprise.prediction_algorithms.matrix_factorization import SVD
import numpy as np
from rapidfuzz import process
from fastapi import APIRouter, HTTPException
from typing import Dict, Any, Optional
from prometheus_client import Counter, Histogram, CollectorRegistry
import time
from pydantic import BaseModel
import requests
import logging
from surprise import Reader
from surprise import Dataset

In [6]:
# ENSEMBLE DES FONCTIONS UTILISEES

# Ouverture fichier ratings
def read_ratings(data_dir: str = "/home/antoine/PROJET_MLOPS_RECO_MOVIES/data/raw/silver/processed_ratings.csv") -> pd.DataFrame:
    """
    Lit le fichier CSV contenant les évaluations des films.

    :param ratings_csv: Nom du fichier CSV contenant les évaluations.
    :param data_dir: Répertoire où se trouve le fichier CSV.
    :return: DataFrame contenant les évaluations.
    """
    data = pd.read_csv(data_dir)
    print("Dataset ratings chargé")
    return data

# Ouverture fichier movies
def read_movies(data_dir: str = "/home/antoine/PROJET_MLOPS_RECO_MOVIES/data/raw/silver/processed_movies.csv") -> pd.DataFrame:
    """
    Lit le fichier CSV contenant les informations sur les films.

    :param movies_csv: Nom du fichier CSV contenant les informations sur les films.
    :param data_dir: Répertoire où se trouve le fichier CSV.
    :return: DataFrame contenant les informations sur les films.
    """
    df = pd.read_csv(data_dir)
    print("Dataset movies chargé")
    return df

# Ouverture fichier links
def read_links(data_dir: str = "/home/antoine/PROJET_MLOPS_RECO_MOVIES/data/raw/silver/processed_links.csv") -> pd.DataFrame:
    """
    Lit le fichier CSV contenant les informations sur les liens des affiches scrappés.

    :param links_csv: Nom du fichier CSV contenant les liens des affiches.
    :param data_dir: Répertoire où se trouve le fichier CSV.
    :return: DataFrame contenant movieId et lien vers les affiches.
    """
    df = pd.read_csv(data_dir)
    print("Dataset links chargé")
    return df

# Chargement du dernier modèle
def load_model(model_name : str, directory = "/home/antoine/PROJET_MLOPS_RECO_MOVIES/data/models") :
    """Charge le modèle à partir d'un répertoire."""
    # Vérifier si le répertoire existe
    if not os.path.exists(directory):
        raise FileNotFoundError(f"Le répertoire {directory} n'existe pas.")
    # Charger le modèle
    filepath = os.path.join(directory, model_name)
    with open(filepath, 'rb') as file:
        model = pickle.load(file)
        print(f'Modèle chargé depuis {filepath}')
    return model


In [8]:
ratings = read_ratings()

ratings.head()

Chargement du fichier /home/antoine/PROJET_MLOPS_RECO_MOVIES/data/raw/silver/processed_ratings.csv réussi.
Chargement du fichier /home/antoine/PROJET_MLOPS_RECO_MOVIES/data/raw/silver/processed_movies.csv réussi.
Chargement du fichier /home/antoine/PROJET_MLOPS_RECO_MOVIES/data/raw/silver/processed_links.csv réussi.


In [9]:
movies = read_movies()
movies.head()

Modèle chargé depuis /home/antoine/PROJET_MLOPS_RECO_MOVIES/data/models/svd_model_v1.pkl


In [10]:
links = read_links()

links.head()

In [13]:
tmdb_token = "eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJjZWY5MGUyOWMzZGQ0ZTg0Y2IzMjdhMWRiMzlhNWY1MSIsIm5iZiI6MTczMTAwMjkyOC44MzM2NTczLCJzdWIiOiI2NzI3Y2MwNTU5MTgxMzdjZmMzOTljMmQiLCJzY29wZXMiOlsiYXBpX3JlYWQiXSwidmVyc2lvbiI6MX0.rfjWSNDMqPka_PXWbTKEUt6pVHg4g_7nukU7yYU1Img"


# Fonction pour obtenir des recommandations pour un utilisateur donné
def get_user_recommendations(
    ratings_df, user_id: int, model: SVD, n_recommendations: int = 24
):
    """Obtenir des recommandations pour un utilisateur donné."""
    # Créer un DataFrame contenant tous les films
    all_movies = ratings_df["movieid"].unique()

    # Obtenir les films déjà évalués par l'utilisateur
    rated_movies = ratings_df[ratings_df["userid"] == user_id]["movieid"].tolist()

    # Trouver les films non évalués par l'utilisateur
    unseen_movies = [movie for movie in all_movies if movie not in rated_movies]

    # Préparer les prédictions pour les films non évalués
    predictions = []
    for movie_id in unseen_movies:
        pred = model.predict(user_id, movie_id)
        predictions.append(
            (movie_id, pred.est)
        )  # Ajouter l'ID du film et la note prédite

    # Trier les prédictions par note prédite (descendant) et prendre les meilleures n_recommendations
    top_n = sorted(predictions, key=lambda x: x[1], reverse=True)[:n_recommendations]
    top_n = [i[0] for i in top_n]
    return top_n  # Retourner les meilleures recommandations


# Focntion qui regroupe les recommandations
def get_content_based_recommendations(movie_idx, cosine_sim, n_recommendations=24):
    sim_scores = list(enumerate(cosine_sim[movie_idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1 : (n_recommendations + 1)]
    similar_movies = [i[0] for i in sim_scores]
    return similar_movies


def format_movie_id(movie_id):
    """Transforme en ImdbId et  Formate l'ID du film pour qu'il ait 7 chiffres."""
    imdbid_format = str(movie_id).zfill(7)  # Formate l'ID pour qu'il ait 7 chiffres
    return imdbid_format

def api_tmdb_request(movie_ids):
    """Effectue des requêtes à l'API TMDB pour récupérer les informations des films."""
    results = {}

    for movie_id in movie_ids:
        formatted_id = format_movie_id(movie_id)
        url = f"https://api.themoviedb.org/3/find/tt{formatted_id}?external_source=imdb_id"

        headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {tmdb_token}",
        }

        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            data = response.json()
            if data["movie_results"]:
                index = len(results) + 1
                movie_info = data["movie_results"][0]
                results[str(index)] = {
                    "title": movie_info["title"],
                    "vote_average": movie_info["vote_average"],
                    "poster_path": f"http://image.tmdb.org/t/p/w185{movie_info['poster_path']}",
                }

        # Vérifie si nous avons atteint 12 résultats
        if len(results) >= 12:
            break

    return results


# Recherche un titre proche de la requête
def movie_finder(all_titles, title):
    """
    Trouve le titre de film le plus proche d'une requête donnée.

    Args:
        all_titles (list): Liste de tous les titres de films disponibles.
        title (str): Titre du film à rechercher.

    Returns:
        str: Le titre du film le plus proche trouvé.
    """
    closest_match = process.extractOne(title, all_titles)
    return (
        closest_match[0] if closest_match else None
    )  # Retourne None si aucun match n'est trouvé

[78064, 26109, 88570, 105250, 44421, 81733, 85012, 113315, 128830, 91617, 114635, 118880, 4778, 35386, 95604]


In [14]:
df = pd.merge(ratings, movies[['movieid', 'genres']], on = 'movieid', how = 'left')
model_svd = load_model("model_SVD.pkl")
# Chargement de la matrice cosinus similarity
similarity_cosinus = np.load("/home/antoine/PROJET_MLOPS_RECO_MOVIES/data/models/cosine_similarity_matrix.npy")
# Création d'un dataframe pour les liens entre les films et les ID IMDB
movies_links_df = movies.merge(links, on="movieid", how="left")
# Création de dictionnaires pour faciliter l'accès aux titres et aux couvertures des films par leur ID
movie_idx = dict(zip(movies["title"], list(movies.index)))
# Création de dictionnaires pour accéder facilement aux titres et aux couvertures des films par leur ID
movie_titles = dict(zip(movies["movieid"], movies["title"]))
# Créer un dictionnaire pour un accès rapide
imdb_dict = dict(zip(movies_links_df["movieid"], movies_links_df["imdbid"]))
# Créer une liste de tous les titres de films
all_titles = movies["title"].tolist()
print("############ FIN DES CHARGEMENTS ############")

[428040, 57591, 73887, 432232, 112120, 147039, 56322, 3576084, 1504691, 310313, 3521134, 2326554, 264476, 204058, 996994]


In [15]:
def get_user_recommendations(
    ratings_df, user_id: int, model: SVD, n_recommendations: int = 24
):
    """Obtenir des recommandations pour un utilisateur donné."""
    # Initialiser une liste vide pour stocker les paires (utilisateur, movie) pour le jeu "anti-testset"
    anti_testset = []
    # Convertir l'ID de l'utilisateur externe en l'ID interne utilisé par Surprise
    targetUser = train_set.to_inner_uid(user_id)
    # Obtenir la valeur de remplissage à utiliser (moyenne globale des notes du jeu d'entraînement)
    moyenne = train_set.global_mean
    # Obtenir les évaluations de l'utilisateur cible pour les movies
    user_note = train_set.ur[targetUser]
    # Extraire la liste des movies notés par l'utilisateur
    user_movie = [item for (item,_) in (user_note)]
    # Obtenir toutes les notations du jeu d'entraînement
    ratings = train_set.all_ratings()
    # Boucle sur tous les items du jeu d'entraînement
    for movie in train_set.all_items():
    # Si l'item n'a pas été noté par l'utilisateur
        if movie not in user_movie:
            # Ajouter la paire (utilisateur, movie, valeur de remplissage) à la liste "anti-testset"
            anti_testset.append((user_id, train_set.to_raw_iid(movie), moyenne))
    predictionsSVD = model.test(anti_testset)
    # Convertir les prédictions en un DataFrame pandas
    predictionsSVD = pd.DataFrame(predictionsSVD)
    # Trier les prédictions par la colonne 'est' (estimation) en ordre décroissant
    predictionsSVD.sort_values(by=['est'], inplace=True, ascending=False)
    # Afficher les 10 meilleures prédictions
    return predictionsSVD["iid"].values[:n_recommendations]

{'1': {'title': 'Ween: Live in Chicago', 'vote_average': 9.667, 'poster_path': 'http://image.tmdb.org/t/p/w185/bJ0BUNEIgDUtZRnkodAPsJC0CA5.jpg'}, '2': {'title': 'Crooks in Clover', 'vote_average': 7.74, 'poster_path': 'http://image.tmdb.org/t/p/w185/1uYCIsMpX7FtWu2xO1YMIAQYvLR.jpg'}, '3': {'title': 'Welfare', 'vote_average': 7.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/1eFgNI93rRSOFZZ9cajF3tMmGPc.jpg'}, '4': {'title': 'Ménilmontant', 'vote_average': 7.4, 'poster_path': 'http://image.tmdb.org/t/p/w185/hWafMLPvy6w68YI7NgYwfNqhlYT.jpg'}, '5': {'title': 'The Given Word', 'vote_average': 8.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/cN8Lk5bm4dQX8enBPP5LxQRdn9k.jpg'}, '6': {'title': 'Zero Motivation', 'vote_average': 6.5, 'poster_path': 'http://image.tmdb.org/t/p/w185/o47GwtgXRD6k8qxcJseQK2Fa9C0.jpg'}, '7': {'title': 'Plastic Bag', 'vote_average': 7.5, 'poster_path': 'http://image.tmdb.org/t/p/w185/pZOKKv1p8iz3CtcIG7Vi7nhMO8o.jpg'}, '8': {'title': 'A New Life', 'vote_average': 7

In [17]:
test = get_user_recommendations(ratings, 35022, model_svd, 24)
print(test)

{'1': {'title': 'The War at Home', 'vote_average': 7.2, 'poster_path': 'http://image.tmdb.org/t/p/w185/tDijvB2Nfmosm39jrI0vQa8cNSm.jpg'}, '2': {'title': 'A Very Potter Musical', 'vote_average': 8.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/aUERHy9crIrfB4b55PltpyViER8.jpg'}, '3': {'title': 'Zero Motivation', 'vote_average': 6.5, 'poster_path': 'http://image.tmdb.org/t/p/w185/o47GwtgXRD6k8qxcJseQK2Fa9C0.jpg'}, '4': {'title': 'Crooks in Clover', 'vote_average': 7.74, 'poster_path': 'http://image.tmdb.org/t/p/w185/1uYCIsMpX7FtWu2xO1YMIAQYvLR.jpg'}, '5': {'title': 'Welfare', 'vote_average': 7.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/1eFgNI93rRSOFZZ9cajF3tMmGPc.jpg'}, '6': {'title': 'Tito and Me', 'vote_average': 7.121, 'poster_path': 'http://image.tmdb.org/t/p/w185/hLekdPVjTjqbkauVR8DXDgixrw4.jpg'}, '7': {'title': 'The Matrix', 'vote_average': 8.221, 'poster_path': 'http://image.tmdb.org/t/p/w185/dXNAPwY7VrqMAo51EKhhCJfaGb5.jpg'}, '8': {'title': 'For Neda', 'vote_average': 6.

In [None]:
test_2 = get_user_recommendations(ratings, 50, model_svd, 24)
print(test_2)

In [20]:
user_id = 501
df_user = ratings[ratings["userid"] == user_id]
df_user = df_user.sort_values(by="rating", ascending=False)
best_movies = df_user.head(3)


{'1': {'title': 'Antz', 'vote_average': 6.165, 'poster_path': 'http://image.tmdb.org/t/p/w185/pvpJfkzcqENTSlBKCSImi05WlXK.jpg'}, '2': {'title': 'Toy Story 2', 'vote_average': 7.6, 'poster_path': 'http://image.tmdb.org/t/p/w185/yFWQkz2ynjwsazT6xQiIXEUsyuh.jpg'}, '3': {'title': 'The Adventures of Rocky & Bullwinkle', 'vote_average': 4.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/xCFSsftt2rglC81I6QLWcZSTCBM.jpg'}, '4': {'title': "The Emperor's New Groove", 'vote_average': 7.6, 'poster_path': 'http://image.tmdb.org/t/p/w185/xU635vn1lMD9DWDloeuKmuhxxnQ.jpg'}, '5': {'title': 'Monsters, Inc.', 'vote_average': 7.843, 'poster_path': 'http://image.tmdb.org/t/p/w185/qjlbN6aK1qgeg3SspFVovT2D1Me.jpg'}, '6': {'title': 'DuckTales: The Movie - Treasure of the Lost Lamp', 'vote_average': 6.7, 'poster_path': 'http://image.tmdb.org/t/p/w185/wCvdRynGJQWaOi2qwQP5nDdFxjM.jpg'}, '7': {'title': 'The Wild', 'vote_average': 5.5, 'poster_path': 'http://image.tmdb.org/t/p/w185/sxqzaOto1qyUseSg6YpIP9Su5lp.jpg

In [None]:
best_movies.head()

In [10]:
imdb_list = [
            imdb_dict[movie_id]
            for movie_id in best_movies["movieid"]
            if movie_id in imdb_dict
        ]

print(imdb_list)

[117731, 116629, 117665]


In [37]:
results = api_tmdb_request(imdb_list)
print(results)

{'1': {'title': 'Star Trek: First Contact', 'vote_average': 7.29, 'poster_path': 'http://image.tmdb.org/t/p/w185/vrC1lkTktFQ4AqBfqf4PXoDDLcw.jpg'}, '2': {'title': 'Independence Day', 'vote_average': 6.891, 'poster_path': 'http://image.tmdb.org/t/p/w185/p0BPQGSPoSa8Ml0DAf2mB2kCU0R.jpg'}, '3': {'title': 'Sleepers', 'vote_average': 7.604, 'poster_path': 'http://image.tmdb.org/t/p/w185/yUpiEk2EojS9ZEXb3nIQonQCYYF.jpg'}}


In [11]:
def api_tmdb_request(movie_ids):
    """Effectue des requêtes à l'API TMDB pour récupérer les informations des films."""
    results = {}

    for movie_id in movie_ids:
        formatted_id = format_movie_id(movie_id)
        url = f"https://api.themoviedb.org/3/find/tt{formatted_id}?external_source=imdb_id"

        headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {tmdb_token}",
        }

        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            data = response.json()
            if data["movie_results"]:
                index = len(results) + 1
                movie_info = data["movie_results"][0]
                results[str(index)] = {
                    "title": movie_info["title"],
                    "vote_average": movie_info["vote_average"],
                    "poster_path": f"http://image.tmdb.org/t/p/w185{movie_info['poster_path']}",
                }

        # Vérifie si nous avons atteint 12 résultats
        if len(results) >= 12:
            break

    return results

In [12]:
for movie_id in imdb_list:
    formatted_id = format_movie_id(movie_id)

print(formatted_id)

0117665


In [13]:
recommendations = get_user_recommendations(
            ratings, user_id, model_svd, n_recommendations=24
        )

print(recommendations)

[79842, 97300, 113315, 100553, 106334, 85012, 88570, 25987, 27423, 116136, 68175, 117364, 70978, 26674, 73818, 26109, 318, 47084, 95604, 109925, 27611, 3147, 96460, 82121]


In [14]:
imdb_list = [
            imdb_dict[movie_id] for movie_id in recommendations if movie_id in imdb_dict
        ]

print(imdb_list)

[1671630, 266010, 3576084, 2092588, 2262456, 56322, 73887, 46851, 271383, 3012698, 318413, 3455224, 92550, 98898, 115928, 57591, 111161, 388505, 996994, 56628, 314979, 120689, 83316, 39080]


In [15]:
results = api_tmdb_request(imdb_list)

print(results)

{'1': {'title': 'For Neda', 'vote_average': 6.8, 'poster_path': 'http://image.tmdb.org/t/p/w185/4mR78UfKMwCPx0rldYOEbegh0Wz.jpg'}, '2': {'title': 'Björk: Volumen', 'vote_average': 8.3, 'poster_path': 'http://image.tmdb.org/t/p/w185/jhFdZGOmkIZCRIjoaGUbHAwXsi0.jpg'}, '3': {'title': 'Zero Motivation', 'vote_average': 6.5, 'poster_path': 'http://image.tmdb.org/t/p/w185/o47GwtgXRD6k8qxcJseQK2Fa9C0.jpg'}, '4': {'title': 'The Given Word', 'vote_average': 8.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/cN8Lk5bm4dQX8enBPP5LxQRdn9k.jpg'}, '5': {'title': 'Welfare', 'vote_average': 7.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/1eFgNI93rRSOFZZ9cajF3tMmGPc.jpg'}, '6': {'title': 'The Crucified Lovers', 'vote_average': 7.8, 'poster_path': 'http://image.tmdb.org/t/p/w185/3FTzr49lQKouUEutQRX5bqMCO4C.jpg'}, '7': {'title': "A Dog's Will", 'vote_average': 8.4, 'poster_path': 'http://image.tmdb.org/t/p/w185/wdnRsioGMCntvV9VXR1TBhFemDd.jpg'}, '8': {'title': 'The Magic of Méliès', 'vote_average': 7

In [65]:
n_recommendations=10
sim_scores = list(enumerate(cosine_sim[idx]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
sim_scores = sim_scores[1:(n_recommendations+1)]
similar_movies = [i[0] for i in sim_scores]

In [66]:
print(f"Because you watched {title}:")
movies['title'].iloc[similar_movies]

Because you watched Toy Story:


2209                                                 Antz
3027                                          Toy Story 2
3663              Adventures of Rocky and Bullwinkle, The
3922                            Emperor's New Groove, The
4790                                       Monsters, Inc.
10114    DuckTales: The Movie - Treasure of the Lost Lamp
10987                                           Wild, The
11871                                     Shrek the Third
13337                             Tale of Despereaux, The
18274    Asterix and the Vikings (Astérix et les Vikings)
Name: title, dtype: object

In [69]:
def get_content_based_recommendations(title_string, n_recommendations=10):
    title = movie_finder(all_titles, title_string)
    idx = movie_idx[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:(n_recommendations+1)]
    similar_movies = [i[0] for i in sim_scores]
    return similar_movies

In [71]:
similar_movies = get_content_based_recommendations('toy story', 10)
print(similar_movies)
print(f"Because you watched {title}:")
print(movies['title'].iloc[similar_movies])

[2209, 3027, 3663, 3922, 4790, 10114, 10987, 11871, 13337, 18274]
Because you watched Toy Story:
2209                                                 Antz
3027                                          Toy Story 2
3663              Adventures of Rocky and Bullwinkle, The
3922                            Emperor's New Groove, The
4790                                       Monsters, Inc.
10114    DuckTales: The Movie - Treasure of the Lost Lamp
10987                                           Wild, The
11871                                     Shrek the Third
13337                             Tale of Despereaux, The
18274    Asterix and the Vikings (Astérix et les Vikings)
Name: title, dtype: object


In [16]:
# Focntion qui regroupe les recommandations
def get_content_based_recommendations(all_titles, title_string, cosine_sim, n_recommendations=15):
    title = movie_finder(all_titles, title_string)
    idx = movie_idx[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:(n_recommendations+1)]
    similar_movies = [i[0] for i in sim_scores]
    return similar_movies

In [17]:
title = 'toy story'
recommendations = get_content_based_recommendations(movie_titles, title, similarity_cosinus, n_recommendations=24)
print(recommendations)



[2209, 3027, 3663, 3922, 4790, 10114, 10987, 11871, 13337, 18274, 21355, 24092, 24156, 24458, 24460, 24849, 27270, 664, 4211, 7814, 8780, 10367, 11187, 11704]


In [25]:
movies_id = [movies['movieid'].iloc[i] for i in recommendations]
print(movies_id)
print(movies['title'].iloc[recommendations])

[2294, 3114, 3754, 4016, 4886, 33463, 45074, 53121, 65577, 91355, 103755, 114240, 114552, 115875, 115879, 117454, 131248, 673, 4306, 8444, 26340, 36397, 47124, 51632]
2209                                                  Antz
3027                                           Toy Story 2
3663               Adventures of Rocky and Bullwinkle, The
3922                             Emperor's New Groove, The
4790                                        Monsters, Inc.
10114     DuckTales: The Movie - Treasure of the Lost Lamp
10987                                            Wild, The
11871                                      Shrek the Third
13337                              Tale of Despereaux, The
18274     Asterix and the Vikings (Astérix et les Vikings)
21355                                                Turbo
24092                                              Aladdin
24156                                       Boxtrolls, The
24458                   Toy Story Toons: Hawaiian Vacation
24460  

In [21]:
imbd_4790 = imdb_dict[4790]
print(imbd_4790)

75132


In [26]:
imdb_list = [
            imdb_dict[movie_id] for movie_id in movies_id if movie_id in imdb_dict
        ]

print(imdb_list)

[120587, 120363, 131704, 120917, 198781, 99472, 405469, 413267, 420238, 371552, 1860353, 827990, 787474, 1850374, 2033372, 986361, 465925, 117705, 126029, 92752, 72901, 361089, 429589, 344864]


In [85]:
test = links[links['movieid'] == 4790
             ]

test

Unnamed: 0,movieid,imdbid,tmdbid
4694,4790,75132,35200


In [27]:
results = api_tmdb_request(imdb_list)
print(results)

{'1': {'title': 'Antz', 'vote_average': 6.163, 'poster_path': 'http://image.tmdb.org/t/p/w185/pvpJfkzcqENTSlBKCSImi05WlXK.jpg'}, '2': {'title': 'Toy Story 2', 'vote_average': 7.6, 'poster_path': 'http://image.tmdb.org/t/p/w185/yFWQkz2ynjwsazT6xQiIXEUsyuh.jpg'}, '3': {'title': 'The Adventures of Rocky & Bullwinkle', 'vote_average': 4.1, 'poster_path': 'http://image.tmdb.org/t/p/w185/xCFSsftt2rglC81I6QLWcZSTCBM.jpg'}, '4': {'title': "The Emperor's New Groove", 'vote_average': 7.56, 'poster_path': 'http://image.tmdb.org/t/p/w185/xU635vn1lMD9DWDloeuKmuhxxnQ.jpg'}, '5': {'title': 'Monsters, Inc.', 'vote_average': 7.844, 'poster_path': 'http://image.tmdb.org/t/p/w185/qjlbN6aK1qgeg3SspFVovT2D1Me.jpg'}, '6': {'title': 'DuckTales: The Movie - Treasure of the Lost Lamp', 'vote_average': 6.7, 'poster_path': 'http://image.tmdb.org/t/p/w185/wCvdRynGJQWaOi2qwQP5nDdFxjM.jpg'}, '7': {'title': 'The Wild', 'vote_average': 5.533, 'poster_path': 'http://image.tmdb.org/t/p/w185/sxqzaOto1qyUseSg6YpIP9Su5lp.

In [89]:
formatted_id = format_movie_id(75132)
formatted_id

'0075132'

In [90]:
url = f"https://api.themoviedb.org/3/find/tt{formatted_id}?external_source=imdb_id"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_token}",
}

response = requests.get(url, headers=headers)

if response.status_code == 200:
    data = response.json()
    print(data)

{'movie_results': [{'backdrop_path': '/1Nas9syOK93PltBGVjXJi9QvxM3.jpg', 'id': 35200, 'title': 'The Return of a Man Called Horse', 'original_title': 'The Return of a Man Called Horse', 'overview': "Lord John Morgan has returned to civilized life in England, but finds he has nothing but disdain for that life. Yearning to embrace the simplicity of the American West-and the Yellow Hands Sioux tribe he left behind, Morgan returns to the tribe's land only to discover that they've been decimated by ruthless, government-backed fur traders. Led by Horse, they fight to repossess their land.", 'poster_path': '/qOoLZCDwt3tOyFNv4MRAVvrSFiM.jpg', 'media_type': 'movie', 'adult': False, 'original_language': 'en', 'genre_ids': [37, 12], 'popularity': 8.947, 'release_date': '1976-06-28', 'video': False, 'vote_average': 5.6, 'vote_count': 54}], 'person_results': [], 'tv_results': [], 'tv_episode_results': [], 'tv_season_results': []}


In [24]:
titre = movies[movies['title'] == 'Monsters, Inc.']
titre

Unnamed: 0,movieid,title,genres,year
4790,4886,"Monsters, Inc.","Adventure, Animation, Children, Comedy, Fantasy",2001


In [22]:
movies.head()

Unnamed: 0,movieid,title,genres,year
0,1,Toy Story,"Adventure, Animation, Children, Comedy, Fantasy",1995
1,2,Jumanji,"Adventure, Children, Fantasy",1995
2,3,Grumpier Old Men,"Comedy, Romance",1995
3,4,Waiting to Exhale,"Comedy, Drama, Romance",1995
4,5,Father of the Bride Part II,Comedy,1995


In [48]:
def get_top_n_recommendations(user_id, n=10):
    user_movies = df[df['userid'] == user_id]['movieid'].unique()
    all_movies = df['movieid'].unique()
    movies_to_predict = list(set(all_movies) - set(user_movies))

    user_movie_pairs = [(user_id, movie_id, 0) for movie_id in movies_to_predict]
    predictions_cf = model_svd.test(user_movie_pairs)

    top_n_recommendations = sorted(predictions_cf, key = lambda x: x.est)[:n]

    for pred in top_n_recommendations:
        predicted_rating = pred.est
        print(predicted_rating)


    top_n_movie_ids = [int(pred.iid) for pred in top_n_recommendations]

    return top_n_movie_ids

In [49]:
user_id = 221
recommendations = get_top_n_recommendations(user_id)
top_n_movies_titles = movies[movies['movieid'].isin(recommendations)]['title'].tolist()
print(f"Top 5 Recommendations for User {user_id}:")
for i, title in enumerate(top_n_movies_titles, 1):
    print(f"{i}.{title}")

1.2775692102727265
1.460825985769735
1.4923238585404381
1.496890155428611
1.52659282051723
1.5448081295962317
1.5906162961613841
1.6113855284500447
1.6223927486700045
1.6603796628748584
Top 5 Recommendations for User 221:
1.Turbo: A Power Rangers Movie
2.Barney's Great Adventure
3.Glitter
4.From Justin to Kelly
5.Gigli
6.SuperBabies: Baby Geniuses 2
7.Son of the Mask
8.Epic Movie
9.Bratz: The Movie
10.Disaster Movie


In [50]:
user_id = 20560
recommendations = get_top_n_recommendations(user_id)
top_n_movies_titles = movies[movies['movieid'].isin(recommendations)]['title'].tolist()
print(f"Top 5 Recommendations for User {user_id}:")
for i, title in enumerate(top_n_movies_titles, 1):
    print(f"{i}.{title}")

1.2775692102727265
1.460825985769735
1.4923238585404381
1.496890155428611
1.52659282051723
1.5448081295962317
1.5906162961613841
1.6113855284500447
1.6223927486700045
1.6603796628748584
Top 5 Recommendations for User 20560:
1.Turbo: A Power Rangers Movie
2.Barney's Great Adventure
3.Glitter
4.From Justin to Kelly
5.Gigli
6.SuperBabies: Baby Geniuses 2
7.Son of the Mask
8.Epic Movie
9.Bratz: The Movie
10.Disaster Movie
