In [1]:
import requests
import dotenv
import os
import json

# Read the TMDB bearer token from the environment (a local .env file is honoured).
dotenv.load_dotenv()
token = os.getenv("TMDB_TOKEN")

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {token}"
}

# Fetch the genre list once so numeric genre ids can be translated to names.
url = "https://api.themoviedb.org/3/genre/movie/list?language=fr"
response = requests.get(url, headers=headers)
# Surface the real HTTP error (e.g. 401 when the token is missing) instead of
# a KeyError on the error payload a few lines below.
response.raise_for_status()

genres_str = response.json()
id_to_genre = {genre['id']: genre['name'] for genre in genres_str['genres']}

# Walk the first 20 pages of the top-rated listing and keep, for each film,
# its genre names and the full-size poster URL.
list_movies = []
for page in range(1, 21):
    url = f"https://api.themoviedb.org/3/movie/top_rated?language=fr-FR&page={page}"
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    for movie in response.json()["results"]:
        poster_path = movie.get("poster_path")
        if not poster_path:
            # Without a poster path the URL below would end in the literal
            # text "None"; such an entry is useless, so skip it.
            continue
        list_movies.append({
            "genres": [id_to_genre[genre_id] for genre_id in movie["genre_ids"]],
            "poster": f"https://image.tmdb.org/t/p/original{poster_path}"
        })

# Persist the collected records as pretty-printed JSON.
with open("movies.json", "w", encoding="utf-8") as f:
    json.dump(list_movies, f, indent=4)

In [2]:
# TMDB genre ids used in this cell: 16 = Animation, 27 = Horreur (horror)

import json
import os  # was missing: this cell calls os.getenv and must not rely on an earlier cell's import

import dotenv
import requests

# Read the TMDB bearer token from the environment (a local .env file is honoured).
dotenv.load_dotenv()
api_key = os.getenv("TMDB_TOKEN")

# Genre id (sent as the `with_genres` query parameter) -> label stored in the dataset.
genres = {"16": "Animation", "27": "Horreur"}

# Query parameters shared by every request; `with_genres` and `page` are
# filled in per request inside the loop.
params = {
    'language': 'fr-FR',
    'include_adult': False,
    'include_video': False,
    'vote_count.gte': 100,
    "sort_by": "popularity.desc"
}

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {api_key}"
}

# URL of the movie discovery endpoint
url = 'https://api.themoviedb.org/3/discover/movie'

# Collect one {"genre", "poster"} record per film, 30 pages per genre.
all_movies = []
for genre_id, genre_name in genres.items():
    for i in range(1, 31):

        params["with_genres"] = genre_id
        params["page"] = i

        response = requests.get(url, params=params, headers=headers)
        if response.status_code != 200:
            print(f"Erreur {response.status_code} : {response.text}")
            # Stops paging for the current genre only; the outer loop then
            # moves on to the next genre.
            break

        data = response.json()

        for movie in data["results"]:
            poster_path = movie.get('poster_path')
            if not poster_path:
                # Without a poster path the URL below would end in the literal
                # text "None"; skip films that have no poster.
                continue
            custom_data = {"genre" : genre_name, "poster": f"https://image.tmdb.org/t/p/original{poster_path}"}
            all_movies.append(custom_data)

# Persist the collected records as JSON.
with open("movies_binary_classif.json", "w", encoding="utf-8") as dataset:
    json.dump(all_movies, dataset)

In [7]:
import requests
import json
import dotenv
import os
from tqdm import tqdm

# Charger les genres
# Load the genre list from a local file.
# NOTE(review): no visible cell writes genres.json — presumably it is a saved
# response of TMDB's /genre/movie/list endpoint; confirm it exists before
# running this cell on a fresh checkout.
with open("genres.json", "r", encoding="utf-8") as f:
    genres = json.load(f)

# Genre id -> genre name.
genres_dict = {genre["id"]: genre["name"] for genre in genres["genres"]}

# API configuration
dotenv.load_dotenv()
api_key = os.getenv("TMDB_TOKEN")

# Number of result pages requested for each genre.
PAGES_PER_GENRE = 50
# Seconds to wait on a request before giving up; without a timeout a stalled
# connection would hang the whole loop and never reach the except clause.
REQUEST_TIMEOUT = 10

# Query parameters shared by every request; `with_genres` and `page` are
# filled in per request inside the loop.
params = {
    'language': 'fr-FR',
    'include_adult': False,
    'include_video': False,
    'vote_count.gte': 100,
    "sort_by": "popularity.desc"
}

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {api_key}"
}

url = 'https://api.themoviedb.org/3/discover/movie'

# Total number of requests that will be issued (used as the progress-bar total)
total_requests = len(genres_dict) * PAGES_PER_GENRE
print(f"Récupération de {total_requests} pages pour {len(genres_dict)} genres...")

all_movies = []

# Single progress bar covering every (genre, page) request
with tqdm(total=total_requests, desc="Récupération films", unit="pages") as pbar:

    for genre_id, genre_name in genres_dict.items():
        # Show the genre currently being fetched in the bar's description
        pbar.set_description(f"Genre: {genre_name}")

        for page in range(1, PAGES_PER_GENRE + 1):
            params["with_genres"] = genre_id
            params["page"] = page

            try:
                response = requests.get(
                    url, params=params, headers=headers, timeout=REQUEST_TIMEOUT
                )

                if response.status_code != 200:
                    print(f"\nErreur {response.status_code} pour {genre_name}, page {page}: {response.text}")
                    # No pbar.update() here: the `finally` clause below also
                    # runs on `continue`, so updating here as well would count
                    # this page twice and push the bar past its total.
                    continue

                data = response.json()

                # Keep only the films of this page that actually have a poster
                for movie in data["results"]:
                    if movie.get('poster_path'):
                        custom_data = {
                            "genre": genre_name,
                            "poster": f"https://image.tmdb.org/t/p/original{movie.get('poster_path')}"
                        }
                        all_movies.append(custom_data)

                # Refresh the bar's postfix with the running totals
                pbar.set_postfix({
                    'Films': len(all_movies),
                    'Page': f"{page}/{PAGES_PER_GENRE}"
                })

            except requests.RequestException as e:
                # Network-level failure (including timeouts): log it and move
                # on to the next page.
                print(f"\nErreur réseau pour {genre_name}, page {page}: {e}")

            finally:
                # Exactly one tick per requested page, whatever the outcome.
                pbar.update(1)

print(f"\n✅ Récupération terminée ! {len(all_movies)} films collectés")

# Save the dataset
print("💾 Sauvegarde en cours...")
with open("movies_full_classif.json", "w", encoding="utf-8") as dataset:
    json.dump(all_movies, dataset, ensure_ascii=False, indent=2)

print(f"🎬 Dataset sauvegardé : {len(all_movies)} films dans movies_full_classif.json")

Récupération de 950 pages pour 19 genres...


Genre: Action:   0%|          | 0/950 [00:00<?, ?pages/s]     

Genre: Western: 100%|██████████| 950/950 [02:26<00:00,  6.49pages/s, Films=16849, Page=50/50]        



✅ Récupération terminée ! 16849 films collectés
💾 Sauvegarde en cours...
🎬 Dataset sauvegardé : 16849 films dans movies_full_classif.json
