#### Script pour récupérer les données des films

In [1]:
import requests
import pandas as pd
import os
import time
import csv
from datetime import datetime 

def get_genre_names(api_key):
    """Fetch the TMDB movie-genre list and map each genre ID to its name.

    Args:
        api_key: TMDB API key, sent as the ``api_key`` query parameter.

    Returns:
        A dict mapping each genre's ``id`` to its ``name``. An empty dict is
        returned when the request fails or times out, when the response
        status is not 200, or when the body is not JSON.
    """
    url = "https://api.themoviedb.org/3/genre/movie/list"
    params = {
        'api_key': api_key,
        'language': 'en-US'
    }
    try:
        # Without an explicit timeout, requests can wait forever on a
        # stalled connection.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        # Treat transport failures like an HTTP error: no mapping available.
        return {}
    if response.status_code != 200:
        return {}
    try:
        genre_list = response.json().get('genres', [])
    except ValueError:
        # Body was not valid JSON.
        return {}
    return {genre['id']: genre['name'] for genre in genre_list}

def get_movies(api_key, number_of_movies, genre_names, csv_file_path):
    """Write popular, already-released movies from TMDB's discover endpoint to a CSV.

    Pages of ``/discover/movie`` sorted by descending popularity are fetched
    one at a time. Every movie whose ``release_date`` is non-empty and
    strictly earlier than today's date is written as one row with the columns
    ``movie_id``, ``title``, ``genres`` (names joined with ``|``),
    ``release_date`` and ``poster_link``.

    Collection stops when ``number_of_movies`` rows have been written, when a
    request fails or returns a non-200 status, or when a page comes back
    empty. Rows written before stopping are kept.

    Args:
        api_key: TMDB API key, sent as the ``api_key`` query parameter.
        number_of_movies: Maximum number of rows to write.
        genre_names: Mapping of genre ID to genre name; IDs without a
            (non-empty) name are omitted from the ``genres`` column.
        csv_file_path: Destination path; the file is overwritten.
    """
    url = "https://api.themoviedb.org/3/discover/movie"
    fieldnames = ['movie_id', 'title', 'genres', 'release_date', 'poster_link']
    page = 1
    total_movies = 0
    # ISO dates (YYYY-MM-DD) order correctly when compared as plain strings.
    today = datetime.now().strftime('%Y-%m-%d')
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        while total_movies < number_of_movies:
            params = {
                'api_key': api_key,
                'language': 'en-US',
                'sort_by': 'popularity.desc',
                'page': page
            }
            try:
                # Without an explicit timeout, requests can wait forever on a
                # stalled connection.
                response = requests.get(url, params=params, timeout=10)
            except requests.RequestException:
                # Same outcome as an HTTP error: stop and keep what we have.
                break
            if response.status_code != 200:
                break

            results = response.json().get('results', [])
            if not results:
                # An empty page cannot advance the counter; without this
                # guard the loop would keep requesting pages forever.
                break

            for movie in results:
                release_date = movie.get('release_date', '')
                if not release_date or release_date >= today:
                    continue  # skip unreleased movies and missing dates

                # `or []` also covers a genre_ids key present with a null value.
                genre_ids = movie.get('genre_ids') or []
                movie_genres = [genre_names.get(genre_id) for genre_id in genre_ids]
                poster_path = movie.get('poster_path')
                poster_link = (
                    f"https://image.tmdb.org/t/p/original{poster_path}"
                    if poster_path else None
                )
                writer.writerow({
                    'movie_id': movie['id'],
                    'title': movie['title'],
                    'genres': '|'.join(filter(None, movie_genres)),
                    'release_date': release_date,
                    'poster_link': poster_link
                })
                total_movies += 1
                if total_movies >= number_of_movies:
                    break

            if total_movies < number_of_movies:
                page += 1
                # Pause only when another request follows, to comply with the
                # rate limit of the API.
                time.sleep(0.5)

# Configuration
# NOTE(review): an API key is committed in source. It should be rotated and
# supplied through the TMDB_API_KEY environment variable; the literal is kept
# only as a fallback so existing runs keep working.
api_key = os.environ.get("TMDB_API_KEY") or "b8218bd4516663ac1ad5be68dd943a7c"
number_of_movies = 200  # Adjust this to the desired number of movies

# Get the genre names (an empty mapping leaves the genres column blank)
genre_names = get_genre_names(api_key)

# Create the data directory if it does not exist; exist_ok avoids the
# check-then-create race of a separate os.path.exists test
output_folder = 'data'
os.makedirs(output_folder, exist_ok=True)

# Define the path for the CSV file
csv_file_path = os.path.join(output_folder, 'movies_data.csv')

# Collect the data and write to CSV
get_movies(api_key, number_of_movies, genre_names, csv_file_path)