Importation des bibliothèques

In [5]:
import os
import requests
import pandas as pd

Récupération des données

In [6]:
# TMDb API key, read from the environment so it is never hard-coded in the
# notebook (a key can be obtained after registering on the TMDb site).
api_key = os.getenv('TMDB_API_KEY')

# Endpoint for popular movies; the page number is appended for each request.
url = f'https://api.themoviedb.org/3/movie/popular?api_key={api_key}&language=en-US&page='

# Column-wise accumulators. Every list must receive exactly one entry per movie,
# otherwise the columns have unequal lengths and the DataFrame cannot be built.
movie_ids = []
titles = []
popularity = []
overview = []
genres = []
actors = []
directors = []
vote_averages = []
release_years = []

num_pages = 450  # Number of result pages to fetch (pages 1..num_pages)

# Seconds to wait on a single HTTP request before treating it as failed, so one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def _get_json(session, request_url):
    """Return the decoded JSON body of a GET request, or None on any failure.

    A failure is a network error or timeout, a status code other than 200, or a
    body that is not valid JSON.
    """
    try:
        reply = session.get(request_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if reply.status_code != 200:
        return None
    try:
        return reply.json()
    except ValueError:
        # Invalid JSON bodies surface as ValueError subclasses.
        return None


def _release_year(release_date):
    """Return the year of a date string, or None if it is missing or unparseable."""
    if not release_date:
        return None
    parsed = pd.to_datetime(release_date, errors='coerce')
    return None if pd.isna(parsed) else parsed.year


# A single Session reuses the underlying connection across the many requests.
with requests.Session() as session:
    # Walk through the requested pages of the "popular" listing.
    for page_num in range(1, num_pages + 1):
        data = _get_json(session, url + str(page_num))
        if data is None:
            print("Erreur lors de la requête")
            continue

        for movie in data.get('results', []):
            movie_ids.append(movie['id'])
            titles.append(movie['title'])
            popularity.append(movie['popularity'])
            overview.append(movie['overview'])

            # Fetch the per-movie details (genres, credits, votes, release date).
            movie_detail_url = f"https://api.themoviedb.org/3/movie/{movie['id']}?api_key={api_key}&language=en-US&append_to_response=credits"
            movie_detail = _get_json(session, movie_detail_url)

            if movie_detail is None:
                # Details unavailable: pad *every* detail column, including
                # directors, so all lists stay the same length.
                genres.append(None)
                actors.append(None)
                directors.append(None)
                vote_averages.append(None)
                release_years.append(None)
                continue

            # Genres
            genres.append([genre['name'] for genre in movie_detail.get('genres') or []])

            movie_credits = movie_detail.get('credits') or {}

            # Actors (None when the response carries no cast information)
            cast = movie_credits.get('cast')
            actors.append([actor['name'] for actor in cast] if cast is not None else None)

            # Directors (None when the response carries no crew information)
            crew = movie_credits.get('crew')
            if crew is not None:
                directors.append([member['name'] for member in crew if member.get('job') == 'Director'])
            else:
                directors.append(None)

            # Average vote
            vote_averages.append(movie_detail.get('vote_average'))
            # Release year
            release_years.append(_release_year(movie_detail.get('release_date')))

# Build the DataFrame from the collected columns.
movies_df = pd.DataFrame({
    'MovieID': movie_ids,
    'Title': titles,
    'Popularity': popularity,
    'Overview': overview,
    'Genres': genres,
    'Actors': actors,
    'Directors': directors,
    'VoteAverage': vote_averages,
    'ReleaseYear': release_years
})

# Show the first rows of the DataFrame
movies_df.head()


Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
0,933131,Badland Hunters,1382.144,After a deadly earthquake turns Seoul into a l...,"[Science Fiction, Action, Drama]","[Ma Dong-seok, Lee Hee-jun, Lee Jun-young, Roh...",[Heo Myeong-haeng],6.742,2024.0
1,1211483,Skal - Fight for Survival,1000.333,"My name's Arthur, a huge Internet star who's j...","[Action, Horror, Comedy, Thriller]","[Evan Marsh, Chris Sandiford, Mariah Inger, Da...",[Benjamin Cappelletti],5.69,2023.0
2,1029575,The Family Plan,980.464,"Dan Morgan is many things: a devoted husband, ...","[Action, Comedy]","[Mark Wahlberg, Michelle Monaghan, Maggie Q, Z...",[Simon Cellan Jones],7.3,2023.0
3,1214314,One More Shot,943.197,Following the attack on the black site in Pola...,"[Action, Thriller]","[Scott Adkins, Michael Jai White, Alexis Knapp...",[James Nunn],6.636,2024.0
4,787699,Wonka,1175.854,Willy Wonka – chock-full of ideas and determin...,"[Comedy, Family, Fantasy]","[Timothée Chalamet, Calah Lane, Keegan-Michael...",[Paul King],7.2,2023.0


In [7]:
def _join_names(value):
    """Turn a list of names into one comma-separated string.

    Lists and tuples are joined with ', '. A value that is already a string is
    returned unchanged, so re-running this cell does not split the text into
    single characters. Anything else (None, NaN, ...) becomes an empty string.
    """
    if isinstance(value, (list, tuple)):
        return ', '.join(value)
    if isinstance(value, str):
        return value
    return ''


# Convert the list-valued columns (genres, actors, directors) into
# comma-separated strings.
for column in ('Genres', 'Actors', 'Directors'):
    movies_df[column] = movies_df[column].apply(_join_names)

# Show the first rows of the DataFrame
movies_df.head()

Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
0,933131,Badland Hunters,1382.144,After a deadly earthquake turns Seoul into a l...,"Science Fiction, Action, Drama","Ma Dong-seok, Lee Hee-jun, Lee Jun-young, Roh ...",Heo Myeong-haeng,6.742,2024.0
1,1211483,Skal - Fight for Survival,1000.333,"My name's Arthur, a huge Internet star who's j...","Action, Horror, Comedy, Thriller","Evan Marsh, Chris Sandiford, Mariah Inger, Dar...",Benjamin Cappelletti,5.69,2023.0
2,1029575,The Family Plan,980.464,"Dan Morgan is many things: a devoted husband, ...","Action, Comedy","Mark Wahlberg, Michelle Monaghan, Maggie Q, Zo...",Simon Cellan Jones,7.3,2023.0
3,1214314,One More Shot,943.197,Following the attack on the black site in Pola...,"Action, Thriller","Scott Adkins, Michael Jai White, Alexis Knapp,...",James Nunn,6.636,2024.0
4,787699,Wonka,1175.854,Willy Wonka – chock-full of ideas and determin...,"Comedy, Family, Fantasy","Timothée Chalamet, Calah Lane, Keegan-Michael ...",Paul King,7.2,2023.0


In [11]:
# Write the DataFrame to a CSV file in the current working directory,
# leaving the row index out of the file
movies_df.to_csv(path_or_buf='movies2024.csv', index=False)
# Sanity check: read the saved file back and show its first rows
pd.read_csv(filepath_or_buffer='movies2024.csv').head(n=5)


Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
0,933131,Badland Hunters,1382.144,After a deadly earthquake turns Seoul into a l...,"Science Fiction, Action, Drama","Ma Dong-seok, Lee Hee-jun, Lee Jun-young, Roh ...",Heo Myeong-haeng,6.742,2024.0
1,1211483,Skal - Fight for Survival,1000.333,"My name's Arthur, a huge Internet star who's j...","Action, Horror, Comedy, Thriller","Evan Marsh, Chris Sandiford, Mariah Inger, Dar...",Benjamin Cappelletti,5.69,2023.0
2,1029575,The Family Plan,980.464,"Dan Morgan is many things: a devoted husband, ...","Action, Comedy","Mark Wahlberg, Michelle Monaghan, Maggie Q, Zo...",Simon Cellan Jones,7.3,2023.0
3,1214314,One More Shot,943.197,Following the attack on the black site in Pola...,"Action, Thriller","Scott Adkins, Michael Jai White, Alexis Knapp,...",James Nunn,6.636,2024.0
4,787699,Wonka,1175.854,Willy Wonka – chock-full of ideas and determin...,"Comedy, Family, Fantasy","Timothée Chalamet, Calah Lane, Keegan-Michael ...",Paul King,7.2,2023.0


In [12]:
# Show the last five rows of the DataFrame
movies_df.tail(n=5)

Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
8995,1242948,SMTOWN LIVE 2024 - SMCU PALACE @ TOKYO,17.829,SMTOWN returns to Japan with explosive perform...,Music,"Kangta, Max Changmin, Jung Yun-ho, Leeteuk, Ki...",,0.0,2024.0
8996,70841,Doraemon: Nobita's Dinosaur,17.829,After bringing a fossilized egg back to life w...,"Animation, Adventure, Family, Fantasy, Science...","Wasabi Mizuta, Megumi Oohara, Tomokazu Seki, R...",Ayumu Watanabe,7.5,2006.0
8997,10102,La Grande Bouffe,17.829,Four friends gather at a villa with the intent...,"Drama, Comedy","Marcello Mastroianni, Ugo Tognazzi, Michel Pic...",Marco Ferreri,7.2,1973.0
8998,25934,"To Sir, with Love",17.826,A British Guianese engineer starts a job as a ...,Drama,"Sidney Poitier, Christian Roberts, Judy Geeson...",James Clavell,7.493,1967.0
8999,16236,Mansfield Park,17.823,"In Mansfield Park, poverty-stricken Fanny Pric...","Drama, TV Movie","Billie Piper, Blake Ritson, Hayley Atwell, Mic...",Iain B. MacDonald,6.01,2007.0


In [13]:
# Display the dimensions of the DataFrame as a (rows, columns) tuple
movies_df.shape

(9000, 9)