Importation des bibliothèques

In [4]:
import os
import requests
import pandas as pd

Récupération des données

In [5]:
# TMDb API key, read from the environment (obtainable after registering on the TMDb site)
api_key = os.getenv('TMDB_API_KEY')
if not api_key:
    # Fail fast: without a key every request below is rejected and the cell
    # would end up building (and later saving) an empty DataFrame.
    raise RuntimeError("La variable d'environnement TMDB_API_KEY n'est pas définie")

# Endpoint for popular movies; the page number is appended for each request
url = f'https://api.themoviedb.org/3/movie/popular?api_key={api_key}&language=en-US&page='

# Parallel lists, one entry per movie. They must all keep the same length,
# otherwise the DataFrame construction at the end of the cell fails.
movie_ids = []
titles = []
popularity = []
overview = []
genres = []
actors = []
directors = []
vote_averages = []
release_years = []

num_pages = 450  # Number of result pages to fetch
request_timeout = 10  # Seconds before giving up on a single HTTP request

# Loop over the result pages
for page_num in range(1, num_pages + 1):
    try:
        response = requests.get(url + str(page_num), timeout=request_timeout)
    except requests.RequestException as exc:
        # A network failure on one page must not discard everything collected so far
        print(f"Erreur lors de la requête (page {page_num}) : {exc}")
        continue
    if response.status_code != 200:
        print(f"Erreur lors de la requête (page {page_num}, statut {response.status_code})")
        continue

    data = response.json()
    for movie in data['results']:
        # Detail fields default to None and are only overwritten when the
        # detail request succeeds, so every list receives exactly one value
        # per movie whatever happens below.
        movie_genres = None
        movie_actors = None
        movie_directors = None
        vote_average = None
        release_year = None

        # Fetch the extra details (genres, credits, votes, release date) for this movie
        movie_detail_url = f"https://api.themoviedb.org/3/movie/{movie['id']}?api_key={api_key}&language=en-US&append_to_response=credits"
        try:
            detail_response = requests.get(movie_detail_url, timeout=request_timeout)
        except requests.RequestException:
            detail_response = None

        if detail_response is not None and detail_response.status_code == 200:
            movie_detail = detail_response.json()
            movie_genres = [genre['name'] for genre in movie_detail.get('genres', [])]

            # Cast and crew are only present when the credits block was returned
            movie_credits = movie_detail.get('credits', {})
            if 'cast' in movie_credits:
                movie_actors = [actor['name'] for actor in movie_credits['cast']]
            if 'crew' in movie_credits:
                movie_directors = [
                    crew['name']
                    for crew in movie_credits['crew']
                    if crew.get('job') == 'Director'
                ]

            vote_average = movie_detail.get('vote_average')

            # The release date may be missing or empty; an unparsable value
            # is coerced to NaT (whose year is NaN) instead of raising.
            release_date = movie_detail.get('release_date')
            if release_date:
                release_year = pd.to_datetime(release_date, errors='coerce').year

        # Append to every list in one place to keep them aligned
        movie_ids.append(movie['id'])
        titles.append(movie['title'])
        popularity.append(movie['popularity'])
        overview.append(movie['overview'])
        genres.append(movie_genres)
        actors.append(movie_actors)
        directors.append(movie_directors)
        vote_averages.append(vote_average)
        release_years.append(release_year)

# Build the DataFrame from the collected data
movies_df = pd.DataFrame({
    'MovieID': movie_ids,
    'Title': titles,
    'Popularity': popularity,
    'Overview': overview,
    'Genres': genres,
    'Actors': actors,
    'Directors': directors,
    'VoteAverage': vote_averages,
    'ReleaseYear': release_years
})

# Display the first rows of the DataFrame
movies_df.head()


Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
0,533535,Deadpool & Wolverine,5948.017,A listless Wade Wilson toils away in civilian ...,"[Action, Comedy, Science Fiction]","[Ryan Reynolds, Hugh Jackman, Emma Corrin, Mat...",[Shawn Levy],7.763,2024.0
1,1022789,Inside Out 2,3128.461,Teenager Riley's mind headquarters is undergoi...,"[Animation, Family, Adventure, Comedy]","[Amy Poehler, Maya Hawke, Kensington Tallman, ...",[Kelsey Mann],7.678,2024.0
2,519182,Despicable Me 4,2384.852,"Gru and Lucy and their girls—Margo, Edith and ...","[Animation, Family, Comedy, Action]","[Steve Carell, Kristen Wiig, Will Ferrell, Sof...",[Chris Renaud],7.297,2024.0
3,718821,Twisters,2251.448,"As storm season intensifies, the paths of form...","[Action, Adventure, Drama, Thriller]","[Daisy Edgar-Jones, Glen Powell, Anthony Ramos...",[Lee Isaac Chung],7.039,2024.0
4,704239,The Union,1791.389,A New Jersey construction worker goes from reg...,"[Action, Comedy]","[Mark Wahlberg, Halle Berry, J.K. Simmons, Mik...",[Julian Farino],6.368,2024.0


In [6]:
def join_names(value):
    """Return a list of names as one comma-separated string.

    Parameters
    ----------
    value : list of str, str or None
        The content of one cell of a list-valued column.

    Returns
    -------
    str
        '' when `value` is None, `value` itself when it is already a string,
        otherwise the items joined with ', '.
    """
    if value is None:
        return ''
    if isinstance(value, str):
        # Already converted (the cell was re-run): joining a string would
        # split it into individual characters.
        return value
    return ', '.join(value)


# Convert the lists of genres, actors and directors into comma-separated strings
for column in ['Genres', 'Actors', 'Directors']:
    movies_df[column] = movies_df[column].apply(join_names)

# Display the first rows of the DataFrame
movies_df.head()

Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
0,533535,Deadpool & Wolverine,5948.017,A listless Wade Wilson toils away in civilian ...,"Action, Comedy, Science Fiction","Ryan Reynolds, Hugh Jackman, Emma Corrin, Matt...",Shawn Levy,7.763,2024.0
1,1022789,Inside Out 2,3128.461,Teenager Riley's mind headquarters is undergoi...,"Animation, Family, Adventure, Comedy","Amy Poehler, Maya Hawke, Kensington Tallman, L...",Kelsey Mann,7.678,2024.0
2,519182,Despicable Me 4,2384.852,"Gru and Lucy and their girls—Margo, Edith and ...","Animation, Family, Comedy, Action","Steve Carell, Kristen Wiig, Will Ferrell, Sofí...",Chris Renaud,7.297,2024.0
3,718821,Twisters,2251.448,"As storm season intensifies, the paths of form...","Action, Adventure, Drama, Thriller","Daisy Edgar-Jones, Glen Powell, Anthony Ramos,...",Lee Isaac Chung,7.039,2024.0
4,704239,The Union,1791.389,A New Jersey construction worker goes from reg...,"Action, Comedy","Mark Wahlberg, Halle Berry, J.K. Simmons, Mike...",Julian Farino,6.368,2024.0


In [7]:
# Write the DataFrame to a CSV file in the current working directory,
# without the index column
output_csv = 'movies2024.csv'
movies_df.to_csv(output_csv, index=False)

# Sanity check: read the saved file back and show its first rows
saved_movies = pd.read_csv(output_csv)
saved_movies.head()


Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
0,533535,Deadpool & Wolverine,5948.017,A listless Wade Wilson toils away in civilian ...,"Action, Comedy, Science Fiction","Ryan Reynolds, Hugh Jackman, Emma Corrin, Matt...",Shawn Levy,7.763,2024.0
1,1022789,Inside Out 2,3128.461,Teenager Riley's mind headquarters is undergoi...,"Animation, Family, Adventure, Comedy","Amy Poehler, Maya Hawke, Kensington Tallman, L...",Kelsey Mann,7.678,2024.0
2,519182,Despicable Me 4,2384.852,"Gru and Lucy and their girls—Margo, Edith and ...","Animation, Family, Comedy, Action","Steve Carell, Kristen Wiig, Will Ferrell, Sofí...",Chris Renaud,7.297,2024.0
3,718821,Twisters,2251.448,"As storm season intensifies, the paths of form...","Action, Adventure, Drama, Thriller","Daisy Edgar-Jones, Glen Powell, Anthony Ramos,...",Lee Isaac Chung,7.039,2024.0
4,704239,The Union,1791.389,A New Jersey construction worker goes from reg...,"Action, Comedy","Mark Wahlberg, Halle Berry, J.K. Simmons, Mike...",Julian Farino,6.368,2024.0


In [8]:
# Display the last rows of the DataFrame
movies_df.tail()

Unnamed: 0,MovieID,Title,Popularity,Overview,Genres,Actors,Directors,VoteAverage,ReleaseYear
8995,50357,Apollo 18,17.765,"Officially, Apollo 17 was the last manned miss...","Horror, Thriller, Science Fiction","Ryan Robbins, Warren Christie, Lloyd Owen, And...",Gonzalo López-Gallego,5.3,2011.0
8996,10596,Replicant,17.763,Scientists create a genetic clone of a serial ...,"Action, Science Fiction, Thriller","Michael Rooker, Jean-Claude Van Damme, Ian Rob...",Ringo Lam,5.841,2001.0
8997,566076,The United States vs. Billie Holiday,17.762,Billie Holiday spent much of her career being ...,"Music, Drama, History","Andra Day, Trevante Rhodes, Garrett Hedlund, L...",Lee Daniels,6.602,2021.0
8998,146239,Delivery Man,17.762,An affable underachiever finds out he's father...,Comedy,"Vince Vaughn, Cobie Smulders, Chris Pratt, Bri...",Ken Scott,6.248,2013.0
8999,1075523,La Querida,17.761,A young couple's romance is cut short when the...,Drama,"Angela Morena, Arron Villaflor, Mercedes Cabra...",G.B. Sampedro,4.2,2023.0


In [9]:
# Display the dimensions of the DataFrame as a (rows, columns) tuple
movies_df.shape

(9000, 9)