In [4]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from dotenv import load_dotenv
from datetime import date
import time
import pandas as pd
import os

In [5]:
# --- CONFIGURATION ---

# Snapshot date of this run; interpolated into the output CSV file name.
DATE = date.today()

# Load variables from a local .env file (if any), then read the API key.
load_dotenv()
API_KEY = os.getenv("API_KEY")
if not API_KEY:
    # Fail fast with an actionable message: without this check a missing key
    # only surfaces later, as a less obvious client/authentication error.
    raise RuntimeError(
        "API_KEY is not set: define it in the environment or in a .env file."
    )

REGION_CODE = "FR"
# Prioritise French content.
# NOTE(review): not referenced by any of the cells visible here -- confirm it is still needed.
RELEVANCE_LANGUAGE = "fr"
MAX_VIDEOS_PER_CAT = 200  # upper bound on videos collected per category
CSV_INPUT = 'cats.csv'  # category list read by the execution cell

# YouTube Data API v3 client, shared by the cells below.
youtube = build('youtube', 'v3', developerKey=API_KEY)

In [6]:
def _video_record(item):
    """Flatten one ``videos.list`` item into the flat row dict stored per video."""
    snippet = item["snippet"]
    # Treat both sub-resources as optional (the original already did so for
    # contentDetails) so one sparse item cannot abort the whole category.
    stats = item.get("statistics", {})
    details = item.get("contentDetails", {})
    return {
        "video_id": item["id"],
        "title": snippet["title"],
        "description": snippet.get("description", ""),
        "channel": snippet["channelTitle"],
        "published_at": snippet["publishedAt"],
        "duration": details.get("duration", None),
        # Counters absent from the response are recorded as 0.
        "views": int(stats.get("viewCount", 0)),
        "likes": int(stats.get("likeCount", 0)),
        "comments": int(stats.get("commentCount", 0)),
        "channel_id": snippet.get('channelId'),
        "category_id": snippet.get('categoryId'),
        "language": snippet.get('defaultAudioLanguage', 'N/A'),
    }


def get_popular_videos(category_id, category_name, *, max_videos=None,
                       region_code=None, client=None):
    """Collect the "mostPopular" chart videos of one category.

    Pages through ``videos().list(chart="mostPopular", ...)`` until the cap is
    reached, the API returns no more items, or no next page token is given.

    Args:
        category_id: Category identifier; converted to ``str`` for the request.
        category_name: Human-readable name, used only in progress/error output.
        max_videos: Maximum number of rows to return. Defaults to the
            module-level ``MAX_VIDEOS_PER_CAT``.
        region_code: Region code sent as ``regionCode``. Defaults to the
            module-level ``REGION_CODE``.
        client: API client exposing ``videos().list(...).execute()``. Defaults
            to the module-level ``youtube`` client.

    Returns:
        A list of at most ``max_videos`` dicts (see ``_video_record`` for the
        keys). On any error the message is printed and the rows gathered so far
        are returned -- the function never raises for a failed request.
    """
    limit = MAX_VIDEOS_PER_CAT if max_videos is None else max_videos
    region = REGION_CODE if region_code is None else region_code
    api = youtube if client is None else client
    page_size = 50  # items requested per call

    videos = []
    next_page_token = None

    print(f"Extraction : {category_name}...")

    while len(videos) < limit:
        try:
            response = api.videos().list(
                part="snippet,contentDetails,statistics",
                chart="mostPopular",
                regionCode=region,
                videoCategoryId=str(category_id),
                maxResults=page_size,
                pageToken=next_page_token,
            ).execute()

            items = response.get('items', [])
            if not items:
                break

            # Trim the last page so the cap also holds when it is not a
            # multiple of the page size.
            remaining = limit - len(videos)
            for item in items[:remaining]:
                videos.append(_video_record(item))

            next_page_token = response.get('nextPageToken')
            if not next_page_token:
                break

        except Exception as e:
            # Deliberate best-effort: report the problem and keep whatever was
            # collected so the remaining categories can still be processed.
            print(f"Erreur sur {category_name}: {e}")
            break

    return videos



In [12]:
# --- EXECUTION ---

# Load the YouTube category list and keep only the rows whose
# `chart_available` column equals True.
df = pd.read_csv(CSV_INPUT)
df_cats = df[df['chart_available']==True]

# Rows gathered across all categories; becomes the final DataFrame.
full_data = []

# Fetch every selected category in turn.
for _, row in df_cats.iterrows():
    cat_videos = get_popular_videos(row['category_id'], row['name'])
    full_data.extend(cat_videos)  # extending with an empty list is a no-op

    time.sleep(0.2)  # short pause between categories

# Build the DataFrame and write the dated snapshot.
if full_data:
    df_final = pd.DataFrame(full_data)
    # to_csv does not create missing parent directories; make sure the output
    # folder exists so a fresh checkout does not fail after all the API calls.
    os.makedirs("new_videos", exist_ok=True)
    df_final.to_csv(f"new_videos/{DATE}.csv", index=False, encoding='utf-8-sig')
    print(f"\nTerminé ! {len(df_final)} vidéos enregistrées dans new_videos/{DATE}")
else:
    print("\nAucune donnée n'a été récupérée.")


Extraction : Film & Animation...
Extraction : Music...
Extraction : Autos & Vehicles...
Extraction : Gaming...
Extraction : Pets & Animals...
Extraction : Sports...
Extraction : People & Blogs...
Extraction : Comedy...
Extraction : Entertainment...
Extraction : News & Politics...
Extraction : Howto & Style...
Extraction : Science & Technology...

Terminé ! 1951 vidéos enregistrées dans new_videos/2026-02-06
