In [1]:
import requests
import pandas as pd
import time
import os
from dotenv import load_dotenv

load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")

SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"
VIDEOS_URL = "https://www.googleapis.com/youtube/v3/videos"

REGION_CODE = "FR"
MAX_PER_PAGE = 50      # max autoris√©
TARGET_VIDEOS = 1000   # par niche


In [2]:
def collect_niche_videos(keywords, target=1000):
    video_ids = []

    for keyword in keywords:
        page_token = None

        while len(video_ids) < target:
            params = {
                "part": "snippet",
                "q": keyword,
                "type": "video",
                "order": "date",
                "maxResults": MAX_PER_PAGE,
                "key": API_KEY
            }

            if page_token:
                params["pageToken"] = page_token

            response = requests.get(SEARCH_URL, params=params)
            data = response.json()

            for item in data.get("items", []):
                video_ids.append(item["id"]["videoId"])

            page_token = data.get("nextPageToken")
            if not page_token:
                break

    # üîë d√©duplication
    video_ids = list(set(video_ids))

    return video_ids[:target]


In [3]:
# TEST SIMPLE : est-ce que search renvoie des r√©sultats ?
test_ids = collect_niche_videos("fitness", target=10)
print(len(test_ids))
test_ids[:5]


10


['LoPHVX_MSNQ', 'aiuKpl-l0pA', 'qNEH9T6RgLE', 'KkC38DN_Fpk', 'CD-dz05LgaU']

In [4]:
def fetch_video_details(video_ids):
    rows = []

    for i in range(0, len(video_ids), 50):
        batch = video_ids[i:i+50]

        params = {
            "part": "snippet,statistics,contentDetails",
            "id": ",".join(batch),
            "key": API_KEY
        }

        response = requests.get(VIDEOS_URL, params=params)
        data = response.json()

        for video in data.get("items", []):
            rows.append({
                "video_id": video["id"],
                "title": video["snippet"]["title"],
                "description": video["snippet"].get("description", ""),
                "channel": video["snippet"]["channelTitle"],
                "published_at": video["snippet"]["publishedAt"],
                "duration": video.get("contentDetails", {}).get("duration", None),
                "views": int(video["statistics"].get("viewCount", 0)),
                "likes": int(video["statistics"].get("likeCount", 0)),
                "comments": int(video["statistics"].get("commentCount", 0))
            })

        time.sleep(0.2)

    return pd.DataFrame(rows)


In [5]:
niches = {
    "jeux video": [
    "jeux video",
    "jeu video",
    "gaming",
    "gameplay",
    "partie complete",
    "let's play",
    "test jeu video",
    "avis jeu video",
    "nouveau jeu",
    "actualite jeux video",
    "top jeux video",
    "astuces jeux video",
    "guide jeu video",
    "stream jeux video",
    "jeu pc",
    "jeu console"
],
    "automobile": [
    "automobile",
    "voiture",
    "auto",
    "essai voiture",
    "test voiture",
    "avis voiture",
    "comparatif voiture",
    "nouvelle voiture",
    "actualite automobile",
    "voiture electrique",
    "voiture hybride",
    "entretien voiture",
    "mecanique automobile",
    "passion automobile",
    "vlog automobile"
]

}

for niche, keywords in niches.items():
    print(f"Collecte : {niche}")
    ids = collect_niche_videos(keywords, TARGET_VIDEOS)
    df = fetch_video_details(ids)
    df.to_csv(f"market_{niche.replace(' ', '_')}_fr.csv", index=False)
    print(f"market_{niche.replace(' ', '_')}_fr.csv sauvegard√© ({len(df)} vid√©os)")


Collecte : jeux video
market_jeux_video_fr.csv sauvegard√© (973 vid√©os)
Collecte : automobile
market_automobile_fr.csv sauvegard√© (945 vid√©os)


In [6]:
SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"

params = {
    "part": "snippet",
    "q": "fitness",
    "type": "video",
    "maxResults": 5,
    "key": API_KEY
}

r = requests.get(SEARCH_URL, params=params)
print(r.status_code)
print(r.text)


200
{
  "kind": "youtube#searchListResponse",
  "etag": "xtOwZecRSVdjpDHg89eyDfY5HnM",
  "nextPageToken": "CAUQAA",
  "regionCode": "FR",
  "pageInfo": {
    "totalResults": 1000000,
    "resultsPerPage": 5
  },
  "items": [
    {
      "kind": "youtube#searchResult",
      "etag": "IFxHSllU6r0KO23Qh9vufXJajgA",
      "id": {
        "kind": "youtube#video",
        "videoId": "YEyFdtni3uU"
      },
      "snippet": {
        "publishedAt": "2023-05-31T14:00:31Z",
        "channelId": "UCpQ34afVgk8cRQBjSJ1xuJQ",
        "title": "5 MIN ARM WORKOUT",
        "description": "A super quick and effective 5 minute arm workout using dumbbells/weights! The full, follow along, workout can be found on my ...",
        "thumbnails": {
          "default": {
            "url": "https://i.ytimg.com/vi/YEyFdtni3uU/default.jpg",
            "width": 120,
            "height": 90
          },
          "medium": {
            "url": "https://i.ytimg.com/vi/YEyFdtni3uU/mqdefault.jpg",
            "wid