In [37]:
pip install dotenv

Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: dotenv
Successfully installed dotenv-0.9.9
Note: you may need to restart the kernel to use updated packages.


In [1]:
import requests
import pandas as pd
import time
import os
from dotenv import load_dotenv

load_dotenv()
API_KEY = os.getenv('YOUTUBE_API_KEY')

SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"
VIDEOS_URL = "https://www.googleapis.com/youtube/v3/videos"

REGION_CODE = "FR"
MAX_PER_PAGE = 50      # max autoris√©
TARGET_VIDEOS = 1000   # par niche


In [33]:
def collect_niche_videos(keywords, target=1000):
    video_ids = []

    for keyword in keywords:
        page_token = None

        while len(video_ids) < target:
            params = {
                "part": "snippet",
                "q": keyword,
                "type": "video",
                "order": "date",
                "maxResults": MAX_PER_PAGE,
                "relevanceLanguage": "fr",
                "regionCode": "FR",
                "key": API_KEY
            }

            if page_token:
                params["pageToken"] = page_token

            response = requests.get(SEARCH_URL, params=params)
            data = response.json()
            print(data)

            for item in data.get("items", []):
                video_ids.append(item["id"]["videoId"])

            page_token = data.get("nextPageToken")
            if not page_token:
                break

    # üîë d√©duplication
    video_ids = list(set(video_ids))

    return video_ids[:target]


In [35]:
# TEST SIMPLE : est-ce que search renvoie des r√©sultats ?
test_ids = collect_niche_videos("humour", target=10)
print(len(test_ids))
test_ids[:5]


{'kind': 'youtube#searchListResponse', 'etag': 'XXxffMula6Jisq1oy48IHI1HClE', 'nextPageToken': 'CDIQAA', 'regionCode': 'FR', 'pageInfo': {'totalResults': 1000000, 'resultsPerPage': 50}, 'items': [{'kind': 'youtube#searchResult', 'etag': '84Wsdzm5g4wAEz-rM3EIqImIZm0', 'id': {'kind': 'youtube#video', 'videoId': 'mH4TFhALWjw'}, 'snippet': {'publishedAt': '2026-01-22T14:30:41Z', 'channelId': 'UCHUEQLgynl-5ODWAU8SSp8A', 'title': 'Il y a quelque chose auquel je veux que tu r√©pondes... messages canalis√©s sur mes pens√©es et sent...', 'description': 'Il y a quelque chose auquel je veux que tu r√©pondes‚Ä¶\n\nLove rulesz Messages canalis√©s de ton/ta futur(e) conjoint(e) | Pens√©es ...', 'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/mH4TFhALWjw/default.jpg', 'width': 120, 'height': 90}, 'medium': {'url': 'https://i.ytimg.com/vi/mH4TFhALWjw/mqdefault.jpg', 'width': 320, 'height': 180}, 'high': {'url': 'https://i.ytimg.com/vi/mH4TFhALWjw/hqdefault.jpg', 'width': 480, 'height': 360}}

['GUcbVEKD_vs', 'HC-l85p42cA', 'a90cynmfK3Q', 'KJFBuSnzsEE', 'kR_LNyQJnU8']

In [39]:
def fetch_video_details(video_ids):
    rows = []

    for i in range(0, len(video_ids), 50):
        batch = video_ids[i:i+50]

        params = {
            "part": "snippet,statistics,contentDetails",
            "id": ",".join(batch),
            "key": API_KEY
        }

        response = requests.get(VIDEOS_URL, params=params)
        data = response.json()

        for video in data.get("items", []):
            rows.append({
                "video_id": video["id"],
                "title": video["snippet"]["title"],
                "description": video["snippet"].get("description", ""),
                'channel_id': video['snippet'].get('channelId'),
                'category_id': video['snippet'].get('categoryId'),
                'language': video['snippet'].get('defaultAudioLanguage', 'N/A'),
                "channel": video["snippet"]["channelTitle"],
                "published_at": video["snippet"]["publishedAt"],
                "duration": video.get("contentDetails", {}).get("duration", None),
                "views": int(video["statistics"].get("viewCount", 0)),
                "likes": int(video["statistics"].get("likeCount", 0)),
                "comments": int(video["statistics"].get("commentCount", 0))
            })

        time.sleep(0.2)

    return pd.DataFrame(rows)


In [41]:
niches = {
    "humour": [
        "humour",
        "sketch",
        "stand up",
        "humoriste",
        "blague",
        "vid√©o dr√¥le",
        "parodie",
        "humour fran√ßais",
        "com√©die",
        "tiktok humour"
    ],
    "divertissement": [
        "divertissement",
        "challenge",
        "buzz",
        "vid√©o virale",
        "prank",
        "people",
        "t√©l√© r√©alit√©",
        "√©mission TV",
        "web s√©rie",
        "fun"
    ]
}

for niche, keywords in niches.items():
    print(f"Collecte : {niche}")
    ids = collect_niche_videos(keywords, TARGET_VIDEOS)
    df = fetch_video_details(ids)
    df.to_csv(f"market_{niche.replace(' ', '_')}_fr.csv", index=False)
    print(f"market_{niche.replace(' ', '_')}_fr.csv sauvegard√© ({len(df)} vid√©os)")



Collecte : humour
{'kind': 'youtube#searchListResponse', 'etag': 'I_XmDuFBM581PqVje6E6c17sOvU', 'nextPageToken': 'CDIQAA', 'regionCode': 'FR', 'pageInfo': {'totalResults': 1000000, 'resultsPerPage': 50}, 'items': [{'kind': 'youtube#searchResult', 'etag': 'zjh7ZfgYUaYAfY_paQmRbOENSwU', 'id': {'kind': 'youtube#video', 'videoId': '3zoCzLT4am0'}, 'snippet': {'publishedAt': '2026-01-22T15:21:24Z', 'channelId': 'UCyIzD_MX0ng84HxptEJvZww', 'title': 'For Real üòÇ #memes #popular #viral #funnymemes #relatable #related #shorts #ronaldo #humour #meme', 'description': '', 'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/3zoCzLT4am0/default.jpg', 'width': 120, 'height': 90}, 'medium': {'url': 'https://i.ytimg.com/vi/3zoCzLT4am0/mqdefault.jpg', 'width': 320, 'height': 180}, 'high': {'url': 'https://i.ytimg.com/vi/3zoCzLT4am0/hqdefault.jpg', 'width': 480, 'height': 360}}, 'channelTitle': 'Toxic Game ', 'liveBroadcastContent': 'none', 'publishTime': '2026-01-22T15:21:24Z'}}, {'kind': 'youtub

In [6]:
SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"

params = {
    "part": "snippet",
    "q": keyword,
    "type": "video",
    "order": "date",
    "maxResults": MAX_PER_PAGE,
    "relevanceLanguage": "fr",
    "regionCode": "FR",
    "key": API_KEY
}

r = requests.get(SEARCH_URL, params=params)
print(r.status_code)
print(r.text)


200
{
  "kind": "youtube#searchListResponse",
  "etag": "stL5RAcv5tMWhCiBCbTaMt4VSJ8",
  "nextPageToken": "CAUQAA",
  "regionCode": "FR",
  "pageInfo": {
    "totalResults": 1000000,
    "resultsPerPage": 5
  },
  "items": [
    {
      "kind": "youtube#searchResult",
      "etag": "IFxHSllU6r0KO23Qh9vufXJajgA",
      "id": {
        "kind": "youtube#video",
        "videoId": "YEyFdtni3uU"
      },
      "snippet": {
        "publishedAt": "2023-05-31T14:00:31Z",
        "channelId": "UCpQ34afVgk8cRQBjSJ1xuJQ",
        "title": "5 MIN ARM WORKOUT",
        "description": "A super quick and effective 5 minute arm workout using dumbbells/weights! The full, follow along, workout can be found on my ...",
        "thumbnails": {
          "default": {
            "url": "https://i.ytimg.com/vi/YEyFdtni3uU/default.jpg",
            "width": 120,
            "height": 90
          },
          "medium": {
            "url": "https://i.ytimg.com/vi/YEyFdtni3uU/mqdefault.jpg",
            "wid