In [1]:
# Search keywords: names of Korean listed companies, one search query each.
keywords = [
    "삼성전자",          # Samsung Electronics
    "SK하이닉스",        # SK hynix
    "네이버",            # NAVER
    "카카오",            # Kakao
    "삼성바이오로직스",  # Samsung Biologics
    "LG화학",            # LG Chem
    "현대차",            # Hyundai Motor
    "기아",              # Kia
    "LG에너지솔루션",    # LG Energy Solution
    "POSCO홀딩스",       # POSCO Holdings
    "현대모비스",        # Hyundai Mobis
    "셀트리온",          # Celltrion
    "카카오게임즈",      # Kakao Games
    "크래프톤",          # Krafton
    "HMM",               # HMM
    "대한항공",          # Korean Air
    "DB하이텍",          # DB HiTek
    "아모레퍼시픽",      # Amorepacific
    "롯데쇼핑",          # Lotte Shopping
    "CJ ENM",            # CJ ENM
]

In [8]:
import os
import requests
from typing import List
from isodate import parse_duration
from datetime import datetime
from dotenv import load_dotenv

# Let python-dotenv populate the process environment first, then read the
# YouTube Data API key from it; the value is None when the variable is unset.
load_dotenv()
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")

def fetch_youtube_with_detail(keyword: str, max_results: int = 10) -> List[dict]:
    """Search YouTube for ``keyword`` and return one metadata record per video.

    Two YouTube Data API v3 requests are issued: ``search`` (videos only,
    ordered by date) to collect video ids, then ``videos`` to fetch the
    snippet, statistics and contentDetails parts for all of those ids at once.

    Args:
        keyword: Search query; also copied into each record's ``keyword`` field.
        max_results: Value sent as ``maxResults`` on the search request.

    Returns:
        A list of dicts with the keys ``type``, ``title``, ``url``,
        ``keyword``, ``summary``, ``image_url``, ``published_at``,
        ``channel_title``, ``view_count``, ``like_count``, ``comment_count``
        and ``duration_sec``. Empty when the search yields no video ids.

    Raises:
        RuntimeError: If ``YOUTUBE_API_KEY`` is not configured.
        requests.HTTPError: If either API request returns an error status.
        requests.Timeout: If either API request exceeds the timeout.
    """
    # requests silently omits params whose value is None, so a missing key
    # would otherwise surface only as an opaque HTTP error from the API.
    if not YOUTUBE_API_KEY:
        raise RuntimeError("YOUTUBE_API_KEY is not set")

    # Without an explicit timeout requests can block indefinitely on a stalled
    # connection, which would freeze the whole crawl.
    request_timeout = 10  # seconds

    # Step 1: search.list to get videoIds
    search_url = "https://www.googleapis.com/youtube/v3/search"
    search_params = {
        "part": "snippet",
        "q": keyword,
        "key": YOUTUBE_API_KEY,
        "type": "video",
        "maxResults": max_results,
        "order": "date"
    }

    search_res = requests.get(search_url, params=search_params, timeout=request_timeout)
    search_res.raise_for_status()
    search_items = search_res.json().get("items", [])
    # Skip any item that does not carry a videoId instead of failing the
    # whole keyword with a KeyError.
    video_ids = [
        item["id"]["videoId"]
        for item in search_items
        if isinstance(item.get("id"), dict) and item["id"].get("videoId")
    ]

    if not video_ids:
        return []

    # Step 2: videos.list to get details
    detail_url = "https://www.googleapis.com/youtube/v3/videos"
    detail_params = {
        "part": "snippet,statistics,contentDetails",
        "key": YOUTUBE_API_KEY,
        "id": ",".join(video_ids)
    }

    detail_res = requests.get(detail_url, params=detail_params, timeout=request_timeout)
    detail_res.raise_for_status()
    detail_items = detail_res.json().get("items", [])

    result = []
    for item in detail_items:
        snippet = item["snippet"]
        # statistics / contentDetails may be absent from an item; the
        # defaults below then yield zero counts and a zero duration.
        statistics = item.get("statistics", {})
        content_details = item.get("contentDetails", {})

        result.append({
            "type": "youtube",
            "title": snippet["title"],
            "url": f"https://www.youtube.com/watch?v={item['id']}",
            "keyword": keyword,
            "summary": snippet.get("description", ""),
            "image_url": snippet.get("thumbnails", {}).get("high", {}).get("url"),
            "published_at": snippet.get("publishedAt", datetime.now().isoformat()),
            "channel_title": snippet.get("channelTitle"),
            "view_count": int(statistics.get("viewCount", 0)),
            "like_count": int(statistics.get("likeCount", 0)),
            "comment_count": int(statistics.get("commentCount", 0)),
            # ISO 8601 duration string (default "PT0S") converted to whole seconds.
            "duration_sec": int(parse_duration(content_details.get("duration", "PT0S")).total_seconds())
        })

    return result

In [16]:
import time
import json

# Crawl every keyword in turn. A failure on one keyword is reported and
# skipped so the remaining keywords are still processed.
all_results = []
for kw in keywords:
    try:
        results = fetch_youtube_with_detail(kw, max_results=30)
        all_results += results
        # Pause for a second after each successful fetch.
        time.sleep(1)
    except Exception as exc:
        print(f"Error processing {kw}: {exc}")

# Persist everything collected as one UTF-8 JSON file, creating the output
# directory on first use.
output_dir = "./data"
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, "youtube_search_results.json")
with open(output_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
    json.dump(all_results, f, ensure_ascii=False, indent=2)