### YouTube Data ETL code

In [1]:
from googleapiclient.discovery import build
import pandas as pd
import datetime

In [None]:
# API setup. NOTE(review): the key below looks like a placeholder string --
# presumably it must be replaced with a real YouTube Data API key; confirm.
API_KEY = 'YOUTUBE_API_KEY'
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Collect videos by search term: video-type results for "mukbang",
# at most 50 per the maxResults setting, ordered by view count.
search_params = {
    "q": "mukbang",
    "part": "snippet",
    "type": "video",
    "maxResults": 50,
    "order": "viewCount",
}
search_response = youtube.search().list(**search_params).execute()

# Extract the video ID from every search hit.
video_ids = []
for hit in search_response["items"]:
    video_ids.append(hit["id"]["videoId"])

# Request detailed info (snippet + statistics) for all collected IDs
# in one call, passing the IDs as a comma-separated string.
id_csv = ",".join(video_ids)
video_response = youtube.videos().list(
    part="snippet,statistics",
    id=id_csv,
).execute()


In [8]:
def _format_publish_date(published_at):
    """Convert a ``publishedAt`` timestamp string into a ``DD.MM`` string.

    Accepts the timestamp both without and with fractional seconds
    (``2020-01-31T12:00:00Z`` and ``2020-01-31T12:00:00.000Z``). Parsing
    with only the first format raises ValueError on the second form.

    Raises:
        ValueError: if the string matches neither supported format.
    """
    for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S.%fZ"):
        try:
            parsed = datetime.datetime.strptime(published_at, fmt)
        except ValueError:
            continue
        return parsed.strftime("%d.%m")
    raise ValueError(f"Unrecognized publishedAt timestamp: {published_at!r}")


# Result list: one flat row per video in the details response.
rows = []
for item in video_response["items"]:
    snippet = item["snippet"]
    # "statistics" may be missing from an item; fall back to an empty dict
    # so every counter below takes its "0" default.
    stats = item.get("statistics", {})

    video_id = item["id"]
    title = snippet["title"]
    channel_title = snippet["channelTitle"]
    category_id = snippet["categoryId"]
    # Tags are joined with "|"; a video without a "tags" field gets "[none]".
    tags = "|".join(snippet["tags"]) if "tags" in snippet else "[none]"
    # Counters are kept as-is from the response; a missing counter becomes
    # the string "0".
    views = stats.get("viewCount", "0")
    likes = stats.get("likeCount", "0")
    dislikes = stats.get("dislikeCount", "0")  # removed from some API responses
    comment_count = stats.get("commentCount", "0")
    thumbnail_link = snippet["thumbnails"]["default"]["url"]
    # Only day and month of the publish timestamp are kept (format "%d.%m").
    date = _format_publish_date(snippet["publishedAt"])

    rows.append([
        video_id, title, channel_title, category_id, tags,
        views, likes, dislikes, comment_count, thumbnail_link, date
    ])

# Build the DataFrame (column order matches the row layout above) and save it.
df = pd.DataFrame(rows, columns=[
    "video_id", "title", "channel_title", "category_id", "tags",
    "views", "likes", "dislikes", "comment_count", "thumbnail_link", "date"
])

df.to_csv("youtube_dataset_Mukbang.csv", index=False)