<a href="https://colab.research.google.com/github/61388013i/MONDAY-MORNING/blob/main/61388013I_%E6%B4%AA%E8%B3%A2%E5%B9%B8_1013%E4%BD%9C%E6%A5%AD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
"""
抓取 YouTube 頻道（GOT7 Official）的影片清單，輸出為 DataFrame（六欄）與 CSV。
需要：YouTube Data API v3 的 API Key
"""
import os
import time
import requests
import pandas as pd
from typing import Dict, Any, List

# ======= Required: your API key and channel ID (currently set to GOT7) =======
# The key is read from the YOUTUBE_API_KEY environment variable so the secret
# is never stored in the source. When the variable is unset or blank, the
# "YOUR_API_KEY" placeholder is used instead, which main() refuses to run with.
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "").strip() or "YOUR_API_KEY"
CHANNEL_ID = "UC8HEl74jL3bLLwfDP1OALxw"  # GOT7 Official

# Base URL that every YouTube Data API v3 request path is appended to.
YT_API = "https://www.googleapis.com/youtube/v3"

def yt_get(path: str, params: Dict[str, Any]) -> Dict[str, Any]:
    """Send a GET request to the YouTube Data API with back-off retries.

    Args:
        path: Resource path appended to ``YT_API`` (e.g. ``"videos"``).
        params: Query parameters; the API key is added automatically and the
            caller's dict is not modified.

    Returns:
        The decoded JSON body of the first HTTP 200 response.

    Raises:
        requests.HTTPError: If every attempt answered with a non-200 status.
        requests.ConnectionError, requests.Timeout: If the final attempt
            failed at the network level.
    """
    url = f"{YT_API}/{path}"
    params = {**params, "key": YOUTUBE_API_KEY}
    max_attempts = 3
    last_response = None
    for attempt in range(max_attempts):
        is_last = attempt == max_attempts - 1
        try:
            r = requests.get(url, params=params, timeout=15)
        except (requests.ConnectionError, requests.Timeout):
            # Network-level failures are retried like bad statuses; only the
            # final one is allowed to propagate to the caller.
            if is_last:
                raise
        else:
            if r.status_code == 200:
                return r.json()
            last_response = r
        if not is_last:
            # Linear back-off; sleeping after the final attempt would only
            # delay the error.
            time.sleep(1.2 * (attempt + 1))
    # Reaching this point means the final attempt produced a non-200 response.
    last_response.raise_for_status()
    # raise_for_status() is silent for non-200 codes below 400, so raise
    # explicitly instead of falling through and returning None.
    raise requests.HTTPError(
        f"Unexpected HTTP status {last_response.status_code} for {url}",
        response=last_response,
    )

def get_channel_info(channel_id: str) -> Dict[str, Any]:
    """Fetch basic metadata for one channel.

    Args:
        channel_id: The channel id passed as the ``id`` filter of the
            ``channels`` endpoint.

    Returns:
        A dict with the keys ``channelId``, ``channelName``,
        ``uploadsPlaylistId`` and ``videoCount`` (an int, 0 when the
        statistics carry no ``videoCount``).

    Raises:
        ValueError: If the response contains no items for ``channel_id``.
    """
    query = {
        "part": "snippet,contentDetails,statistics",
        "id": channel_id,
        "maxResults": 1,
    }
    response = yt_get("channels", query)
    matches = response.get("items", [])
    if not matches:
        raise ValueError(f"找不到頻道：{channel_id}")

    channel = matches[0]
    found_id = channel["id"]
    title = channel["snippet"]["title"]
    uploads_playlist = channel["contentDetails"]["relatedPlaylists"]["uploads"]
    video_count = int(channel["statistics"].get("videoCount", 0))

    info: Dict[str, Any] = {}
    info["channelId"] = found_id
    info["channelName"] = title
    info["uploadsPlaylistId"] = uploads_playlist
    info["videoCount"] = video_count
    return info

def iter_all_video_ids(playlist_id: str) -> List[str]:
    """Collect the video id of every item in a playlist.

    Pages through the ``playlistItems`` endpoint 50 items at a time,
    following ``nextPageToken`` until the response no longer carries one.

    Args:
        playlist_id: Id of the playlist to walk.

    Returns:
        All video ids, in the order the pages returned them.
    """
    base_query = {
        "part": "contentDetails",
        "playlistId": playlist_id,
        "maxResults": 50,
    }
    video_ids: List[str] = []
    page_token = None  # the first request is sent with pageToken=None
    has_more = True
    while has_more:
        page = yt_get("playlistItems", {**base_query, "pageToken": page_token})
        video_ids += [
            entry["contentDetails"]["videoId"] for entry in page.get("items", [])
        ]
        page_token = page.get("nextPageToken")
        has_more = bool(page_token)
    return video_ids

def batched(lst: List[str], n: int) -> List[List[str]]:
    """Split ``lst`` into consecutive slices of at most ``n`` elements.

    The last slice may be shorter; an empty input yields an empty list.
    """
    chunks: List[List[str]] = []
    for start in range(0, len(lst), n):
        stop = start + n
        chunks.append(lst[start:stop])
    return chunks

def fetch_video_rows(video_ids: List[str]) -> List[Dict[str, Any]]:
    """Fetch snippet data for the given videos via the ``videos`` endpoint.

    Ids are requested in groups of up to 50, with a 0.1 s pause after each
    group.

    Args:
        video_ids: Video ids to look up.

    Returns:
        One dict per item in the responses, with the keys ``videoId``,
        ``title``, ``uploadTime`` and ``description``; snippet fields that
        are absent default to ``""``.
    """
    collected: List[Dict[str, Any]] = []
    for id_group in batched(video_ids, 50):  # ids are sent 50 per request
        query = {"part": "snippet", "id": ",".join(id_group), "maxResults": 50}
        payload = yt_get("videos", query)
        for video in payload.get("items", []):
            snippet = video["snippet"]
            row = {
                "videoId": video["id"],
                "title": snippet.get("title", ""),
                "uploadTime": snippet.get("publishedAt", ""),
                "description": snippet.get("description", ""),
            }
            collected.append(row)
        time.sleep(0.1)
    return collected

def main():
    """Download the configured channel's video list and export it as CSV.

    Prints the channel summary, builds a six-column DataFrame (channel name,
    channel id, video id, title, upload time, description), writes it to
    ``<channelName>_<channelId>.csv`` in the working directory and prints
    the first five rows.

    Raises:
        SystemExit: If the API key is still the ``"YOUR_API_KEY"`` placeholder.
    """
    if YOUTUBE_API_KEY == "YOUR_API_KEY":
        raise SystemExit("請先把 YOUTUBE_API_KEY 換成你的 API Key（或設為環境變數）。")

    channel = get_channel_info(CHANNEL_ID)
    channel_name = channel["channelName"]
    channel_id = channel["channelId"]
    print(f"頻道名稱(channelName): {channel_name}")
    print(f"頻道ID(channelId): {channel_id}")
    print(f"官方影片總數(videoCount): {channel['videoCount']}\n")

    video_ids = iter_all_video_ids(channel["uploadsPlaylistId"])
    print(f"實際抓到的影片數：{len(video_ids)}")

    video_rows = fetch_video_rows(video_ids)

    # Four per-video columns, then the two channel columns placed in front.
    video_columns = ["videoId", "title", "uploadTime", "description"]
    table = pd.DataFrame(video_rows, columns=video_columns)
    table.insert(0, "channelName", channel_name)
    table.insert(1, "channelId", channel_id)

    # Report the shape and column names.
    print("\nDataFrame.shape:", table.shape)
    print("DataFrame columns:", list(table.columns))

    # Write the CSV; "/" in the channel name is replaced so it cannot act as
    # a path separator.
    safe_name = channel_name.replace("/", "_")
    csv_name = f"{safe_name}_{channel_id}.csv"
    table.to_csv(csv_name, index=False, encoding="utf-8-sig")
    print(f"\n已輸出 CSV：{csv_name}")

    # Preview the first five rows with long cells truncated.
    print("\n=== 頭 5 列 ===")
    with pd.option_context("display.max_colwidth", 60):
        print(table.head(5))

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

頻道名稱(channelName): GOT7
頻道ID(channelId): UC8HEl74jL3bLLwfDP1OALxw
官方影片總數(videoCount): 579

實際抓到的影片數：579

DataFrame.shape: (579, 6)
DataFrame columns: ['channelName', 'channelId', 'videoId', 'title', 'uploadTime', 'description']

已輸出 CSV：GOT7_UC8HEl74jL3bLLwfDP1OALxw.csv

=== 頭 5 列 ===
  channelName                 channelId      videoId  \
0        GOT7  UC8HEl74jL3bLLwfDP1OALxw  ipfqLJOydYg   
1        GOT7  UC8HEl74jL3bLLwfDP1OALxw  MlJTwfauK-c   
2        GOT7  UC8HEl74jL3bLLwfDP1OALxw  4mLbRFZb4fA   
3        GOT7  UC8HEl74jL3bLLwfDP1OALxw  WIljWSoab_E   
4        GOT7  UC8HEl74jL3bLLwfDP1OALxw  iUELZWw7Wpo   

                                               title            uploadTime  \
0                         7 Years With GOT7 💚 I GOT7  2021-01-15T15:00:12Z   
1                         GOT7 "Piece of GOT7" EP.03  2021-01-08T08:00:06Z   
2                         GOT7 "Piece of GOT7" EP.02  2021-01-01T08:00:03Z   
3                         GOT7 "Piece of GOT7" EP.01  2020-12-25T