<a href="https://colab.research.google.com/github/61388013i/MONDAY-MORNING/blob/main/61388013I_%E6%B4%AA%E8%B3%A2%E5%B9%B8_1013%E4%BD%9C%E6%A5%AD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import os
import time
import requests
import pandas as pd
from typing import Dict, Any, List

# ======= Required: your API key and channel ID =======
# SECURITY: never hard-code a real API key in source control. The key is read
# from the YOUTUBE_API_KEY environment variable; when it is unset the value
# falls back to the "YOUR_API_KEY" placeholder, which main() treats as
# "not configured" and aborts on.
# NOTE(review): a real key was previously committed on this line; it should be
# revoked/rotated in the Google Cloud Console.
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "YOUR_API_KEY")

# ID of the channel to fetch; replace with the channel you want to scrape.
CHANNEL_ID = "UCXx_j6wUS3Vos8EX3XrgbUg"

# Base URL of the YouTube Data API v3.
YT_API = "https://www.googleapis.com/youtube/v3"

def yt_get(path: str, params: Dict[str, Any]) -> Dict[str, Any]:
    """Send a GET request to the YouTube Data API with simple back-off retries.

    Args:
        path: Endpoint path relative to ``YT_API`` (e.g. ``"videos"``).
        params: Query parameters; the API key is added automatically.

    Returns:
        The decoded JSON body of the first HTTP 200 response.

    Raises:
        requests.HTTPError: If the last attempt returned a 4xx/5xx status.
        requests.ConnectionError, requests.Timeout: If the last attempt failed
            at the network level.
        RuntimeError: If the last attempt returned a non-200 status that is
            not an HTTP error (previously this silently returned ``None``).
    """
    url = f"{YT_API}/{path}"
    query = {**params, "key": YOUTUBE_API_KEY}
    max_attempts = 3
    response = None
    last_error = None
    for attempt in range(max_attempts):
        try:
            response = requests.get(url, params=query, timeout=15)
        except (requests.ConnectionError, requests.Timeout) as exc:
            # Network hiccups are exactly what the retry loop is for.
            last_error = exc
            response = None
        else:
            if response.status_code == 200:
                return response.json()
        # Back off before the next attempt, but do not sleep after the last one.
        if attempt < max_attempts - 1:
            time.sleep(1.2 * (attempt + 1))
    if response is None:
        # The final attempt never produced a response; surface its error.
        raise last_error
    response.raise_for_status()
    # raise_for_status() only raises for 4xx/5xx; make any other status explicit.
    raise RuntimeError(f"Unexpected HTTP status {response.status_code} for {path}")

def get_channel_info(channel_id: str) -> Dict[str, Any]:
    """Fetch a channel's name, uploads playlist ID and reported video count.

    Args:
        channel_id: The YouTube channel ID to look up.

    Returns:
        A dict with the keys ``channelId``, ``channelName``,
        ``uploadsPlaylistId`` and ``videoCount`` (an int, 0 when the
        statistics carry no ``videoCount``).

    Raises:
        ValueError: If the API response contains no items for ``channel_id``.
    """
    query = {
        "part": "snippet,contentDetails,statistics",
        "id": channel_id,
        "maxResults": 1,
    }
    response = yt_get("channels", query)

    channels = response.get("items", [])
    if not channels:
        raise ValueError(f"找不到頻道：{channel_id}")

    channel = channels[0]
    info: Dict[str, Any] = {}
    info["channelId"] = channel["id"]
    info["channelName"] = channel["snippet"]["title"]
    info["uploadsPlaylistId"] = channel["contentDetails"]["relatedPlaylists"]["uploads"]
    info["videoCount"] = int(channel["statistics"].get("videoCount", 0))
    return info

def iter_all_video_ids(playlist_id: str) -> List[str]:
    """Walk every page of a playlist and collect the video IDs in order.

    Args:
        playlist_id: The playlist to traverse (e.g. a channel's uploads playlist).

    Returns:
        A list of all video IDs found, in the order the API returned them.
    """
    video_ids: List[str] = []
    page_token = None  # None on the first request, then the API's nextPageToken
    while True:
        page = yt_get(
            "playlistItems",
            {
                "part": "contentDetails",
                "playlistId": playlist_id,
                "maxResults": 50,
                "pageToken": page_token,
            },
        )
        video_ids.extend(
            entry["contentDetails"]["videoId"] for entry in page.get("items", [])
        )
        page_token = page.get("nextPageToken")
        if not page_token:
            # No further page token means this was the last page.
            return video_ids

def batched(lst: List[str], n: int) -> List[List[str]]:
    """Split ``lst`` into consecutive chunks of at most ``n`` items.

    Args:
        lst: The list to split.
        n: Maximum chunk size; must be at least 1.

    Returns:
        A list of chunks in original order; the last chunk may be shorter.
        An empty input yields an empty list.

    Raises:
        ValueError: If ``n`` is smaller than 1. A negative ``n`` used to return
            ``[]`` silently, dropping every item; ``n == 0`` already raised
            ``ValueError`` from ``range``.
    """
    if n < 1:
        raise ValueError(f"batch size must be >= 1, got {n}")
    return [lst[start : start + n] for start in range(0, len(lst), n)]

def fetch_video_rows(video_ids: List[str]) -> List[Dict[str, Any]]:
    """Fetch per-video fields for the output table via ``videos.list``.

    Args:
        video_ids: The video IDs to look up.

    Returns:
        One dict per video returned by the API, with the keys ``videoId``,
        ``title``, ``uploadTime`` and ``description`` (missing snippet fields
        default to an empty string).
    """
    collected: List[Dict[str, Any]] = []
    # The API accepts at most 50 IDs per request.
    for id_group in batched(video_ids, 50):
        query = {"part": "snippet", "id": ",".join(id_group), "maxResults": 50}
        response = yt_get("videos", query)
        for video in response.get("items", []):
            snippet = video["snippet"]
            record = {
                "videoId": video["id"],
                "title": snippet.get("title", ""),
                "uploadTime": snippet.get("publishedAt", ""),
                "description": snippet.get("description", ""),
            }
            collected.append(record)
        # Short pause between requests.
        time.sleep(0.1)
    return collected

def _safe_filename_part(text: str) -> str:
    """Replace characters that are invalid in file names on common platforms with ``_``."""
    forbidden = '\\/:*?"<>|'
    return "".join(
        "_" if (char in forbidden or ord(char) < 32) else char for char in text
    )


def main():
    """Fetch every upload of ``CHANNEL_ID`` and export it as a six-column CSV.

    Prints the channel summary, the DataFrame shape and columns, the output
    file name and the first five rows.

    Raises:
        SystemExit: If the API key or channel ID is still a placeholder value.
    """
    if YOUTUBE_API_KEY == "YOUR_API_KEY":
        raise SystemExit("請先把 YOUTUBE_API_KEY 換成你的 API Key（或設為環境變數）。")

    if CHANNEL_ID == "YOUR_CHANNEL_ID_HERE":
        raise SystemExit("請將 CHANNEL_ID 替換成你要抓取的頻道 ID。")

    ch = get_channel_info(CHANNEL_ID)
    print(f"頻道名稱(channelName): {ch['channelName']}")
    print(f"頻道ID(channelId): {ch['channelId']}")
    print(f"官方影片總數(videoCount): {ch['videoCount']}\n")

    video_ids = iter_all_video_ids(ch["uploadsPlaylistId"])
    print(f"實際抓到的影片數：{len(video_ids)}")

    rows = fetch_video_rows(video_ids)

    # Build a DataFrame with exactly six columns. Both inserts target position
    # 0, so the final order is channelName, channelId, then the video fields.
    df = pd.DataFrame(rows, columns=["videoId", "title", "uploadTime", "description"])
    df.insert(0, "channelId", ch["channelId"])
    df.insert(0, "channelName", ch["channelName"])

    # Show shape and column names.
    print("\nDataFrame.shape:", df.shape)
    print("DataFrame columns:", list(df.columns))

    # Write the CSV. The channel title is free text, so strip every character
    # that cannot appear in a file name (not just "/") before using it.
    fname = f"{_safe_filename_part(ch['channelName'])}_{ch['channelId']}.csv"
    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(fname, index=False, encoding="utf-8-sig")
    print(f"\n已輸出 CSV：{fname}")

    # Preview the first five rows.
    print("\n=== 頭 5 列 ===")
    with pd.option_context("display.max_colwidth", 60):
        print(df.head(5))

# Run the scraper only when this module is executed directly, not when imported.
if __name__ == "__main__":
    main()

頻道名稱(channelName): Snoopy
頻道ID(channelId): UCXx_j6wUS3Vos8EX3XrgbUg
官方影片總數(videoCount): 819

實際抓到的影片數：819

DataFrame.shape: (819, 6)
DataFrame columns: ['channelName', 'channelId', 'videoId', 'title', 'uploadTime', 'description']

已輸出 CSV：Snoopy_UCXx_j6wUS3Vos8EX3XrgbUg.csv

=== 頭 5 列 ===
  channelName                 channelId      videoId  \
0      Snoopy  UCXx_j6wUS3Vos8EX3XrgbUg  l7qWKzAVmVo   
1      Snoopy  UCXx_j6wUS3Vos8EX3XrgbUg  aQTyLL9P34Y   
2      Snoopy  UCXx_j6wUS3Vos8EX3XrgbUg  rdG6oiz0Mbg   
3      Snoopy  UCXx_j6wUS3Vos8EX3XrgbUg  IuAJihV3yCs   
4      Snoopy  UCXx_j6wUS3Vos8EX3XrgbUg  8suM1GhvRdg   

                                                         title  \
0  Snoopy & Woodstock Pirates | The Snoopy Show Clip | Appl...   
1                   Detective Snoopy | The Snoopy Show #shorts   
2                      Snoopy | Meet the Gang | Pigpen #shorts   
3  Peanuts | Meet the Gang! (75th Anniversary of Peanuts) |...   
4   Snoopy | Meet the Gang | Other Friends 

In [None]:
# Markdown text for the project README. It must not embed a real API key:
# the setup instructions refer to the environment variable / assignment line
# by name instead of quoting a secret.
readme_content = """
# YouTube Channel Video Scraper

## Purpose

This script fetches a list of videos from a specified YouTube channel using the YouTube Data API v3, and outputs the data as a pandas DataFrame and a CSV file.

## Features

- Fetches channel information (name, ID, uploads playlist ID, video count).
- Iterates through all video IDs in the uploads playlist.
- Fetches detailed information for each video (title, upload time, description).
- Organizes the fetched data into a pandas DataFrame with six specific columns.
- Exports the DataFrame to a CSV file.
- Includes basic error handling and retry logic for API calls.

## Function Descriptions

### `yt_get`
**Purpose:** A helper function to make GET requests to the YouTube Data API with retry logic.
**Parameters:**
- `path`: The API endpoint path.
- `params`: A dictionary of query parameters for the API request.
**Returns:** A dictionary containing the JSON response from the API.
--------------------
### `get_channel_info`
**Purpose:** Retrieves essential information about a YouTube channel.
**Parameters:**
- `channel_id`: The ID of the YouTube channel.
**Returns:** A dictionary containing the channel's ID, name, uploads playlist ID, and video count.
--------------------
### `iter_all_video_ids`
**Purpose:** Iterates through all items in a given YouTube playlist and collects the video IDs.
**Parameters:**
- `playlist_id`: The ID of the YouTube playlist (e.g., the uploads playlist).
**Returns:** A list of video IDs.
--------------------
### `batched`
**Purpose:** Splits a list into smaller chunks (batches).
**Parameters:**
- `lst`: The input list.
- `n`: The size of each batch.
**Returns:** A list of lists, where each inner list is a batch from the original list.
--------------------
### `fetch_video_rows`
**Purpose:** Fetches detailed information for a list of video IDs using the YouTube Data API.
**Parameters:**
- `video_ids`: A list of video IDs.
**Returns:** A list of dictionaries, where each dictionary contains the 'videoId', 'title', 'uploadTime', and 'description' for a video.
--------------------
### `main`
**Purpose:** The main execution function that orchestrates the process of fetching channel info, video IDs, and video details, then organizing and exporting the data.
**Returns:** None. It prints information to the console and saves a CSV file.
--------------------

## Usage Guide

To use this script, you will need a YouTube Data API v3 key and the ID of the YouTube channel you want to fetch videos from.

### Prerequisites

You need a **YouTube Data API v3 key**. If you don't have one, you can obtain it from the [Google Cloud Console](https://console.cloud.google.com/apis/credentials). You will need to create a project and enable the YouTube Data API v3 for that project.

### Setting the YouTube Data API Key

Set the `YOUTUBE_API_KEY` environment variable to your API key, or locate the line that begins with `YOUTUBE_API_KEY =` in the script and put your key there. Never commit or publish a real API key.
"""

# Write the README next to the notebook as UTF-8.
with open('README.md', 'w', encoding='utf-8') as f:
    f.write(readme_content)

print("README.md file has been created.")

## Save as HTML file

### Subtask:
Write the generated HTML content to a file named `briefing_report.html`.

**Reasoning**:
The report exists only as the in-memory string `html_report_content`; writing it to `briefing_report.html` with UTF-8 encoding persists it as a file.

In [28]:
# Persist the HTML report to disk as UTF-8.
# NOTE(review): `html_report_content` is not defined in the visible cells; it
# is presumably produced by an earlier cell — confirm before running this alone.
with open('briefing_report.html', mode='w', encoding='utf-8') as report_file:
    report_file.write(html_report_content)