<a href="https://colab.research.google.com/github/41371108H/114-1repo/blob/main/HW4_%E7%88%AC%E8%9F%B2_41371108H.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip -q install gspread gspread_dataframe google-auth google-auth-oauthlib google-auth-httplib2 \
               gradio pandas beautifulsoup4 google-generativeai python-dateutil

In [5]:
import os, time, uuid, re, json, datetime
from datetime import datetime as dt, timedelta
from dateutil.tz import gettz
import pandas as pd
import gradio as gr
import requests
from bs4 import BeautifulSoup

import google.generativeai as genai

# Google Auth & Sheets
from google.colab import auth
import gspread
from gspread_dataframe import set_with_dataframe, get_as_dataframe
from google.auth.transport.requests import Request
from google.oauth2 import service_account
from google.auth import default

In [6]:
# Authenticate the current Colab user before any Google API call is made.
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
# default() returns a (credentials, project) pair; only the credentials are kept.
creds, _ = default()

# Module-level gspread client; ensure_spreadsheet() reads this global.
gc = gspread.authorize(creds)

In [7]:
from google.colab import userdata

# Read the API key from Colab Secrets (the secret is stored under the name 'repo').
api_key = userdata.get('repo')

# Configure google.generativeai with the key that was just read.
genai.configure(api_key=api_key)

# Module-level Gemini model handle.
model = genai.GenerativeModel('gemini-2.5-pro')

In [8]:
# Share URL of the target Google Spreadsheet.
SHEET_URL = "https://docs.google.com/spreadsheets/d/1YmuPBSwZVq0m0-lwpsjCEcyXik3WVeE7a23sRin5ojw/edit?usp=sharing"
# Passed to ensure_spreadsheet() further down as the spreadsheet title.
WORKSHEET_NAME = "工作表1"
# NOTE(review): not referenced anywhere else in the visible code — confirm it is still needed.
TIMEZONE = "Asia/Taipei"

In [9]:
# Header row for the "ptt_movie_posts" worksheet (passed to ensure_worksheet below).
PTT_HEADER = [
    "post_id","title","url","date","author","nrec","created_at",
    "fetched_at","content"
]
# Header row for the "ptt_movie_terms" worksheet (passed to ensure_worksheet below).
TERMS_HEADER = ["term","freq","df_count","tfidf_mean","examples"]

In [10]:
def ensure_spreadsheet(name):
    """Open the spreadsheet titled *name*, creating it when it does not exist.

    Uses the module-level gspread client ``gc``.

    Args:
        name: Spreadsheet title to look up.

    Returns:
        The spreadsheet object returned by ``gc.open`` or ``gc.create``.
    """
    try:
        return gc.open(name)
    except gspread.SpreadsheetNotFound:
        # No spreadsheet with that title is visible to this account: make one.
        return gc.create(name)

# NOTE(review): WORKSHEET_NAME is used here as the *spreadsheet* title, and
# SHEET_URL is not used by this call — confirm which spreadsheet is intended.
sh = ensure_spreadsheet(WORKSHEET_NAME)

In [11]:
def ensure_worksheet(sh, title, header):
    """Return the worksheet *title* of *sh*, guaranteeing its first row is *header*.

    A missing worksheet is created (1000 rows, ``len(header) + 5`` columns) and
    given the header row. An existing worksheet whose first row differs from
    *header* (or that is empty) is cleared and rewritten with the header only.

    Args:
        sh: Spreadsheet object exposing ``worksheet`` / ``add_worksheet``.
        title: Worksheet (tab) title.
        header: List of column names expected in row 1.

    Returns:
        The worksheet object.
    """
    try:
        sheet = sh.worksheet(title)
    except gspread.WorksheetNotFound:
        column_count = str(len(header) + 5)
        sheet = sh.add_worksheet(title=title, rows="1000", cols=column_count)
        sheet.update([header])

    # Re-read the sheet and restore the header row when it is absent or wrong.
    current_rows = sheet.get_all_values()
    header_ok = bool(current_rows) and current_rows[0] == header
    if not header_ok:
        sheet.clear()
        sheet.update([header])
    return sheet

In [12]:
# Make sure both worksheets exist in `sh` and carry their expected header rows.
ws_ptt_posts = ensure_worksheet(sh, "ptt_movie_posts", PTT_HEADER)
ws_ptt_terms = ensure_worksheet(sh, "ptt_movie_terms", TERMS_HEADER)

In [13]:
# 安裝與匯入
!pip -q install requests beautifulsoup4 pandas

import re
import time
import math
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd

# Site root used to build board-page URLs and to resolve relative article links.
BASE = "https://pttweb.tw"
# Board name; it is the path segment between BASE and the page number.
BOARD = "Drama-Ticket"
# Request headers sent with every HTTP GET issued by the crawler functions.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

def get_board_page_urls(page_numbers):
    """Map board page numbers to their full URLs.

    Args:
        page_numbers: Iterable of page numbers, e.g. ``[8907, 8906]``.

    Returns:
        A list of ``{BASE}/{BOARD}/{page}`` URLs in the same order as the input.
    """
    urls = []
    for page_no in page_numbers:
        urls.append(f"{BASE}/{BOARD}/{page_no}")
    return urls

def extract_post_links_from_board(board_page_url, session=None):
    """Collect the article URLs linked from one board index page.

    Args:
        board_page_url: Full URL of a board listing page.
        session: Optional ``requests.Session`` to reuse; a new one is created
            when omitted.

    Returns:
        A de-duplicated list of absolute article URLs, in the order the links
        first appear on the page.

    Raises:
        requests.HTTPError: If the listing page responds with an error status.
    """
    sess = session or requests.Session()
    r = sess.get(board_page_url, headers=HEADERS, timeout=20)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # Derive the link prefix from BOARD so the selector cannot drift from the
    # board whose pages are being requested.
    selector = f'a[href^="/{BOARD}/M."]'

    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # is deterministic (a plain set would return links in arbitrary order).
    unique_urls = dict.fromkeys(
        urljoin(BASE, a.get("href", "")) for a in soup.select(selector)
    )
    return list(unique_urls)

def parse_post(post_url, session=None):
    """Fetch one article page and extract its title, author and timestamp.

    The page is flattened to plain text and the three fields are pulled out
    with regular expressions.

    Args:
        post_url: Full URL of the article page.
        session: Optional ``requests.Session`` to reuse.

    Returns:
        dict with keys ``title``, ``author``, ``datetime`` and ``url``.
        ``author`` and ``datetime`` are ``None`` when their pattern is not found.
    """
    http = session or requests.Session()
    response = http.get(post_url, headers=HEADERS, timeout=20)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # All fields are searched for in the whitespace-joined text of the page.
    page_text = soup.get_text(" ", strip=True)

    # Title: the text between the "標題" and "看板" labels; otherwise fall back
    # to the part of <title> before the first " - ".
    title_match = re.search(r"標題\s+(.+?)\s+看板", page_text)
    if title_match is not None:
        title = title_match.group(1).strip()
    else:
        doc_title = soup.title.get_text(strip=True) if soup.title else ""
        title = doc_title.partition(" - ")[0]

    # Author: the text between the "作者" and "時間" labels.
    author_match = re.search(r"作者\s+(.+?)\s+時間", page_text)
    author = author_match.group(1).strip() if author_match else None

    # Timestamp: "時間 (YYYY-MM-DD hh:mm:ss)".
    time_match = re.search(r"時間\s*\((\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\)", page_text)
    posted_at = time_match.group(1) if time_match else None

    return {
        "title": title,
        "author": author,
        "datetime": posted_at,
        "url": post_url,
    }

def crawl_board_pages(page_numbers, sleep_sec=0.6):
    """Crawl the given board pages and return their articles as a DataFrame.

    For each page number the listing page is fetched, every article link not
    seen before is parsed, and a pause of *sleep_sec* follows each article and
    each page. Failures are printed as warnings and skipped.

    Args:
        page_numbers: Iterable of board page numbers.
        sleep_sec: Seconds to sleep between requests.

    Returns:
        DataFrame with columns page/title/author/datetime/url, sorted by
        datetime (newest first, missing values last) and then by page.
    """
    http = requests.Session()
    records = []
    visited = set()

    for page_no in page_numbers:
        index_url = f"{BASE}/{BOARD}/{page_no}"
        try:
            links = extract_post_links_from_board(index_url, session=http)
        except Exception as e:
            print(f"[WARN] 讀取列表頁失敗 {index_url}: {e}")
            continue

        for link in links:
            if link in visited:
                continue
            visited.add(link)

            try:
                record = parse_post(link, session=http)
                record["page"] = page_no
                records.append(record)
            except Exception as e:
                print(f"[WARN] 解析文章失敗 {link}: {e}")
            # Polite delay so the site is not hit too frequently.
            time.sleep(sleep_sec)

        time.sleep(sleep_sec)

    frame = pd.DataFrame(records, columns=["page", "title", "author", "datetime", "url"])
    # Unparseable timestamps become NaT and are pushed to the end by the sort.
    frame["datetime"] = pd.to_datetime(frame["datetime"], errors="coerce")
    ordered = frame.sort_values(
        ["datetime", "page"], ascending=[False, False], na_position="last"
    )
    return ordered.reset_index(drop=True)

# ====== Example usage ======
# E.g. crawl pages 8907 and 8906 (change to whichever page numbers you want to test).
pages_to_crawl = [8907, 8906]
df = crawl_board_pages(pages_to_crawl, sleep_sec=0.5)
df.head(10)


Unnamed: 0,page,title,author,datetime,url
0,8907,[換票] 多種票券 換 SUPER JUNIOR/李聖傑/金幣票 (3/3篇),healthyno1 (healthy),2025-10-26 18:49:15,https://pttweb.tw/Drama-Ticket/M.1761475757.A.BCA
1,8907,[售票] 11/9 郭富城高雄巨蛋演唱會，特A區5880 (6/6篇),"kidd1984 (Hello, stranger.)",2025-10-26 18:47:14,https://pttweb.tw/Drama-Ticket/M.1761475636.A.30F
2,8907,[售票] 12/19 OneRepublic 高雄演唱會*1 (2/2篇),joesu1990 (euphoria),2025-10-26 18:30:13,https://pttweb.tw/Drama-Ticket/M.1761474615.A.BCA
3,8907,[售票] SUPER JUNIOR SUPER SHOW 10 11/14 11/1,MissSoda (soda),2025-10-26 17:45:50,https://pttweb.tw/Drama-Ticket/M.1761471952.A.0D8
4,8907,[贈送] 急贈 今晚高雄衛武營 易北愛樂,daretolove (dare),2025-10-26 17:43:25,https://pttweb.tw/Drama-Ticket/M.1761471807.A.26B
5,8907,[換票] 理想混蛋800*1 1500*4共和800*4換李聖傑,Lovemei (流氓駿),2025-10-26 17:35:52,https://pttweb.tw/Drama-Ticket/M.1761471355.A.EE0
6,8907,[售票] 2025/11/16 台南 八月，在我家,t888877 (Chun),2025-10-26 17:20:07,https://pttweb.tw/Drama-Ticket/M.1761470409.A.99C
7,8907,[售票] 11/15(六) Super Junior 5680*1,YuMo0824 (紅茶好好喝),2025-10-26 17:03:51,https://pttweb.tw/Drama-Ticket/M.1761469433.A.E9E
8,8907,[售票] 10/27大巨蛋秀泰 鬼滅之刃無限城電影票,alienpaufu (鴨齁),2025-10-26 16:59:58,https://pttweb.tw/Drama-Ticket/M.1761469200.A.F5C
9,8907,[售票] 雲門舞集（關不掉的耳朵）（台中）,scientist784 (小瑞),2025-10-26 16:31:19,https://pttweb.tw/Drama-Ticket/M.1761467481.A.C39


In [14]:
# Save as CSV (written to the current Colab working directory).
out_csv = "drama_ticket_posts.csv"
df.to_csv(out_csv, index=False, encoding="utf-8-sig")
print(f"已輸出：{out_csv}")

# Build a {title: href} lookup. Duplicate titles overwrite each other here;
# use title+datetime as the key if that matters.
title_to_href = dict(zip(df["title"], df["url"]))
# Show a few examples.
for k, v in list(title_to_href.items())[:5]:
    print(k, "->", v)

已輸出：drama_ticket_posts.csv
[換票] 多種票券 換 SUPER JUNIOR/李聖傑/金幣票 (3/3篇) -> https://pttweb.tw/Drama-Ticket/M.1761475757.A.BCA
[售票] 11/9 郭富城高雄巨蛋演唱會，特A區5880 (6/6篇) -> https://pttweb.tw/Drama-Ticket/M.1761475636.A.30F
[售票] 12/19 OneRepublic 高雄演唱會*1 (2/2篇) -> https://pttweb.tw/Drama-Ticket/M.1761474615.A.BCA
[售票] SUPER JUNIOR SUPER SHOW 10 11/14 11/1 -> https://pttweb.tw/Drama-Ticket/M.1761471952.A.0D8
[贈送] 急贈 今晚高雄衛武營 易北愛樂 -> https://pttweb.tw/Drama-Ticket/M.1761471807.A.26B


In [15]:
def crawl_by_start_and_count(start_page:int, how_many:int):
    """Crawl *how_many* consecutive pages, counting down from *start_page*.

    Page numbers decrease (e.g. 8907, 8906, ...), so the pages visited are
    ``start_page, start_page - 1, ...``.

    Returns:
        Whatever ``crawl_board_pages`` returns for that page list.
    """
    pages = []
    for offset in range(how_many):
        pages.append(start_page - offset)
    return crawl_board_pages(pages)

# Example: starting at 8907, crawl 3 pages -> 8907, 8906, 8905
df2 = crawl_by_start_and_count(8907, 3)
df2.tail()

Unnamed: 0,page,title,author,datetime,url
145,8905,[急售] 旺福 2025 旺聖節 降價,isedon (isedon),2025-10-25 00:30:37,https://pttweb.tw/Drama-Ticket/M.1761323439.A.522
146,8905,[換票] 理想混蛋1500換李聖傑800,piyo0622 (幻聽),2025-10-25 00:12:32,https://pttweb.tw/Drama-Ticket/M.1761322354.A.62F
147,8905,[急售] (G-DRAGON 權志龍 )4DX (2/2篇),laramusic (stupidwhoareu),2025-10-24 23:50:39,https://pttweb.tw/Drama-Ticket/M.1761321041.A.733
148,8905,[代訂] (風林火山x徵人啟弒x創戰神x泥娃娃）威秀 (11/12篇),laramusic (stupidwhoareu),2025-10-24 23:48:40,https://pttweb.tw/Drama-Ticket/M.1761320924.A.AC8
149,8905,[售票] 12/19 ONEREPUBLIC 2900*2,cp32j3x04 (酷man),2025-10-24 22:50:01,https://pttweb.tw/Drama-Ticket/M.1761317403.A.0BB


In [16]:
!pip -q install requests beautifulsoup4 pandas gspread google-auth google-auth-oauthlib google-auth-httplib2 gspread-dataframe jieba scikit-learn google-generativeai gradio

In [20]:
import re, time, requests, pandas as pd, numpy as np
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from datetime import datetime

# ---- Google authentication / Sheets ----
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
from gspread_dataframe import set_with_dataframe, get_as_dataframe

# default() returns a (credentials, project) pair; only the credentials are kept.
creds, _ = default()
# Module-level gspread client; open_spreadsheet() reads this global.
gc = gspread.authorize(creds)

# ---- NLP / TF-IDF ----
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import Counter

# ---- Gemini ----
import google.generativeai as genai

# ---- Gradio ----
import gradio as gr

# ---- Crawler settings ----
# Site root used to build board-page URLs and to resolve relative article links.
BASE  = "https://pttweb.tw"
# Board name; it is the path segment between BASE and the page number.
BOARD = "Drama-Ticket"
# Request headers sent with every HTTP GET issued by the crawler functions.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
}

# Text-splitting patterns. They must sit at column 0 (an indented top-level
# statement is an IndentationError) and are raw strings so that regex escapes
# such as \. \s \d are not parsed as string escapes.
# NOTE(review): these three patterns are not referenced elsewhere in the
# visible code — confirm whether they are still needed.
re_han_default = re.compile(r"([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)", re.U)
re_skip_default = re.compile(r"(\r\n|\s)", re.U)
re_skip = re.compile(r"([a-zA-Z0-9]+(?:\.\d+)?%?)")


In [21]:
def extract_post_links_from_board(board_page_url, session=None):
    """Return the absolute article URLs found on one board listing page.

    Args:
        board_page_url: Full URL of a board listing page.
        session: Optional ``requests.Session`` to reuse; a new one is created
            when omitted.

    Returns:
        A de-duplicated list of article URLs in the order they first appear on
        the page.

    Raises:
        requests.HTTPError: If the listing page responds with an error status.
    """
    sess = session or requests.Session()
    r = sess.get(board_page_url, headers=HEADERS, timeout=20)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # Article links look like /<BOARD>/M.xxxxxxxxxx. The prefix is built from
    # BOARD so that changing the board constant also changes what is matched.
    anchors = soup.select(f'a[href^="/{BOARD}/M."]')

    # dict.fromkeys removes duplicates but keeps page order, making the output
    # deterministic (a set would yield an arbitrary order).
    return list(dict.fromkeys(urljoin(BASE, a["href"]) for a in anchors))

def parse_post(post_url, session=None):
    """Download an article page and pull out title, author and timestamp.

    Args:
        post_url: Full URL of the article page.
        session: Optional ``requests.Session`` to reuse.

    Returns:
        dict with keys ``title``, ``author``, ``datetime`` and ``url``.
        ``author`` and ``datetime`` are ``None`` when not found in the page text.
    """
    http = session or requests.Session()
    response = http.get(post_url, headers=HEADERS, timeout=20)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Every field is extracted by regex from the flattened page text.
    page_text = soup.get_text(" ", strip=True)

    def first_group(pattern):
        """Return group 1 of the first match of *pattern* in the page, or None."""
        found = re.search(pattern, page_text)
        return found.group(1) if found else None

    # Title, falling back to the <title> text before the first " - ".
    title = first_group(r"標題\s+(.+?)\s+看板")
    if title is not None:
        title = title.strip()
    else:
        doc_title = soup.title.get_text(strip=True) if soup.title else ""
        title = doc_title.partition(" - ")[0]

    # Author
    author = first_group(r"作者\s+(.+?)\s+時間")
    if author is not None:
        author = author.strip()

    # Timestamp, kept as the raw "YYYY-MM-DD hh:mm:ss" string
    posted_at = first_group(r"時間\s*\((\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\)")

    return {
        "title": title,
        "author": author,
        "datetime": posted_at,
        "url": post_url
    }

def crawl_by_start_and_count(start_page:int, how_many:int, sleep_sec:float=0.5):
    """Crawl *how_many* board pages counting down from *start_page*.

    Example: ``start_page=8907, how_many=3`` visits 8907, 8906 and 8905.
    Listing or article failures are printed as warnings and skipped.

    Args:
        start_page: First (highest) page number to fetch.
        how_many: Number of pages to fetch.
        sleep_sec: Seconds to sleep after each article and after each page.

    Returns:
        DataFrame with columns page/title/author/datetime/url, sorted by
        datetime (newest first, missing values last) and then by page.
    """
    http = requests.Session()
    visited = set()
    collected = []

    for page_no in (start_page - offset for offset in range(how_many)):
        index_url = f"{BASE}/{BOARD}/{page_no}"
        try:
            page_links = extract_post_links_from_board(index_url, session=http)
        except Exception as e:
            print(f"[WARN] 讀取列表頁失敗 {index_url}: {e}")
            continue

        for link in page_links:
            if link in visited:
                continue
            visited.add(link)
            try:
                record = parse_post(link, session=http)
                record["page"] = page_no
                collected.append(record)
            except Exception as e:
                print(f"[WARN] 解析文章失敗 {link}: {e}")
            time.sleep(sleep_sec)
        time.sleep(sleep_sec)

    frame = pd.DataFrame(collected, columns=["page","title","author","datetime","url"])
    # Unparseable timestamps become NaT and are pushed to the end by the sort.
    frame["datetime"] = pd.to_datetime(frame["datetime"], errors="coerce")
    ordered = frame.sort_values(
        ["datetime","page"], ascending=[False, False], na_position="last"
    )
    return ordered.reset_index(drop=True)


In [22]:
def open_spreadsheet(sheet_url_or_id:str):
    """Open a Google Spreadsheet by URL or by key using the global client ``gc``.

    Args:
        sheet_url_or_id: Spreadsheet URL (anything starting with "http") or a
            spreadsheet key. Surrounding whitespace is ignored, since the value
            is typically pasted into a text box.

    Returns:
        The spreadsheet object returned by gspread.

    Raises:
        ValueError: If the value is empty or blank, so the caller gets a clear
            message instead of an opaque API error.
    """
    target = (sheet_url_or_id or "").strip()
    if not target:
        raise ValueError("sheet_url_or_id must be a non-empty spreadsheet URL or ID")
    if target.startswith("http"):
        return gc.open_by_url(target)
    return gc.open_by_key(target)

def ensure_worksheets(sh):
    """Create the tabs the pipeline writes to if they are missing.

    "工作表1" receives the raw posts and "工作表4" receives the statistics.
    Each title is checked against the name that is actually created, so
    calling this repeatedly never tries to add a tab that already exists.

    Args:
        sh: Spreadsheet object exposing ``worksheets()`` and ``add_worksheet()``.
    """
    existing_titles = {ws.title for ws in sh.worksheets()}
    for title in ("工作表1", "工作表4"):
        if title not in existing_titles:
            sh.add_worksheet(title, rows=1000, cols=10)

def write_raw_posts(sh, df):
    """Replace the contents of the "工作表1" tab of *sh* with *df*."""
    target = sh.worksheet("工作表1")
    # Clear first so no rows from an earlier run remain.
    target.clear()
    set_with_dataframe(target, df)

def read_raw_posts(sh):
    """Read the "工作表1" tab of *sh* into a DataFrame, dropping all-empty rows."""
    source = sh.worksheet("工作表1")
    loaded = get_as_dataframe(source, evaluate_formulas=True)
    return loaded.dropna(how="all")

def write_stats(sh, stats_df, meta_note:str=None, summary_text:str=None):
    """Write *stats_df* to the "工作表4" tab and append optional notes below it.

    Args:
        sh: Spreadsheet object.
        stats_df: DataFrame of statistics; replaces the tab's contents.
        meta_note: Optional text written with a "NOTE: " prefix.
        summary_text: Optional text written with a "SUMMARY:" prefix.

    When at least one note is given, the notes are joined by a blank line and
    written into column 1 of row ``len(stats_df) + 3``.
    """
    sheet = sh.worksheet("工作表4")
    sheet.clear()
    set_with_dataframe(sheet, stats_df)

    # Collect whichever annotations were supplied, in NOTE -> SUMMARY order.
    annotations = []
    if meta_note:
        annotations.append(f"NOTE: {meta_note}")
    if summary_text:
        annotations.append(f"SUMMARY:\n{summary_text}")
    if not annotations:
        return

    note_row = stats_df.shape[0] + 3
    sheet.update_cell(note_row, 1, "\n\n".join(annotations))

In [23]:
# Rough stop-word list (extend as needed). clean_tokens() drops any token found in this set.
CN_STOPWORDS = set("""
的 了 和 與 及 在 是 有 也 就 都 而 及 及其 之 於 並 不 非 很 更 最 著 這 那 我 你 他 她 它 們
請 求 售 讓 收 換 票 台 北 高 雄 新 北 台中 台南 花蓮 宜蘭
""".split())

# Punctuation and symbol characters. clean_tokens() drops tokens made up only of these.
PUNCS = set(list("，。！？、：；（）()【】[]「」『』…—-–．．.·・/\\|_=+*^%$#@~`'\" "))

def jieba_tokenize(text):
    """Segment *text* with jieba and return the non-blank tokens, stripped."""
    stripped = (piece.strip() for piece in jieba.cut(text))
    return [piece for piece in stripped if piece]

def clean_tokens(tokens):
    """Filter out stop words, punctuation-only tokens and single-character tokens.

    Args:
        tokens: Iterable of string tokens.

    Returns:
        List of the tokens that survive, in their original order.
    """
    return [
        tok
        for tok in tokens
        if tok not in CN_STOPWORDS and not (set(tok) <= PUNCS) and len(tok) > 1
    ]

def build_corpus(df):
    """Build the text corpus from the ``title`` column of *df*.

    Returns:
        ``(texts, tokenized)``: the titles as strings (missing values become
        ""), and for each title its cleaned jieba tokens.
    """
    title_column = df["title"].fillna("")
    texts = title_column.astype(str).tolist()

    tokenized = []
    for text in texts:
        tokenized.append(clean_tokens(jieba_tokenize(text)))
    return texts, tokenized

def compute_freq_and_tfidf(texts, tokenized, top_n=30):
    """Compute term frequency and a representative TF-IDF score per token.

    Args:
        texts: The raw documents (not used by the computation; kept for
            interface compatibility).
        tokenized: List of token lists, one per document.
        top_n: How many tokens to take from each ranking (by frequency and by
            TF-IDF); the result is the union of both selections.

    Returns:
        DataFrame with columns ``token``, ``freq`` and ``tfidf``, sorted by
        ``freq`` then ``tfidf`` (both descending). ``tfidf`` is the maximum
        TF-IDF weight of the token over all documents. The frame is empty when
        no document contains any token.
    """
    # Term frequency over the whole corpus.
    freq = Counter()
    for tokens in tokenized:
        freq.update(tokens)

    # TfidfVectorizer raises ValueError on an empty vocabulary, so return an
    # empty result with the expected columns instead of crashing.
    if not freq:
        return pd.DataFrame(columns=["token", "freq", "tfidf"])

    # The documents are already tokenized, so tokenizer and preprocessor are
    # identity functions.
    def identity(x): return x
    vectorizer = TfidfVectorizer(
        tokenizer=identity, preprocessor=identity, token_pattern=None,
        lowercase=False, min_df=1
    )
    X = vectorizer.fit_transform(tokenized)
    terms = vectorizer.get_feature_names_out()

    # X.max(axis=0) on a sparse matrix is itself sparse; np.asarray() on that
    # gives a 0-d object array, so it must be densified with .toarray().
    max_tfidf = X.max(axis=0).toarray().ravel()
    tfidf_by_token = {str(t): float(v) for t, v in zip(terms, max_tfidf)}

    # Take the union of both top-N selections, then look up each token's real
    # values, so a token picked by one ranking is not reported as 0 in the other.
    top_by_freq = [tok for tok, _ in freq.most_common(top_n)]
    top_by_tfidf = sorted(tfidf_by_token, key=tfidf_by_token.get, reverse=True)[:top_n]
    selected = list(dict.fromkeys(top_by_freq + top_by_tfidf))

    stats = pd.DataFrame({
        "token": selected,
        "freq": [freq[tok] for tok in selected],
        "tfidf": [tfidf_by_token.get(tok, 0.0) for tok in selected],
    })
    stats = stats.sort_values(["freq","tfidf"], ascending=[False, False]).reset_index(drop=True)
    return stats


In [24]:
def gemini_summary(api_key:str, top_stats_df:pd.DataFrame, sample_titles:list, top_n:int, model_name:str="gemini-1.5-flash"):
    """Ask Gemini for a short insight summary of the hot terms and sample titles.

    Args:
        api_key: Gemini API key. When empty, no request is made and a
            placeholder message is returned.
        top_stats_df: DataFrame with a ``token`` column; its first *top_n*
            tokens are sent as the hot keywords.
        sample_titles: List of titles; the first 8 are sent as samples.
        top_n: Number of tokens to include in the prompt.
        model_name: Gemini model to use (defaults to the previously hard-coded
            model).

    Returns:
        The stripped response text, or a placeholder string when no key was
        given or the response carries no text.
    """
    if not api_key:
        return "(未提供 Gemini API Key，略過生成。)"

    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(model_name)

    top_terms = ", ".join(top_stats_df["token"].head(top_n).tolist())
    sample_titles_text = "\n".join(f"- {t}" for t in sample_titles[:8])

    prompt = f"""
你是一位資料洞察顧問。根據下面 PTT 票務文章的熱門關鍵詞與標題樣本，請用繁體中文輸出：
1) 5 句洞察摘要（每句不超過 30 字）
2) 一段 120 字以內的結論（整合趨勢與建議）

【熱門關鍵詞】：
{top_terms}

【標題樣本（擇要）】：
{sample_titles_text}

請嚴格依照格式：
- 洞察(1)：
- 洞察(2)：
- 洞察(3)：
- 洞察(4)：
- 洞察(5)：
- 結論："""
    resp = model.generate_content(prompt)

    # The `.text` accessor raises ValueError (not AttributeError) when the
    # response has no text part, e.g. a blocked answer, and getattr's default
    # does not cover that. Treat it like any other empty response.
    try:
        text = getattr(resp, "text", None) if resp else None
    except ValueError:
        text = None
    return text.strip() if text else "(Gemini 無回應)"

In [25]:
def full_pipeline(sheet_url_or_id:str, start_page:int, how_many:int, top_n:int, gemini_api_key:str="", pause:float=0.5):
    """Run crawl -> Google Sheet -> term statistics -> Gemini summary -> write-back.

    Args:
        sheet_url_or_id: Spreadsheet URL or key to write to.
        start_page: First (highest) board page number to crawl.
        how_many: Number of pages to crawl, counting down from *start_page*.
        top_n: Number of top terms to keep and to send to Gemini.
        gemini_api_key: Gemini API key; an empty value skips the summary.
        pause: Seconds to sleep between requests while crawling.

    Returns:
        ``(status_message, posts_df, stats_df, summary_text)``. When nothing
        was crawled, only the message is set and the other three are ``None``.
    """
    # 1) Crawl
    df_posts = crawl_by_start_and_count(start_page=start_page, how_many=how_many, sleep_sec=pause)
    if df_posts.empty:
        return "未抓到任何文章，請更換頁碼。", None, None, None

    # 2) Write the raw posts to the Google Sheet
    sh = open_spreadsheet(sheet_url_or_id)
    ensure_worksheets(sh)
    write_raw_posts(sh, df_posts)

    # 3) Read them back from the sheet (verification, and the analysis source)
    df_src = read_raw_posts(sh)
    # fillna must run BEFORE astype(str): astype(str) turns NaN into the
    # literal string "nan", which fillna no longer catches and which would
    # then be tokenized as a real word.
    df_src["title"] = df_src["title"].fillna("").astype(str)

    # 4) Tokenize / term frequency / TF-IDF
    texts, tokenized = build_corpus(df_src)
    stats = compute_freq_and_tfidf(texts, tokenized, top_n=top_n)

    # 5) Gemini summary; blank titles are skipped so they do not use up sample slots
    sample_titles = [t for t in df_src["title"].tolist() if t.strip()]
    summary_text = gemini_summary(gemini_api_key, stats, sample_titles, top_n)

    # 6) Write the statistics back
    meta = f"來源頁碼：{start_page} 往回 {how_many} 頁；Top N = {top_n}；時間：{datetime.now()}"
    write_stats(sh, stats, meta_note=meta, summary_text=summary_text)

    return f"✅ 完成！已回寫至工作表：工作表1 / 工作表4 \n{meta}", df_posts, stats, summary_text

In [26]:
# Gradio UI: collects the pipeline inputs, runs full_pipeline on click, and
# shows the status message, both DataFrames and the summary text.
with gr.Blocks(title="PTT 票務爬蟲 × TF-IDF × Gemini 摘要") as demo:
    gr.Markdown("## 🎫 PTT Drama-Ticket → Google Sheet → 熱詞分析（TF-IDF）→ Gemini 摘要")
    # Input row 1: target spreadsheet (URL or ID).
    with gr.Row():
        sheet_in   = gr.Textbox(label="Google Spreadsheet（URL 或 ID）", placeholder="https://docs.google.com/spreadsheets/d/1YmuPBSwZVq0m0-lwpsjCEcyXik3WVeE7a23sRin5ojw/edit?usp=sharing")
    # Input row 2: crawl range and number of top terms.
    with gr.Row():
        start_page = gr.Number(value=8907, label="起始頁碼（大到小遞減）", precision=0)
        how_many   = gr.Number(value=2, label="往回抓幾頁", precision=0)
        top_n      = gr.Number(value=20, label="輸出前 N 熱詞", precision=0)
    # Input row 3: Gemini API key (masked) and the delay between requests.
    with gr.Row():
        # NOTE(review): this rebinds the module-level name `api_key` (assigned a
        # key string in an earlier cell) to a Textbox component — confirm intended.
        api_key    = gr.Textbox(label="repo", type="password", placeholder="AI 生成摘要用")
        pause_sec  = gr.Number(value=0.5, label="請求間隔秒數（禮貌性）", precision=1)

    # Trigger button and output widgets.
    run_btn = gr.Button("🚀 一鍵執行")
    status  = gr.Textbox(label="狀態")
    df_out  = gr.Dataframe(label="抓取結果（raw_posts）")
    stats_o = gr.Dataframe(label="熱詞統計（stats：freq / tfidf）")
    summ_o  = gr.Textbox(label="Gemini 洞察摘要 + 結論（也會回寫到 stats）", lines=10)

    def _run(sheet, sp, cnt, topn, key, pause):
        """Convert the widget values, call full_pipeline, and shape its result for the outputs."""
        msg, df_posts, stats, summary = full_pipeline(
            sheet_url_or_id=sheet,
            start_page=int(sp),
            how_many=int(cnt),
            top_n=int(topn),
            gemini_api_key=key or "",
            pause=float(pause)
        )
        # Anything that is not a DataFrame becomes None and a missing summary
        # becomes "" before being handed to the output widgets.
        return msg, (df_posts if isinstance(df_posts, pd.DataFrame) else None), (stats if isinstance(stats, pd.DataFrame) else None), (summary or "")

    # The input and output lists follow the order of _run's parameters and return values.
    run_btn.click(_run, [sheet_in, start_page, how_many, top_n, api_key, pause_sec],
                  [status, df_out, stats_o, summ_o])

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://45f448f1c6d5a2eccf.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


