<a href="https://colab.research.google.com/github/41371120h/PL-Repo.peng/blob/main/HW4_%E5%AD%B8%E7%BF%92%E5%B9%B3%E5%8F%B0%E5%8A%A0%E7%95%AA%E8%8C%84%E9%90%98%E7%88%AC%E8%9F%B2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
!pip install gradio gspread google-auth google-auth-oauthlib google-auth-httplib2 pandas scikit-learn jieba beautifulsoup4 requests google-generativeai



In [10]:
# Sign the current Colab user in; this must run before credentials are requested below.
from google.colab import auth
auth.authenticate_user()

# Build the gspread client (`gc`) that later cells use to open the spreadsheet.
import gspread
from google.auth import default
creds, _ = default()  # default() returns a 2-tuple; only the credentials are kept
gc = gspread.authorize(creds)


In [11]:
import os
from urllib.parse import urljoin

import google.generativeai as genai
import gradio as gr
import jieba
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer

# === URL of the Google Sheet used as the data store ===
SHEET_URL = "https://docs.google.com/spreadsheets/d/107FcjXEnPn7vM10qFPj-wQFPeeNUOWTwKk5A-ejJqo4/edit#gid=0"


def _get_or_create_worksheet(spreadsheet, title, rows=100, cols=10):
    """Return the worksheet called `title`, creating it if it does not exist yet.

    Args:
        spreadsheet: An opened gspread spreadsheet.
        title: Name of the worksheet (tab) to fetch or create.
        rows: Row count used only when the worksheet has to be created.
        cols: Column count used only when the worksheet has to be created.

    Returns:
        The existing or newly created worksheet.
    """
    try:
        return spreadsheet.worksheet(title)
    except gspread.exceptions.WorksheetNotFound:
        # Only a missing tab triggers creation. Any other failure (permissions,
        # quota, network) propagates instead of being mistaken for "not found".
        return spreadsheet.add_worksheet(title=title, rows=rows, cols=cols)


# Open the spreadsheet and make sure both worksheets exist.
sh = gc.open_by_url(SHEET_URL)
worksheet = _get_or_create_worksheet(sh, "RawData")    # crawled articles
stat_sheet = _get_or_create_worksheet(sh, "Analysis")  # TF-IDF keyword table

# === TF-IDF parameters ===
TOP_N = 20  # vocabulary size passed to TfidfVectorizer(max_features=...)


In [12]:
def crawl_yahoo_news(max_articles=10, timeout=10):
    """Crawl the Yahoo Taiwan stock news index and download the linked articles.

    Args:
        max_articles: Maximum number of distinct article links to follow.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        pandas.DataFrame with the columns ``title``, ``url`` and ``content``, one
        row per article that could be downloaded. The columns are present even
        when no article was collected, so callers can always index them.

    Raises:
        requests.RequestException: If the index page itself cannot be fetched.
    """
    url = "https://tw.stock.yahoo.com/news/"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()  # do not parse an HTTP error page as if it were the index
    soup = BeautifulSoup(res.text, "html.parser")

    # Resolve every href against the index URL (handles relative and
    # protocol-relative links), drop duplicates while keeping page order,
    # then cap the number of articles to follow.
    anchors = soup.select("a[href*='news']")
    links = list(dict.fromkeys(urljoin(url, a["href"]) for a in anchors))[:max_articles]

    data = []
    for link in links:
        try:
            sub_res = requests.get(link, timeout=timeout)
            sub_res.raise_for_status()
        except requests.RequestException as e:
            # Best effort: one unreachable article must not abort the whole crawl.
            print("錯誤：", e)
            continue

        sub_soup = BeautifulSoup(sub_res.text, "html.parser")
        heading = sub_soup.select_one("h1")
        title = heading.text if heading is not None else ""
        content = " ".join(p.text for p in sub_soup.select("p"))
        data.append({"title": title, "url": link, "content": content})

    return pd.DataFrame(data, columns=["title", "url", "content"])


In [13]:
def analyze_tfidf(df, top_n=None):
    """Rank keywords of the crawled articles by their summed TF-IDF weight.

    Args:
        df: DataFrame holding the article text in a ``content`` column.
        top_n: Vocabulary size limit handed to ``TfidfVectorizer(max_features=...)``.
            ``None`` (the default) uses the module-level ``TOP_N``.

    Returns:
        pandas.DataFrame with the columns ``word`` and ``score``, sorted by score
        in descending order. The frame is empty when `df` has no ``content``
        column or no non-blank text to analyse.
    """
    no_keywords = pd.DataFrame(
        {"word": pd.Series(dtype="object"), "score": pd.Series(dtype="float64")}
    )
    if "content" not in df.columns:
        return no_keywords

    # Missing cells become empty documents so that jieba always receives a string.
    texts = ["" if pd.isna(text) else str(text) for text in df["content"]]
    if not any(text.strip() for text in texts):
        # Fitting a vectorizer on nothing but blanks fails; report "no keywords" instead.
        return no_keywords

    limit = TOP_N if top_n is None else top_n
    # jieba segments the Chinese text; tokens are space-joined so the vectorizer
    # can split them again with its own tokenizer.
    corpus = [" ".join(jieba.cut(text)) for text in texts]
    vectorizer = TfidfVectorizer(max_features=limit)
    X = vectorizer.fit_transform(corpus)
    keywords = vectorizer.get_feature_names_out()
    tfidf_values = X.sum(axis=0).A1  # per-term weight summed over all documents
    result = pd.DataFrame({"word": keywords, "score": tfidf_values})
    return result.sort_values(by="score", ascending=False).reset_index(drop=True)


In [14]:
# Configure the Gemini client. The key is taken from the GEMINI_API_KEY environment
# variable so that a real key never has to be saved inside the notebook; the
# placeholder below is only the fallback when that variable is not set.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", "你的_Gemini_API_KEY"))

def generate_insights(text):
    """Ask Gemini for insights and a conclusion about the given news text.

    Args:
        text: News text; only its first 3000 characters are placed in the prompt.

    Returns:
        The ``text`` attribute of the Gemini response.
    """
    gemini = genai.GenerativeModel("gemini-1.5-flash")
    excerpt = text[:3000]
    prompt = f"""
    以下是近期股市新聞內容摘要：
    {excerpt}
    請用中文生成：
    1️⃣ 五句洞察（條列式）
    2️⃣ 一段約 120 字的結論分析
    """
    reply = gemini.generate_content(prompt)
    return reply.text


In [15]:
def run_pipeline():
    """Run the whole workflow: crawl, store, TF-IDF analysis, Gemini insights.

    The crawled articles are written to the ``worksheet`` sheet and the keyword
    table to the ``stat_sheet`` sheet. Both sheets are cleared before writing.

    Returns:
        A tuple ``(top_keywords, insights)``: the first ten rows of the TF-IDF
        table and the text generated by Gemini. When nothing could be crawled,
        an empty ``word``/``score`` table and an explanatory message are returned
        and neither sheet is modified.
    """
    df = crawl_yahoo_news()
    if df.empty:
        # Nothing to analyse: skip the sheet writes, the TF-IDF fit and the Gemini call.
        return pd.DataFrame(columns=["word", "score"]), "未抓到任何新聞內容，請稍後再試。"

    # Clear first: update() only overwrites the cells it covers, so rows left over
    # from a longer previous run would otherwise stay in the sheet.
    worksheet.clear()
    worksheet.update([df.columns.values.tolist()] + df.values.tolist())

    tfidf_result = analyze_tfidf(df)
    stat_sheet.clear()
    stat_sheet.update([tfidf_result.columns.values.tolist()] + tfidf_result.values.tolist())

    insights = generate_insights(" ".join(df["content"]))
    return tfidf_result.head(10), insights


In [16]:
def run_all():
    """Gradio callback: run the pipeline and format its results for the two text boxes.

    Returns:
        A tuple ``(top_words, insights)`` where ``top_words`` is a numbered,
        newline-separated list of the keywords and ``insights`` is the Gemini text.
    """
    keyword_table, gemini_text = run_pipeline()
    numbered = []
    for rank, term in enumerate(keyword_table["word"].tolist(), start=1):
        numbered.append(f"{rank}. {term}")
    return "\n".join(numbered), gemini_text

# Output widgets, in the same order as the tuple returned by run_all().
hot_words_box = gr.Textbox(label="🔥 前 10 熱詞")
insights_box = gr.Textbox(label="🧠 Gemini 洞察與結論")

# Interface without inputs: the pipeline takes no arguments from the user.
iface = gr.Interface(
    title="Yahoo 股市新聞自動化分析系統（Colab 版）",
    description="一鍵執行：抓取 Yahoo 新聞 → 詞頻分析 → Gemini 中文洞察",
    fn=run_all,
    inputs=[],
    outputs=[hot_words_box, insights_box],
)

iface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9b282fc685acd33750.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


