In [6]:
import requests
from bs4 import BeautifulSoup
import re

def get_latest_period_and_url():
    """Fetch the e-invoice index page and locate the latest and previous draws.

    Returns:
        A pair ``((latest_period, latest_url), (prev_period, prev_url))``.

        * ``latest_url`` is always the index page URL.
        * ``latest_period`` is the period string (e.g. ``114年03-04月``) taken
          from the highlighted submenu entry, or ``None`` when that entry is
          missing or contains no recognisable period.
        * ``prev_period`` is the period string taken from the second submenu
          entry; when the text contains no recognisable period the full link
          text is returned instead. ``None`` when the entry is missing.
        * ``prev_url`` is the absolute URL the second submenu entry links to,
          or ``None`` when the entry or its ``href`` is missing.
    """
    from urllib.parse import urljoin

    url = 'https://invoice.etax.nat.gov.tw/index.html'
    period_pattern = r'(\d{3}年\d{2}-\d{2}月)'

    # Without an explicit timeout requests may wait indefinitely on a
    # stalled connection.
    web = requests.get(url, timeout=10)
    web.encoding = 'utf-8'
    soup = BeautifulSoup(web.text, 'html.parser')

    # Latest period: the submenu anchor currently marked as active.
    latest_period = None
    submenu = soup.select_one('ul.etw-submenu01 > li > a.etw-on')
    if submenu:
        m = re.search(period_pattern, submenu.get_text(strip=True))
        if m:
            latest_period = m.group(1)
    latest_url = url

    # Previous period: the anchor inside the second <li> of the same submenu.
    prev_period = None
    prev_url = None
    prev_a = soup.select_one('ul.etw-submenu01 > li:nth-child(2) > a')
    if prev_a:
        prev_text = prev_a.get_text(strip=True)
        # Apply the same period extraction as for the latest entry so both
        # labels share one format; keep the raw text if nothing matches.
        m = re.search(period_pattern, prev_text)
        prev_period = m.group(1) if m else prev_text
        href = prev_a.get('href')
        if href:
            # urljoin resolves relative, root-relative and absolute hrefs
            # against the index page URL.
            prev_url = urljoin(url, href)

    return (latest_period, latest_url), (prev_period, prev_url)

def get_invoice_numbers_by_url(url):
    """Download a winning-numbers page and extract the prize numbers.

    Args:
        url: Address of the page to scrape. A falsy value (``None`` or an
            empty string) is reported and treated as "nothing to fetch".

    Returns:
        A dict with keys ``'特別獎'`` and ``'特獎'`` (each a string) and
        ``'頭獎'`` (a list of three strings), or ``None`` when no URL was
        given or the page has fewer than five ``etw-tbiggest`` elements.
    """
    # get_latest_period_and_url() may hand back None for a missing link;
    # passing that to requests.get would raise instead of returning None.
    if not url:
        print("未提供網址，無法抓取中獎號碼")
        return None

    # Without an explicit timeout requests may wait indefinitely on a
    # stalled connection.
    web = requests.get(url, timeout=10)
    web.encoding = 'utf-8'
    soup = BeautifulSoup(web.text, 'html.parser')
    td = soup.find_all(class_='etw-tbiggest')
    if len(td) < 5:
        print(f"抓取格式異常，頁面: {url}")
        return None

    # Elements 0 and 1 hold the two single-number prizes.
    ns = td[0].get_text(strip=True)
    n1 = td[1].get_text(strip=True)

    # Elements 2-4 hold the three first-prize numbers.
    n2 = []
    for cell in td[2:5]:
        spans = cell.find_all('span')
        if len(spans) == 2:
            # The number is split across exactly two <span>s: join them.
            n2.append(spans[0].get_text(strip=True) + spans[1].get_text(strip=True))
        else:
            # Any other layout: fall back to the element's whole text.
            n2.append(cell.get_text(strip=True))

    return {
        '特別獎': ns,
        '特獎': n1,
        '頭獎': n2
    }

def _print_prizes(title, data):
    """Print *title* followed by the three prize tiers; do nothing if *data* is falsy."""
    if not data:
        return
    print(title)
    print("特別獎：", data['特別獎'])
    print("特獎：", data['特獎'])
    print("頭獎：", ", ".join(data['頭獎']))


# Usage example: show the winning numbers of the latest and previous periods.
(latest_period, latest_url), (prev_period, prev_url) = get_latest_period_and_url()

print(f"最新期別：{latest_period}，網址：{latest_url}")
latest_data = get_invoice_numbers_by_url(latest_url)
_print_prizes("最新期別中獎號碼：", latest_data)

print()

print(f"上一期期別：{prev_period}，網址：{prev_url}")
# prev_url is None when the index page has no previous-period link; skip the
# fetch in that case instead of handing None to the scraper.
prev_data = get_invoice_numbers_by_url(prev_url) if prev_url else None
_print_prizes("上一期中獎號碼：", prev_data)



最新期別：114年03-04月，網址：https://invoice.etax.nat.gov.tw/index.html
最新期別中獎號碼：
特別獎： 64557267
特獎： 64808075
頭獎： 04322277, 07903676, 98883497

上一期期別：114年01-02月中獎號碼單，網址：https://invoice.etax.nat.gov.tw/lastNumber.html
上一期中獎號碼：
特別獎： 95980685
特獎： 37166026
頭獎： 78394633, 26503878, 39200954
