<a href="https://colab.research.google.com/github/NJR203Group2/ntnu_artmsu/blob/main/NTNUArt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import re
import csv
import requests as req
from bs4 import BeautifulSoup as bs

BASE_URL = "https://www.artmuse.ntnu.edu.tw/index.php/current_exhibit/"

"""抓取師大美術館的館名、地址、開放時間、休館時間"""
def museum_info(base_url: str):
    """Scrape the museum name, address, opening hours and closed days.

    Args:
        base_url: URL of the page to download and parse.

    Returns:
        A 4-tuple ``(name, address, open_time, off_time)`` of strings.
        ``name`` falls back to a fixed default when the title element is
        missing; the other three are ``None`` when the corresponding
        elements are not present on the page.

    Raises:
        Whatever ``requests`` raises for a failed request, including the
        error raised by ``raise_for_status()`` on a 4xx/5xx response.
    """
    response = req.get(base_url, timeout=15)
    response.raise_for_status()
    html = bs(response.text, "html.parser")

    def after_label(text):
        # Drop a leading "label：" prefix (full-width colon). Text without
        # the colon is returned unchanged apart from whitespace stripping.
        return text.split("：", 1)[-1].strip()

    # Museum name
    title_tag = html.find("h4", class_="widget-title")
    ntnu_text = title_tag.text.strip() if title_tag else "師大美術館 NTNU Art Museum"

    # Address: taken from the second <div> carrying this inline style.
    address_divs = html.find_all("div", style="line-height: 1.5;")
    address_text = (
        after_label(address_divs[1].get_text()) if len(address_divs) > 1 else None
    )

    # Opening / closed hours: the last matching <p> is the closed-days line,
    # the ones before it are opening-hours lines.
    time_blocks = html.find_all("p", style="margin-bottom: 4px;")
    open_time_text, off_time_text = None, None
    if time_blocks:
        *open_tags, off_tag = time_blocks
        off_time_text = after_label(off_tag.get_text())

        # Only the first two opening-hours lines are reported; the label is
        # stripped from the first one only.
        open_list = [tag.get_text().strip() for tag in open_tags[:2]]
        if open_list:
            open_list[0] = after_label(open_list[0])
            # Built with join instead of an f-string with nested double
            # quotes, which is a SyntaxError before Python 3.12.
            open_time_text = ", ".join(open_list)

    return ntnu_text, address_text, open_time_text, off_time_text

"""抓取當期展覽清單（標題、網址、圖片）"""
def get_exhibitions(base_url: str):
    """Return the exhibitions listed on the page at *base_url*.

    Every ``<figure class="wp-caption">`` on the page yields one dict with
    the keys ``展覽名稱`` (caption text), ``網址`` (link href) and ``圖片``
    (image src). A value is ``None`` when the corresponding element or
    attribute is missing.
    """
    response = req.get(base_url, timeout=15)
    response.raise_for_status()
    page = bs(response.text, "html.parser")

    def attr_or_none(tag, name):
        # None when the tag is absent or does not carry the attribute.
        return tag[name] if tag and tag.has_attr(name) else None

    def describe(figure):
        # Build the record for a single <figure> element.
        caption = figure.find("figcaption")
        return {
            "展覽名稱": caption.get_text(strip=True) if caption else None,
            "網址": attr_or_none(figure.find("a"), "href"),
            "圖片": attr_or_none(figure.find("img"), "src"),
        }

    return [describe(fig) for fig in page.find_all("figure", class_="wp-caption")]

"""從展覽內頁抓取展覽日期與地點（統一從 entry clr 文字中用正則擷取）"""
def get_time_and_place(exh_url: str):
    """Extract the exhibition date and venue from an exhibition page.

    The text of the ``<div class="entry clr">`` element is searched with
    regular expressions for a labelled time and a labelled place.

    Args:
        exh_url: URL of the exhibition detail page.

    Returns:
        A tuple ``(time_text, place_text)``. Either item is ``None`` when
        the label is not found; both are ``None`` when the page has no
        ``entry clr`` element.

    Raises:
        Whatever ``requests`` raises for a failed request, including the
        error raised by ``raise_for_status()`` on a 4xx/5xx response.
    """
    response = req.get(exh_url, timeout=20)
    response.raise_for_status()
    soup = bs(response.text, "html.parser")

    entry = soup.find("div", class_="entry clr")
    if entry is None:
        # Page layout differs from the expected one: nothing to extract.
        return None, None

    # Plain text, one line per text node, so the patterns can stop at "\n".
    text = entry.get_text("\n", strip=True)

    def labelled_value(pattern):
        # Group 2 of the first match, or None when the label is absent.
        match = re.search(pattern, text)
        return match.group(2).strip() if match else None

    time_text = labelled_value(r"(展覽時間|時間)[:：]\s*([^\n。]+)")
    place_text = labelled_value(r"(展覽地點|地點)[:：]\s*([^\n。]+)")

    return time_text, place_text



def main():
    """Scrape the museum page and write every current exhibition to a CSV.

    Museum-level details are fetched once and copied into each exhibition
    record, each exhibition page is then visited for its date and venue,
    and the result is written to ``./output/NTNU_exhibitions.csv``.
    """
    # Step 1: museum-level information, shared by every row.
    museum_name, address_text, open_time, off_time = museum_info(BASE_URL)

    # Step 2: list of current exhibitions.
    exhibitions = get_exhibitions(BASE_URL)

    # Step 3: date and venue of each exhibition (skipped when it has no link).
    for record in exhibitions:
        detail_url = record.get("網址")
        if detail_url:
            time_text, place_text = get_time_and_place(record["網址"])
        else:
            time_text, place_text = None, None

        record.update(
            {
                "館別": museum_name,
                "地址": address_text,
                "開放時間": open_time,
                "閉館時間": off_time,
                "展覽日期": time_text,
                "展覽地點": place_text,
            }
        )

    # Step 4: write the CSV (BOM-prefixed UTF-8 via the "utf-8-sig" codec).
    output_dir = "./output"
    os.makedirs(output_dir, exist_ok=True)
    CSV_PATH = os.path.join(output_dir, "NTNU_exhibitions.csv")

    columns = [
        "館別",
        "地址",
        "開放時間",
        "閉館時間",
        "展覽名稱",
        "網址",
        "圖片",
        "展覽日期",
        "展覽地點",
    ]
    with open(CSV_PATH, "w", encoding="utf-8-sig", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(exhibitions)

    print(f"✅ 已輸出 {CSV_PATH}，共 {len(exhibitions)} 筆展覽。")


# Run the scraper only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()


✅ 已輸出 ./output/NTNU_exhibitions.csv，共 2 筆展覽。


In [None]:
import os
import re
import json
import requests as req
import bs4 as bs

# Scratch cell: fetch the current-exhibit page and print the museum's
# address, name, opening hours and closed days.
url = "https://www.artmuse.ntnu.edu.tw/index.php/current_exhibit/"
resp = req.get(url, timeout=15)  # a request without a timeout can block forever
resp.raise_for_status()
html = bs.BeautifulSoup(resp.text, "html.parser")

NTNU = html.find("h4", class_="widget-title")

# The address is read from the second <div> carrying this inline style.
address = html.find_all("div", style="line-height: 1.5;")
address_text = (
    address[1].get_text().strip().split("：", 1)[-1] if len(address) > 1 else None
)
print(address_text)
ntnu_text = NTNU.text.strip() if NTNU else None
print(ntnu_text)

time_blocks = html.find_all("p", style="margin-bottom: 4px;")

# The last matching <p> is the closed-days line; the rest are opening hours.
offtime = time_blocks.pop() if time_blocks else None
opentime = time_blocks
open_list = [tag.get_text().strip() for tag in opentime]

# Strip the "label：" prefix from the first line and append the second one.
# Built with join rather than an f-string containing nested double quotes,
# which is a SyntaxError before Python 3.12.
if open_list:
    opentime_text = ", ".join([open_list[0].split("：", 1)[-1], *open_list[1:2]])
else:
    opentime_text = None

offtime_text = offtime.get_text().split("：", 1)[-1] if offtime is not None else None
print(opentime_text)
print(offtime_text)


106010 臺北市大安區和平東路一段129號
師大美術館 NTNU Art Museum
週二至週五 09:30-17:30, 週六、週日 10:00-17:30
週一及國定假日


In [None]:
import os
import re
import json
import requests as req
import bs4 as bs

# Scratch cell: list the title, link and image of every exhibition figure
# on the current-exhibit page.
url = "https://www.artmuse.ntnu.edu.tw/index.php/current_exhibit/"
resp = req.get(url, timeout=15)  # a request without a timeout can block forever
resp.raise_for_status()
html = bs.BeautifulSoup(resp.text, "html.parser")
figures = html.find_all("figure", class_="wp-caption")

exhibitions = []

for f in figures:
    # Look each child up once; .get() yields None instead of raising
    # KeyError when the tag exists but lacks the attribute.
    a_tag = f.find("a")
    img_tag = f.find("img")
    caption = f.find("figcaption")

    link = a_tag.get("href") if a_tag else None
    img = img_tag.get("src") if img_tag else None
    title = caption.text.strip() if caption else None

    exhibitions.append({
        "title": title,
        "url": link,
        "image": img,
    })

# Print the results
for e in exhibitions:
    print(f"標題：{e['title']}")
    print(f"網址：{e['url']}")
    print(f"圖片：{e['image']}")
    print("-" * 60)


標題：心象繪畫──高行健
網址：https://www.artmuse.ntnu.edu.tw/index.php/2025visioninterieure/
圖片：https://www.artmuse.ntnu.edu.tw/wp-content/uploads/elementor/thumbs/1280x800-scaled-rarluzfsf50hegxxzgasu9fetxsh89h2swro436zb4.png
------------------------------------------------------------
標題：時光逆旅：文物修護的美學與實踐
網址：https://www.artmuse.ntnu.edu.tw/index.php/elementor-10351/
圖片：https://www.artmuse.ntnu.edu.tw/wp-content/uploads/elementor/thumbs/EDM_A款_20250627-r85qfot4tmlyctuutjwsck7f94yzrei6vb16lon680.jpg
------------------------------------------------------------


In [None]:
import os
import json
import requests as req
import bs4 as bs

# Scratch cell: try the exhibition-time regex against one exhibition page.
import re  # used below; this cell's own import list does not include it

url = "https://www.artmuse.ntnu.edu.tw/index.php/elementor-10351/"
resp = req.get(url, timeout=20)  # a request without a timeout can block forever
resp.raise_for_status()
html = bs.BeautifulSoup(resp.text, "html.parser")
entry = html.find("div", class_="entry clr")

# Guard both lookups: a page without the entry div or without a labelled
# time would otherwise raise AttributeError on None.
text = entry.get_text("\n", strip=True) if entry is not None else ""
m_time = re.search(r"(展覽時間|時間)[:：]\s*([^\n。]+)", text)
time_text = m_time.group(2).strip() if m_time else None
print(m_time)
print(time_text)

<re.Match object; span=(232, 249), match='展覽時間：2024/7/1（二）起'>
2024/7/1（二）起


In [None]:
import os
import json
import requests as req
import bs4 as bs
# Scratch cell: dump the raw "entry clr" container(s) of one exhibition page
# to inspect the markup.
url = "https://www.artmuse.ntnu.edu.tw/index.php/2025visioninterieure/"
resp = req.get(url, timeout=20)  # a request without a timeout can block forever
resp.raise_for_status()  # do not parse an HTTP error page as if it were content
html = bs.BeautifulSoup(resp.text, "html.parser")
figures = html.find_all("div", class_="entry clr")
print(figures)

[<div class="entry clr" itemprop="text">
<p></p>
<figure class="wp-block-image size-full"><img alt="" class="wp-image-11103" decoding="async" fetchpriority="high" height="1600" sizes="(max-width: 2560px) 100vw, 2560px" src="https://www.artmuse.ntnu.edu.tw/wp-content/uploads/2025/08/官網當期展頁＿高行健1280x800-scaled.png" srcset="https://www.artmuse.ntnu.edu.tw/wp-content/uploads/2025/08/官網當期展頁＿高行健1280x800-scaled.png 2560w, https://www.artmuse.ntnu.edu.tw/wp-content/uploads/2025/08/官網當期展頁＿高行健1280x800-300x188.png 300w, https://www.artmuse.ntnu.edu.tw/wp-content/uploads/2025/08/官網當期展頁＿高行健1280x800-1024x640.png 1024w, https://www.artmuse.ntnu.edu.tw/wp-content/uploads/2025/08/官網當期展頁＿高行健1280x800-768x480.png 768w, https://www.artmuse.ntnu.edu.tw/wp-content/uploads/2025/08/官網當期展頁＿高行健1280x800-1536x960.png 1536w, https://www.artmuse.ntnu.edu.tw/wp-content/uploads/2025/08/官網當期展頁＿高行健1280x800-2048x1280.png 2048w" width="2560"/></figure>
<div aria-hidden="true" class="wp-block-spacer" style="height:67px"></d

In [None]:
def get_time_and_place(exh_url):
    """Extract the exhibition time and place from an exhibition page.

    Best-effort: a failed download is logged and reported as
    ``(None, None)`` instead of raising.

    Args:
        exh_url: URL of the exhibition detail page.

    Returns:
        A tuple ``(time_text, place_text)``. Either item is ``None`` when
        its label is not found; both are ``None`` when the request fails or
        the page has no ``<div class="entry clr">`` element.
    """
    import logging

    # Imported locally because the module-level name ``bs`` is bound to the
    # BeautifulSoup class by one cell and to the ``bs4`` module by others;
    # calling ``bs(...)`` while it is the module raises TypeError.
    from bs4 import BeautifulSoup

    try:
        r = req.get(exh_url, timeout=20)
        r.raise_for_status()
    except req.RequestException as e:
        # Only network/HTTP failures are treated as "no data"; programming
        # errors are no longer silently swallowed.
        logging.getLogger(__name__).warning("Failed to fetch %s: %s", exh_url, e)
        return None, None

    soup = BeautifulSoup(r.text, "html.parser")
    entry = soup.find("div", class_="entry clr")
    if not entry:
        return None, None

    text = entry.get_text("\n", strip=True)

    # Exhibition time
    time_match = re.search(r"(展覽時間|時間)[:：]\s*([^\n。；;]+)", text)
    time_text = time_match.group(2).strip() if time_match else None

    # Exhibition place
    place_match = re.search(r"(展覽地點|地點)[:：]\s*([^\n。；;]+)", text)
    place_text = place_match.group(2).strip() if place_match else None

    return time_text, place_text