In [7]:
import os
import re



# File selection rule: HTML pages whose names start with "tab" (Tab1_*.html, etc.).
# os.listdir returns entries in arbitrary order, so sort for a stable processing/log order.
html_files = sorted(
    f for f in os.listdir(BASE_DIR)
    if f.lower().endswith(".html") and f.lower().startswith("tab")
)

# Create the asset directories.
os.makedirs(os.path.join(BASE_DIR, "css"), exist_ok=True)
os.makedirs(os.path.join(BASE_DIR, "js"), exist_ok=True)

# Inline <style> blocks and inline <script> blocks (script tags whose attributes
# contain "src" are skipped). HTML tag names are case-insensitive, so <STYLE> and
# <Script> must match as well.
style_pattern = re.compile(r"<style.*?>(.*?)</style>", re.DOTALL | re.IGNORECASE)
script_pattern = re.compile(r"<script(?![^>]*src).*?>(.*?)</script>", re.DOTALL | re.IGNORECASE)
# Anchors in front of which the generated <link> / <script src> tags are inserted.
head_close_pattern = re.compile(r"</head>", re.IGNORECASE)
body_close_pattern = re.compile(r"</body>", re.IGNORECASE)

for html_file in html_files:
    with open(os.path.join(BASE_DIR, html_file), "r", encoding="utf-8") as f:
        content = f.read()

    base_name = os.path.splitext(html_file)[0].lower()  # e.g., tab1_3dglobe

    # Reset per file: these stay None unless an asset file was actually written.
    css_path = None
    js_path = None

    # Move inline <style> content into css/<base_name>.css.
    styles = style_pattern.findall(content)
    if styles:
        without_styles = style_pattern.sub("", content)
        # Only strip the inline styles when the <link> can be inserted; otherwise
        # the page would lose its styling with nothing referencing the CSS file.
        if head_close_pattern.search(without_styles):
            # URL paths always use "/", regardless of the OS path separator.
            css_path = f"css/{base_name}.css"
            with open(os.path.join(BASE_DIR, "css", f"{base_name}.css"), "w", encoding="utf-8") as f_css:
                f_css.write("\n\n".join(styles))
            css_link = f'<link rel="stylesheet" href="{css_path}">'
            # A callable replacement avoids backslash-escape processing and keeps
            # the closing tag's original casing; count=1 inserts the link once.
            content = head_close_pattern.sub(
                lambda m: f"  {css_link}\n{m.group(0)}", without_styles, count=1
            )
        else:
            print(f"⚠ Skipped CSS extraction for {html_file}: no </head> tag found")

    # Move inline <script> content into js/<base_name>.js.
    # NOTE(review): every inline script without "src" is collected, including
    # non-JavaScript ones (e.g. type="application/json") — confirm the pages have none.
    scripts = script_pattern.findall(content)
    if scripts:
        without_scripts = script_pattern.sub("", content)
        # Same guard as for styles: never drop inline code without a replacement tag.
        if body_close_pattern.search(without_scripts):
            js_path = f"js/{base_name}.js"
            with open(os.path.join(BASE_DIR, "js", f"{base_name}.js"), "w", encoding="utf-8") as f_js:
                f_js.write("\n\n".join(scripts))
            js_link = f'<script src="{js_path}"></script>'
            content = body_close_pattern.sub(
                lambda m: f"  {js_link}\n{m.group(0)}", without_scripts, count=1
            )
        else:
            print(f"⚠ Skipped JS extraction for {html_file}: no </body> tag found")

    # Save the modified HTML.
    with open(os.path.join(BASE_DIR, html_file), "w", encoding="utf-8") as f_out:
        f_out.write(content)

    print(f"✔ Processed {html_file}:")
    if css_path:
        print(f"  → Extracted CSS: {css_path}")
    if js_path:
        print(f"  → Extracted JS:  {js_path}")


✔ Processed Tab2_Europe.html:
  → Extracted CSS: css/tab2_europe.css
  → Extracted JS:  js/tab2_europe.js
✔ Processed Tab3_City.html:
  → Extracted CSS: css/tab3_city.css
  → Extracted JS:  js/tab3_city.js


In [8]:
import os
import re

# Assumes BASE_DIR contains an index.html.
html_file = os.path.join(BASE_DIR, "index.html")

# Create the asset directories.
css_dir = os.path.join(BASE_DIR, "css")
js_dir = os.path.join(BASE_DIR, "js")
os.makedirs(css_dir, exist_ok=True)
os.makedirs(js_dir, exist_ok=True)

# Inline <style> blocks and inline <script> blocks (script tags whose attributes
# contain "src" are skipped). HTML tag names are case-insensitive, so <STYLE> and
# <Script> must match as well.
style_pattern = re.compile(r"<style.*?>(.*?)</style>", re.DOTALL | re.IGNORECASE)
script_pattern = re.compile(r"<script(?![^>]*src).*?>(.*?)</script>", re.DOTALL | re.IGNORECASE)
# Anchors in front of which the generated <link> / <script src> tags are inserted.
head_close_pattern = re.compile(r"</head>", re.IGNORECASE)
body_close_pattern = re.compile(r"</body>", re.IGNORECASE)

# Read the HTML content.
with open(html_file, "r", encoding="utf-8") as f:
    content = f.read()

# Move inline <style> content into css/index.css.
styles = style_pattern.findall(content)
if styles:
    without_styles = style_pattern.sub("", content)
    # Only strip the inline styles when the <link> can be inserted; otherwise
    # the page would lose its styling with nothing referencing the CSS file.
    if head_close_pattern.search(without_styles):
        css_path = os.path.join(css_dir, "index.css")
        with open(css_path, "w", encoding="utf-8") as f_css:
            f_css.write("\n\n".join(styles))
        css_link = '<link rel="stylesheet" href="css/index.css">'
        # A callable replacement keeps the closing tag's original casing;
        # count=1 inserts the link exactly once.
        content = head_close_pattern.sub(
            lambda m: f"  {css_link}\n{m.group(0)}", without_styles, count=1
        )
    else:
        print("⚠ Skipped CSS extraction for index.html: no </head> tag found")

# Move inline <script> content into js/index.js, wrapped in a DOMContentLoaded handler.
# NOTE(review): wrapping puts top-level function/var declarations inside the
# callback's scope; inline HTML handlers (e.g. onclick="foo()") that rely on them
# as globals would stop resolving — confirm index.html has none.
scripts = script_pattern.findall(content)
if scripts:
    without_scripts = script_pattern.sub("", content)
    # Same guard as for styles: never drop inline code without a replacement tag.
    if body_close_pattern.search(without_scripts):
        js_wrapped = (
            "document.addEventListener('DOMContentLoaded', function() {\n"
            + "\n\n".join(scripts)
            + "\n});"
        )
        js_path = os.path.join(js_dir, "index.js")
        with open(js_path, "w", encoding="utf-8") as f_js:
            f_js.write(js_wrapped)
        js_script = '<script src="js/index.js"></script>'
        content = body_close_pattern.sub(
            lambda m: f"  {js_script}\n{m.group(0)}", without_scripts, count=1
        )
    else:
        print("⚠ Skipped JS extraction for index.html: no </body> tag found")

# Write the modified HTML back.
with open(html_file, "w", encoding="utf-8") as f_out:
    f_out.write(content)

print("✔ 提取完成：index.html → css/index.css, js/index.js")


✔ 提取完成：index.html → css/index.css, js/index.js
