# In [32]:  (Jupyter notebook cell prompt, kept as a comment so the file parses as Python)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import csv
from datetime import date
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# Shanghai Futures Exchange (SHFE) trading-data page (daily and weekly reports).
SHFE_daily = 'https://www.shfe.com.cn/reports/tradedata/dailyandweeklydata'
# Root folder for scraped data; deliberately a relative path.
OUTPUT_DIR = "CatchedData"
# Today's date as YYYYMMDD; reused for the sub-folders and the file name.
today = f"{date.today():%Y%m%d}"
# Resulting layout: <OUTPUT_DIR>/<YYYY>/<MM>/<YYYYMMDD>SHFE_OUTPUT.csv
OUTPUT_FILE = os.path.join(
    OUTPUT_DIR,
    today[:4],
    today[4:6],
    f"{today}SHFE_OUTPUT.csv",
)

def page_catch():
    """Load the SHFE report page in headless Edge and return it parsed.

    The page is opened with Selenium, then an explicit wait of up to 10
    seconds blocks until a ``div.kx_index_table`` element is present, so the
    page source is read only after the table container exists.

    Returns:
        A ``BeautifulSoup`` object built from the rendered page source, or
        ``None`` if starting the browser, loading the page, or waiting for
        the table raised an exception (the error is printed).
    """
    options = webdriver.EdgeOptions()
    options.add_argument("--headless=new")  # run Edge without a visible window
    app = None
    try:
        app = webdriver.Edge(options=options)
        app.get(SHFE_daily)
        # Explicit wait: at most 10 seconds for the table container to appear.
        WebDriverWait(app, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.kx_index_table'))
        )
        return BeautifulSoup(app.page_source, 'html.parser')
    except Exception as e:
        # Best-effort scraper: report the failure and let the caller see None.
        print(f"爬取页面出错: {e}")
        return None
    finally:
        # Always shut the browser down, including on timeouts and load
        # errors; otherwise the headless Edge process is left running.
        if app is not None:
            try:
                app.quit()
            except Exception as e:
                # A failed shutdown must not discard an already parsed page.
                print(f"关闭浏览器出错: {e}")

def extract_csv(soup):
    """Write the tables found in *soup* to ``OUTPUT_FILE`` as CSV.

    Every ``div.kx_index_table`` element is processed in document order.
    For each one, the stripped texts of the ``<th>`` cells in its first
    ``<thead>`` are written as a single header row, followed by one CSV row
    per ``<tr>`` (stripped text of each ``<td>``/``<th>`` cell). Data rows
    are taken from the first ``<tbody>`` when there is one; otherwise from
    every ``<tr>`` in the element except those inside the ``<thead>``,
    which were already written as the header.

    The parent directories of ``OUTPUT_FILE`` are created if missing and
    the file is overwritten.

    Args:
        soup: Parsed page, as returned by ``page_catch``. A falsy value
            makes the function return without touching the file system.
    """
    if not soup:
        return

    # Create the whole <year>/<month> directory chain in one call.
    os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)

    with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        for table in soup.select('div.kx_index_table'):
            headers = table.find('thead')
            header_tr_ids = set()
            if headers:
                writer.writerow(
                    [th.text.strip() for th in headers.find_all('th')]
                )
                # Remember the header rows so the no-<tbody> fallback below
                # does not write them a second time as data rows.
                header_tr_ids = {id(tr) for tr in headers.find_all('tr')}

            body = table.find('tbody')
            if body:
                rows = body.find_all('tr')
            else:
                rows = [
                    tr for tr in table.find_all('tr')
                    if id(tr) not in header_tr_ids
                ]

            for row in rows:
                writer.writerow(
                    [cell.text.strip() for cell in row.find_all(['td', 'th'])]
                )

def main():
    """Scrape the page, export it to CSV, and print the outcome."""
    soup = page_catch()
    if not soup:
        # Nothing was fetched, so there is nothing to export.
        print("没有获取到数据")
        return

    extract_csv(soup)
    print(f"数据已保存到 {OUTPUT_FILE}")

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

# Captured cell output: 数据已保存到 CatchedData\2025\01\20250107SHFE_OUTPUT.csv
