In [1]:
!pip install selenium

Collecting selenium
  Downloading selenium-4.38.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio<1.0,>=0.31.0 (from selenium)
  Downloading trio-0.32.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio<1.0,>=0.31.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket<1.0,>=0.12.2->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.38.0-py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.32.0-py3-none-any.whl (512 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.0/512.0 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloadin

In [6]:
# =============================================
# Nikkei225（6か月）データ取得：安定版・完全セル（Colab/ローカル対応）
#  - 指定関数: extract_stock_data(html), get_stock_values(driver, url)
#  - /element/.../rect を極力使わず JS の getBoundingClientRect() で安定化
#  - 収集は「ホバー中は DOM を間引き保存 → 最後に一括解析」でタイムアウト回避
# =============================================

!pip -q install selenium beautifulsoup4

import os, sys, re, time, shlex, subprocess, shutil
from datetime import datetime
from typing import List, Optional, Dict
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Page that get_stock_values() opens in the main section of this cell.
URL       = "https://www.nikkei.com/markets/worldidx/chart/nk225/?type=6month"
# Default for _make_driver(): when True, Chrome is started with --headless=new.
HEADLESS  = True
# Value passed to Chrome's --window-size flag ("width,height").
WINDOW    = "1400,960"
# Timeout in seconds handed to WebDriverWait for the body / chart waits.
WAIT_S    = 20
STEP_PX   = 2     # hover pitch in pixels per step (larger = faster but coarser)
SLEEP_SEC = 0.004 # pause in seconds between two hover steps
# True when the google.colab module is already loaded, i.e. the cell runs on Colab.
IN_COLAB  = "google.colab" in sys.modules

def _run(cmd: str):
    p = subprocess.run(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    print(f"$ {cmd}\n{p.stdout}")
    return p.returncode == 0

# ---------- Colab で Chrome を確実に用意 ----------
# Locate an existing Chrome binary; on Colab, install one when none is found.
CHROME_BIN = shutil.which("google-chrome") or shutil.which("google-chrome-stable")
if IN_COLAB and not CHROME_BIN:
    _run("apt-get update -y")
    # Install the official .deb directly (apt resolves its dependencies).
    # -O pins the output file name: without it a re-download is saved as
    # "*.deb.1" and a stale or partial file from an earlier failed run would
    # be installed again.
    _run(
        "wget -q -O google-chrome-stable_current_amd64.deb "
        "https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb"
    )
    ok = _run("apt-get -y install ./google-chrome-stable_current_amd64.deb")
    if not ok:
        # Let apt repair broken dependencies, then retry the install once.
        _run("apt-get -y -f install")
        _run("apt-get -y install ./google-chrome-stable_current_amd64.deb")
    CHROME_BIN = shutil.which("google-chrome") or shutil.which("google-chrome-stable") or "/usr/bin/google-chrome"
    # Only probe the version when the binary really exists; the fallback path
    # above is a guess and may be absent if the installation failed.
    if os.path.exists(CHROME_BIN):
        _run(f"{CHROME_BIN} --version")
    else:
        print(f"Chrome binary not found at {CHROME_BIN}; installation appears to have failed.")

# ---------- Step2: HTML → 値 抽出 ----------
def extract_stock_data(html: str) -> Optional[List[str]]:
    """Extract ``[date, open, high, low, close]`` from HTML (or plain) text.

    The markup is flattened to text with BeautifulSoup. The first date-like
    token (``YYYY/MM/DD``, ``YYYY-MM-DD``, ``YYYY.MM.DD`` or ``YYYY年MM月DD``)
    found anywhere in that text becomes the date, and the first number after
    each of the labels 始値 / 高値 / 安値 / 終値 becomes the matching value.

    NOTE(review): because the *first* date and the *first* labelled numbers in
    the text are used, feeding a whole-page dump may pick up a static summary
    rather than a hover tooltip - confirm against the real page.

    Args:
        html: HTML or plain text to analyse. Empty or ``None`` yields ``None``.

    Returns:
        ``[date, open, high, low, close]`` where ``date`` is ``YYYY-MM-DD`` and
        the prices are strings exactly as printed (thousands separators kept,
        trailing punctuation removed), or ``None`` when the date or any of the
        four values is missing.
    """
    if not html:
        return None

    soup = BeautifulSoup(html, "html.parser")
    text = soup.get_text(" ", strip=True)

    m = re.search(r"(\d{4})[./年-]\s*(\d{1,2})[./月-]\s*(\d{1,2})", text)
    if not m:
        return None
    y, mo, d = map(int, m.groups())
    date_str = f"{y:04d}-{mo:02d}-{d:02d}"

    # Digits with optional thousands separators and at most one decimal part.
    # A sentence-ending "." is therefore never captured; a list-separating ","
    # can still be swallowed by the integer part and is stripped in pick().
    number = r"(\d[\d,]*(?:\.\d+)?)"

    def pick(label: str, loose: bool = False) -> Optional[str]:
        # loose=True tolerates arbitrary non-digit text between label and value.
        gap = r"[^0-9]*" if loose else r"\s*[:：=]?\s*"
        p = re.search(label + gap + number, text)
        return p.group(1).rstrip(",") if p else None

    open_ = pick("始値")
    high  = pick("高値")
    low   = pick("安値")
    close = pick("終値") or pick("終値", loose=True)

    if not all([open_, high, low, close]):
        return None
    return [date_str, open_, high, low, close]

# ---------- Helper: /rect を使わない矩形取得 & 安定待ち ----------
def _rect(driver, el):
    # JS の getBoundingClientRect() でサイズ/位置を取得（高速・安定）
    return driver.execute_script("""
        const r = arguments[0].getBoundingClientRect();
        return {x: r.x, y: r.y, width: Math.floor(r.width), height: Math.floor(r.height)};
    """, el)

def _wait_chart_ready(driver, el, tries=20, interval=0.2):
    """Poll the size of ``el`` until it looks fully rendered.

    A measurement qualifies when it is at least 300 px wide and 200 px high.
    The chart counts as stable once two consecutive qualifying measurements
    each differ from the previous qualifying one by less than 2 px in both
    width and height.

    Args:
        driver: WebDriver used for the measurements (via ``_rect``).
        el: Element to measure.
        tries: Maximum number of measurements.
        interval: Seconds slept after each measurement that did not finish
            the wait.

    Returns:
        The rect that completed the stability check; otherwise the last
        qualifying rect, or a fresh measurement when none qualified.
    """
    previous = None
    streak = 0
    for _attempt in range(tries):
        current = _rect(driver, el)
        if not (current["width"] >= 300 and current["height"] >= 200):
            # Too small to be the rendered chart yet; measure again later.
            time.sleep(interval)
            continue

        unchanged = (
            previous
            and abs(current["width"] - previous["width"]) < 2
            and abs(current["height"] - previous["height"]) < 2
        )
        if unchanged:
            streak += 1
            if streak >= 2:
                return current
        else:
            streak = 0
        previous = current
        time.sleep(interval)

    return previous or _rect(driver, el)

# ---------- Step3: ページアクセス & 収集 ----------
def _make_driver(headless: bool = HEADLESS) -> webdriver.Chrome:
    """Create a Chrome WebDriver configured for this scraper.

    Args:
        headless: When true, Chrome is started with ``--headless=new``.

    Returns:
        A ``webdriver.Chrome`` with a 60 s page-load timeout, the "eager"
        page-load strategy, and the window size taken from ``WINDOW``.
    """
    chrome_options = Options()

    # Same flags, in the same order, as a plain sequence of add_argument calls.
    flags = [f"--window-size={WINDOW}"]
    if headless:
        flags.append("--headless=new")
    flags += [
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--no-first-run",
        "--no-default-browser-check",
    ]
    for flag in flags:
        chrome_options.add_argument(flag)

    # presumably value 2 blocks image loading - confirm against Chrome's pref docs
    chrome_options.add_experimental_option(
        "prefs", {"profile.managed_default_content_settings.images": 2}
    )
    chrome_options.page_load_strategy = "eager"

    if IN_COLAB and CHROME_BIN:
        # On Colab, point Selenium at the located Chrome and drop the sandbox.
        chrome_options.binary_location = CHROME_BIN
        chrome_options.add_argument("--no-sandbox")

    # No driver path is given; Selenium is left to resolve the driver itself.
    browser = webdriver.Chrome(options=chrome_options)
    browser.set_page_load_timeout(60)

    # Best-effort attempt to lengthen the HTTP read timeout of the command
    # channel. NOTE(review): this writes a private attribute and may have no
    # effect on current Selenium releases - confirm.
    try:
        browser.command_executor._timeout = 180
    except Exception:
        pass
    return browser

def _find_chart(driver):
    """Return the largest ``<canvas>`` on the page, or else the largest ``<svg>``.

    Element areas are measured in the browser with a single
    ``getBoundingClientRect()`` script per tag. This replaces reading
    ``WebElement.size`` for every element, which costs WebDriver round-trips
    per element and contradicts this file's rule of avoiding the ``/rect``
    endpoint.

    Args:
        driver: WebDriver with the target page loaded.

    Returns:
        The WebElement with the greatest rendered area; canvases take
        precedence over SVGs, and the first element wins a tie.

    Raises:
        RuntimeError: When the page contains neither a canvas nor an svg.
    """
    for tag in ("canvas", "svg"):
        candidates = driver.find_elements(By.TAG_NAME, tag)
        if not candidates:
            continue
        # One script call returns the area of every candidate at once.
        areas = driver.execute_script("""
            return arguments[0].map(function(e){
              const r = e.getBoundingClientRect();
              return r.width * r.height;
            });
        """, candidates)
        # max() keeps the first of equally large elements, like a stable sort.
        best = max(range(len(candidates)), key=lambda i: areas[i])
        return candidates[best]
    raise RuntimeError("チャート領域(canvas/svg)が見つかりません。")

def _start_dom_capture(driver):
    # ホバー中は body.innerText を間引き収集（差分のみ保存）
    driver.execute_script("""
      (function(){
        if (window.__nikkeiTimer) clearInterval(window.__nikkeiTimer);
        window.__nikkeiData = [];
        window.__nikkeiLast = '';
        window.__nikkeiTimer = setInterval(function(){
          var t = document.body ? (document.body.innerText || document.body.textContent || '') : '';
          if (t && t !== window.__nikkeiLast){
            window.__nikkeiLast = t;
            window.__nikkeiData.push(t);
          }
        }, 60);
      })();
    """)

def _stop_dom_capture(driver):
    try:
        driver.execute_script("if(window.__nikkeiTimer) clearInterval(window.__nikkeiTimer);")
    except Exception:
        pass
    return driver.execute_script("return (window.__nikkeiData||[]);")

def get_stock_values(driver: webdriver.Chrome, url: str) -> List[List[str]]:
    """Open ``url`` and collect OHLC rows by hovering across the chart.

    The pointer is placed near the chart's right edge (vertically centred) and
    moved left in ``STEP_PX`` increments across the whole chart width. While
    it moves, an in-page timer records the body text; the recorded texts are
    parsed afterwards with ``extract_stock_data``.

    Args:
        driver: A started Chrome WebDriver.
        url: Page containing the chart.

    Returns:
        One ``[date, open, high, low, close]`` list per distinct date, sorted
        by date ascending. When a date is seen more than once, the value from
        the latest capture wins.

    Raises:
        RuntimeError: When no canvas/svg chart is found (from ``_find_chart``).
        selenium.common.exceptions.TimeoutException: When the body or chart
            does not become available within ``WAIT_S`` seconds.
    """
    driver.get(url)
    WebDriverWait(driver, WAIT_S).until(EC.presence_of_element_located((By.TAG_NAME, "body")))

    chart = _find_chart(driver)
    WebDriverWait(driver, WAIT_S).until(EC.visibility_of(chart))

    # Scroll the chart to the middle of the viewport, wait until its size has
    # settled, then read the width through JS (avoids the /rect endpoint).
    driver.execute_script("arguments[0].scrollIntoView({block:'center', inline:'center'});", chart)
    r = _wait_chart_ready(driver, chart)
    w = int(r["width"])

    edge_margin = 3  # keep the pointer a few pixels inside the chart
    half_span = max(0, w // 2 - edge_margin)

    actions = ActionChains(driver)
    # Selenium 4 measures move_to_element_with_offset() offsets from the
    # element's in-view CENTRE, not its top-left corner. (+half_span, 0) is
    # therefore the right edge at mid height. Passing (w//2, h//2) would land
    # on the bottom-right corner, and a further rightward move would leave
    # the chart entirely.
    actions.move_to_element_with_offset(chart, half_span, 0).perform()
    time.sleep(0.15)

    _start_dom_capture(driver)

    # Sweep the full width (right edge -> left edge), never past the left edge.
    steps = max(1, (2 * half_span) // STEP_PX)
    for _ in range(steps):
        actions.move_by_offset(-STEP_PX, 0).perform()
        time.sleep(SLEEP_SEC)

    # The capture timer samples every 60 ms; give it time for one last tick so
    # the left-most pointer position is recorded too.
    time.sleep(0.15)
    texts = _stop_dom_capture(driver)

    # Parse every snapshot; for a repeated date the later capture overwrites
    # the earlier one (captures arrive in right-to-left order).
    results: Dict[str, List[str]] = {}
    for t in texts or []:
        row = extract_stock_data(t)
        if row:
            results[row[0]] = row

    return [results[k] for k in sorted(results.keys())]

# ---------- Step4: main ----------
# Time the whole run: driver start-up, page load, hover sweep and parsing.
start = time.time()
driver = _make_driver(HEADLESS)
rows: List[List[str]] = []
try:
    rows = get_stock_values(driver, URL)
finally:
    # Always shut the browser down, even when scraping failed; an error raised
    # by quit() itself is deliberately ignored.
    try:
        driver.quit()
    except Exception:
        pass
end = time.time()

# Report start time, end time and elapsed seconds.
print(f"開始: {datetime.fromtimestamp(start)}")
print(f"終了: {datetime.fromtimestamp(end)}")
print(f"所要時間: {end - start:.2f} 秒")

# Assignment requirement: print date, open, high, low, close in this order.
# Note: the values keep their thousands separators, so each ", "-separated
# line also contains commas that belong to the numbers themselves.
for r in rows:
    print(*r, sep=", ")

# (Optional) Uncomment the lines below to save the rows as CSV.
# import pandas as pd
# df = pd.DataFrame(rows, columns=["日付","始値","高値","安値","終値"])
# df.to_csv("nikkei6m.csv", index=False, encoding="utf-8-sig")
# print("saved: nikkei6m.csv")


開始: 2025-11-02 09:27:15.517435
終了: 2025-11-02 09:28:09.913798
所要時間: 54.40 秒
2025-10-31, 51,629.8, 52,411.34, 51,613.03, 52,411.34
