In [1]:
# ライブラリ(Selenium)のインポート
from selenium import webdriver
from time import sleep

# ライブラリ(Pandas)のインポート
import pandas as pd
from datetime import datetime as dt

# Load the Chrome driver from a local chromedriver binary.
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium 4 releases expect a Service object — confirm the
# pinned Selenium version before upgrading.
path = 'C:\\Users\\Owner\\chromedriver'
driver = webdriver.Chrome(path)

# Page to scrape and the XPath of the pager button used to advance.
url = 'https://finance.yahoo.co.jp/quote/6701.T/history'
next_button_xpath = '//*[@id="pagerbtm"]/ul/li[7]/button'

# One DataFrame per scraped page; concatenated once after the loop instead of
# growing a frame with DataFrame.append (quadratic, and removed in pandas 2.0).
pages = []

try:
    # Open the target site and read the first page. A failure here propagates,
    # because there is nothing useful to continue with.
    driver.get(url)
    # NOTE(review): pd.read_html downloads driver.current_url itself rather than
    # parsing the browser's DOM, so this relies on the pager changing the URL —
    # confirm.
    data = pd.read_html(driver.current_url, header=0)
    pages.append(data[0])

    try:
        # Keep paging for as long as the pager button can be pressed.
        while True:
            # find_element('xpath', ...) works on Selenium 3 and 4, unlike the
            # find_element_by_xpath helper that was removed in Selenium 4.3.
            btn = driver.find_element('xpath', next_button_xpath)

            # On the last page the button carries a `disabled` attribute.
            if btn.get_attribute('disabled') is not None:
                break

            # Scroll to roughly 25% of the full page height before clicking
            # (presumably so the button is clickable — confirm).
            driver.execute_script('window.scrollTo(0, document.body.scrollHeight * 0.25);')
            sleep(1)
            # Move to the next page and give it time to load.
            btn.click()
            sleep(1)
            # Read the table of the page we just navigated to.
            data = pd.read_html(driver.current_url, header=0)
            pages.append(data[0])
    except Exception as exc:
        # Best effort: keep the pages collected so far, but report what went
        # wrong instead of hiding the cause.
        print('例外発生:', repr(exc))
finally:
    # Always close the browser so chromedriver processes do not leak.
    driver.quit()

# Main DataFrame: every scraped page stacked with a fresh 0..n-1 index.
main = pd.concat(pages, ignore_index=True)

# Clean the scraped frame in one chained pipeline:
#   1. drop rows that contain missing values (NaN),
#   2. parse the '日付' strings into a datetime column '修正日付',
#   3. drop the now-redundant string column '日付',
#   4. use '修正日付' as the index,
#   5. sort the rows by that index in ascending order.
main = (
    main.dropna()
    .assign(**{'修正日付': lambda frame: pd.to_datetime(frame['日付'], format="%Y年%m月%d日")})
    .drop(columns='日付')
    .set_index('修正日付')
    .sort_index(ascending=True)
)

In [2]:
# Display the whole cleaned DataFrame (rendered as the cell's output)
main

Unnamed: 0_level_0,始値,高値,安値,終値,出来高,調整後終値*
修正日付,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-07,6180,6200,6060,6080,1197600,6080
2021-09-08,6090,6100,5990,6040,1332200,6040
2021-09-09,5990,6030,5950,5970,1089000,5970
2021-09-10,5990,6200,5990,6200,1697400,6200
2021-09-13,6170,6200,6120,6140,816000,6140
...,...,...,...,...,...,...
2022-09-01,5050,5080,4990,4995,1252000,4995
2022-09-02,4990,5000,4915,4955,1177200,4955
2022-09-05,4945,4960,4905,4940,830500,4940
2022-09-06,4930,4945,4885,4920,787300,4920


In [20]:
# Show the rows whose closing price ('終値') is at least 6250 and below 6300
main.loc[lambda frame: frame['終値'].ge(6250) & frame['終値'].lt(6300)]

Unnamed: 0_level_0,始値,高値,安値,終値,出来高,調整後終値*
修正日付,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-24,6170,6280,6170,6270,1135200,6270
2021-09-27,6270,6340,6250,6280,747000,6280
2021-10-15,6130,6280,6110,6270,1274100,6270
2021-10-18,6260,6290,6230,6280,751400,6280
2021-10-19,6300,6330,6260,6260,618700,6260
2021-10-22,6210,6280,6200,6250,657400,6250
2021-10-26,6250,6310,6230,6290,645500,6290


In [31]:
# Show the rows whose '修正日付' index falls in October 2021.
# Partial-string row selection must go through .loc: doing it with plain
# DataFrame[...] indexing is deprecated and was removed in pandas 2.0.
main.loc['2021-10']

Unnamed: 0_level_0,始値,高値,安値,終値,出来高,調整後終値*
修正日付,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-01,6030,6060,5960,5970,886200,5970
2021-10-04,6060,6100,5970,6040,1117800,6040
2021-10-05,6020,6030,5780,5870,1287300,5870
2021-10-06,5970,6020,5830,5860,1398400,5860
2021-10-07,5890,5950,5810,5830,1044700,5830
2021-10-08,5890,6060,5890,6010,1093200,6010
2021-10-11,6020,6120,5970,6070,961600,6070
2021-10-12,6010,6070,5970,6040,770000,6040
2021-10-13,6000,6060,5990,6010,630100,6010
2021-10-14,6030,6090,6020,6050,703700,6050


In [22]:
# Show the rows whose '修正日付' index falls in October 2021.
# The year must be tested as well: a month-only mask would also match October
# of any other year present in the data.
main[(main.index.year == 2021) & (main.index.month == 10)]

Unnamed: 0_level_0,始値,高値,安値,終値,出来高,調整後終値*
修正日付,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-01,6030,6060,5960,5970,886200,5970
2021-10-04,6060,6100,5970,6040,1117800,6040
2021-10-05,6020,6030,5780,5870,1287300,5870
2021-10-06,5970,6020,5830,5860,1398400,5860
2021-10-07,5890,5950,5810,5830,1044700,5830
2021-10-08,5890,6060,5890,6010,1093200,6010
2021-10-11,6020,6120,5970,6070,961600,6070
2021-10-12,6010,6070,5970,6040,770000,6040
2021-10-13,6000,6060,5990,6010,630100,6010
2021-10-14,6030,6090,6020,6050,703700,6050


In [23]:
# Show the rows whose '修正日付' index lies between 2021-10-14 and 2021-10-20
# (label-based slice, both endpoints included)
main.loc['2021-10-14':'2021-10-20']

Unnamed: 0_level_0,始値,高値,安値,終値,出来高,調整後終値*
修正日付,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-14,6030,6090,6020,6050,703700,6050
2021-10-15,6130,6280,6110,6270,1274100,6270
2021-10-18,6260,6290,6230,6280,751400,6280
2021-10-19,6300,6330,6260,6260,618700,6260
2021-10-20,6290,6380,6280,6320,806800,6320
