In [1]:
import requests
import pandas as pd
import time
import re
import csv
import os
from bs4 import BeautifulSoup
import random
import pickle
from tqdm import tqdm
import numpy as np
import os
pd.set_option("display.max_columns", None) # 表示する列数の制限を解除

import sys
sys.path.append("..") # 親ディレクトリを追加
from module.path_reader import PathReader
from module.get_html import GetHTML

In [2]:
# what: HTMLを解析して払い戻しテーブルをDataFrame化する関数
# for:  AIモデルの入力形式に合わせる
# in:   取得したhtml(.bin)
# out:  払い戻しテーブル(DataFrame)
def parse_return_html(html_text):
    soup = BeautifulSoup(html_text, "html.parser")

    pay_tables = soup.find_all("table", class_="Payout_Detail_Table")
    pay_data = []

    for tbl in pay_tables:
        for row in tbl.find_all("tr"):
            bet_type = row.find("th").get_text(strip=True) if row.find("th") else None
            result = " / ".join(span.get_text(strip=True) for span in row.select("td.Result span") if span.get_text(strip=True))
            payout = " / ".join(span.get_text(strip=True) for span in row.select("td.Payout span") if span.get_text(strip=True))
            popularity = " / ".join(span.get_text(strip=True) for span in row.select("td.Ninki span") if span.get_text(strip=True))

            pay_data.append([bet_type, result, payout, popularity])

    pay_df = pd.DataFrame(pay_data, columns=["券種", "馬番", "払戻金", "人気"])
    return pay_df

In [3]:
with open(r"C:\Users\yasak\デスクトップ\Keiba_App\mykeibaAI_ver1p0\data\race_result_html\202405020403.bin", "rb") as f:
    html_text = f.read().decode("EUC-JP", errors="ignore")

df_pay = parse_return_html(html_text)
print(df_pay)

    券種                       馬番               払戻金                 人気
0   単勝                        7            1,330円                5人気
1   複勝               7 / 6 / 15      280円620円180円    5人気 / 8人気 / 2人気
2   枠連                    3 / 4            1,310円                6人気
3   馬連                    6 / 7           18,450円               34人気
4  ワイド  6 / 7 / 7 / 15 / 6 / 15  3,480円880円1,510円  28人気 / 7人気 / 18人気
5   馬単                    7 / 6           33,720円               64人気
6  3連複               6 / 7 / 15           17,280円               48人気
7  3連単               7 / 6 / 15          199,900円              425人気


In [None]:
# tansho | tansho_pay | fukusyo1 | fukusyo1_pay | fukusyo2 | fukusyo2_pay | fukusyo3 | fukusyo3_pay | wakuren1 | wakuren2 | wakuren_pay | umaren1 | umaren2 | umaren_pay | wide1a | wide1b | wide1_pay | wide2a | wide2b | wide2_pay | wide3a | wide3b | wide3_pay | umatan1 | umatan2 | umatan_pay | sanrenpuku1 | sanrenpuku2 | sanrenpuku3 | sanrenpuku_pay | sanrentan1 | sanrentan2 | sanrentan3 | sanrentan_pay

In [5]:
# df_payを指定カラムに展開するユーティリティ

def convert_return_table(pay_df: pd.DataFrame) -> pd.DataFrame:
    columns = [
        "tansho", "tansho_pay",
        "fukusyo1", "fukusyo1_pay", "fukusyo2", "fukusyo2_pay", "fukusyo3", "fukusyo3_pay",
        "wakuren1", "wakuren2", "wakuren_pay",
        "umaren1", "umaren2", "umaren_pay",
        "wide1a", "wide1b", "wide1_pay", "wide2a", "wide2b", "wide2_pay", "wide3a", "wide3b", "wide3_pay",
        "umatan1", "umatan2", "umatan_pay",
        "sanrenpuku1", "sanrenpuku2", "sanrenpuku3", "sanrenpuku_pay",
        "sanrentan1", "sanrentan2", "sanrentan3", "sanrentan_pay",
    ]

    data = {col: None for col in columns}
    rows = {row["券種"]: row for _, row in pay_df.iterrows()}

    def split_parts(val: str):
        # Allow both " / " separated and compact strings
        parts = re.split(r"\s*/\s*", val or "")
        return [p for p in (part.strip() for part in parts) if p]

    def parse_nums(part: str):
        return [int(n) for n in re.findall(r"\d+", part or "")]

    def parse_pay_list(val: str):
        # Extract all numeric payouts even when written like "280円620円180円"
        return [int(n) for n in re.findall(r"\d+", (val or "").replace(",", ""))]

    # 単勝
    row = rows.get("単勝")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        if res_parts:
            nums = parse_nums(res_parts[0])
            if nums:
                data["tansho"] = nums[0]
        if pay_vals:
            data["tansho_pay"] = pay_vals[0]

    # 複勝（最大3つ）
    row = rows.get("複勝")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        for i in range(min(3, len(res_parts))):
            nums = parse_nums(res_parts[i])
            if nums:
                data[f"fukusyo{i+1}"] = nums[0]
            if i < len(pay_vals):
                data[f"fukusyo{i+1}_pay"] = pay_vals[i]

    # 枠連
    row = rows.get("枠連")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        if res_parts:
            nums = parse_nums(res_parts[0])
            if len(nums) >= 2:
                data["wakuren1"], data["wakuren2"] = nums[0], nums[1]
        if pay_vals:
            data["wakuren_pay"] = pay_vals[0]

    # 馬連
    row = rows.get("馬連")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        if res_parts:
            nums = parse_nums(res_parts[0])
            if len(nums) >= 2:
                data["umaren1"], data["umaren2"] = nums[0], nums[1]
        if pay_vals:
            data["umaren_pay"] = pay_vals[0]

    # ワイド（最大3つ）
    row = rows.get("ワイド")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        for i in range(min(3, len(res_parts))):
            nums = parse_nums(res_parts[i])
            if len(nums) >= 2:
                data[f"wide{i+1}a"], data[f"wide{i+1}b"] = nums[0], nums[1]
            if i < len(pay_vals):
                data[f"wide{i+1}_pay"] = pay_vals[i]

    # 馬単
    row = rows.get("馬単")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        if res_parts:
            nums = parse_nums(res_parts[0])
            if len(nums) >= 2:
                data["umatan1"], data["umatan2"] = nums[0], nums[1]
        if pay_vals:
            data["umatan_pay"] = pay_vals[0]

    # 三連複
    row = rows.get("三連複")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        if res_parts:
            nums = parse_nums(res_parts[0])
            for idx, key in enumerate(["sanrenpuku1", "sanrenpuku2", "sanrenpuku3"]):
                if idx < len(nums):
                    data[key] = nums[idx]
        if pay_vals:
            data["sanrenpuku_pay"] = pay_vals[0]

    # 三連単
    row = rows.get("三連単")
    if row is not None:
        res_parts = split_parts(row["馬番"])
        pay_vals = parse_pay_list(row["払戻金"])
        if res_parts:
            nums = parse_nums(res_parts[0])
            for idx, key in enumerate(["sanrentan1", "sanrentan2", "sanrentan3"]):
                if idx < len(nums):
                    data[key] = nums[idx]
        if pay_vals:
            data["sanrentan_pay"] = pay_vals[0]

    return pd.DataFrame([data])[columns]

return_df = convert_return_table(df_pay)
return_df

Unnamed: 0,tansho,tansho_pay,fukusyo1,fukusyo1_pay,fukusyo2,fukusyo2_pay,fukusyo3,fukusyo3_pay,wakuren1,wakuren2,wakuren_pay,umaren1,umaren2,umaren_pay,wide1a,wide1b,wide1_pay,wide2a,wide2b,wide2_pay,wide3a,wide3b,wide3_pay,umatan1,umatan2,umatan_pay,sanrenpuku1,sanrenpuku2,sanrenpuku3,sanrenpuku_pay,sanrentan1,sanrentan2,sanrentan3,sanrentan_pay
0,7,1330,7,280,6,620,15,180,,,1310,,,18450,,,3480,,,880,,,1510,,,33720,,,,,,,,
