# インポート

In [23]:
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from tqdm.notebook import tqdm
from pathlib import Path
import scraping
import create_table
import create_prediction_population
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
%autoreload

# 事前準備

In [26]:
# Build the population of horses entered to run on the target race day.
predict_population = create_prediction_population.create(
    kaisai_date="20250302",
    save_filename="population.csv",
)
predict_population

scraping race_id_list...


  0%|          | 0/1 [00:00<?, ?it/s]

scraping horse_id_list...


  0%|          | 0/36 [00:00<?, ?it/s]

Unnamed: 0,date,race_id,horse_id
0,2025-03-02,202506020201,2022105560
1,2025-03-02,202506020201,2022102356
2,2025-03-02,202506020201,2022104586
3,2025-03-02,202506020201,2022106586
4,2025-03-02,202506020201,2022106108
...,...,...,...
11,2025-03-02,202510011212,2021105772
12,2025-03-02,202510011212,2017105392
13,2025-03-02,202510011212,2021101440
14,2025-03-02,202510011212,2021107279


## 出走馬のホースデータテーブルの作成

In [27]:
# Distinct horse IDs present in the prediction population; the count is shown
# as the cell output.
horse_id_list = predict_population.loc[:, "horse_id"].unique()
len(horse_id_list)

539

In [29]:
# Stated intent (from the original note): skip HTML files that are already
# saved, and keep the list of HTML paths for the horses that are running.
# NOTE(review): skip=False is passed here, which contradicts the stated intent
# to skip saved files (the pedigree scrape further down passes skip=True with
# the same note). Confirm whether re-fetching every horse page is deliberate.
# Presumably path=True makes the call return the saved HTML paths — TODO confirm
# against scraping.scrape_html_horse.
htmls_horse_list = scraping.scrape_html_horse(
    horse_id_list= horse_id_list,
    skip=False,
    path=True,
)

  0%|          | 0/539 [00:00<?, ?it/s]

In [30]:
# Sanity check: number of entries returned by the horse-page scrape.
len(htmls_horse_list)

539

In [32]:
# Build the past-results table for the horses being predicted.
# Original author's note: if the goal is to add the predicted horses to
# horse_results_prediction, turning "update" off may also be acceptable.
horse_results_prediction = create_table.table_horse_results(
    save_filename="horse_results_prediction.csv",
    html_path_list=htmls_horse_list,
)
horse_results_prediction

  0%|          | 0/539 [00:00<?, ?it/s]

table not found at 2022106586
table not found at 2022104838
table not found at 2022100818
table not found at 2022104955
table not found at 2022106226
table not found at 2022102977
table not found at 2022103233
table not found at 2022103465
table not found at 2022100494
table not found at 2022100330
table not found at 2022101397
table not found at 2022103001
table not found at 2022102175
table not found at 2022101119


Unnamed: 0,horse_id,日付,開催,天気,R,レース名,映像,頭数,枠番,馬番,...,着差,ﾀｲﾑ指数,通過,ペース,上り,馬体重,厩舎ｺﾒﾝﾄ,備考,勝ち馬(2着馬),賞金
0,2022105560,2024/08/10,3新潟1,晴,6.0,2歳新馬,,18,8.0,18,...,2.2,**,3-3,34.7-35.7,37.6,400(0),,,セナマリン,
0,2022102356,2024/11/30,5中山1,晴,1.0,2歳未勝利,,16,8.0,16,...,0.4,**,4-4,34.1-38.6,38.5,422(-4),,,サノノアメリカン,
1,2022102356,2024/11/03,3福島2,晴,1.0,2歳未勝利,,16,7.0,14,...,1.8,**,5-6,31.4-36.6,38.0,426(-2),,,ゴメンネジロー,
2,2022102356,2024/10/20,4東京6,晴,4.0,2歳新馬,,16,3.0,6,...,3.5,**,5-5,36.2-36.1,39.1,428(0),,,スナッピードレッサ,
0,2022104586,2025/02/02,1東京2,雨,1.0,3歳未勝利,,16,5.0,9,...,0.7,**,2-2,35.9-38.3,38.9,460(+2),,,スマイルディシー,56.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,2021100752,2024/06/29,3小倉1,曇,7.0,3歳未勝利,,16,7.0,13,...,0.0,**,2-2-2-2,30.2-37.6,37.5,472(+6),,,テイエムデンセツ,220.0
6,2021100752,2024/06/16,4京都6,晴,2.0,3歳未勝利,,16,2.0,3,...,1.0,**,2-2-2-2,36.8-37.8,38.7,466(-12),,,メイショウソウタ,83.0
7,2021100752,2024/05/04,1新潟3,晴,2.0,3歳未勝利,,15,4.0,7,...,0.3,**,1-1-1-1,37.0-38.1,38.4,478(+6),,,ピカレスクノベル,220.0
8,2021100752,2024/04/13,2阪神7,晴,3.0,3歳未勝利,,16,8.0,16,...,0.6,**,2-2-2-2,35.5-39.0,39.4,472(+10),,,オクヤマ,55.0


## 出走馬の血統データテーブルの作成

In [33]:
# Skip pedigree HTML files that are already saved, while still collecting the
# HTML path list for the horses that are running.
htmls_horse_peds = scraping.scrape_html_ped(
    path=True,
    skip=True,
    horse_id_list=horse_id_list,
)

  0%|          | 0/539 [00:00<?, ?it/s]

skipped:2022105560
skipped:2022102356
skipped:2022104586
skipped:2022106108
skipped:2022102723
skipped:2022102892
skipped:2022105368
skipped:2022103450
skipped:2022101760
skipped:2022103370
skipped:2022100588
skipped:2022101636
skipped:2022103895
skipped:2022107233
skipped:2022100162
skipped:2022104310
skipped:2022100775
skipped:2022101090
skipped:2022105459
skipped:2022100702
skipped:2022100105
skipped:2022106388
skipped:2022102505
skipped:2022100318
skipped:2022102380
skipped:2022104486
skipped:2022105905
skipped:2022100263
skipped:2022102352
skipped:2022100682
skipped:2022105257
skipped:2022102651
skipped:2022102455
skipped:2022106995
skipped:2022104759
skipped:2022104675
skipped:2022104795
skipped:2022102347
skipped:2022105217
skipped:2022105541
skipped:2022101685
skipped:2022105457
skipped:2022102284
skipped:2022102601
skipped:2022103528
skipped:2022104191
skipped:2022105687
skipped:2022105464
skipped:2022101492
skipped:2022100848
skipped:2022105167
skipped:2022104539
skipped:2022

In [34]:
# Build the pedigree table from the pedigree HTML paths collected above.
# NOTE(review): this filename uses a dot ("peds.prediction.csv") whereas the
# results table is saved as "horse_results_prediction.csv" with an underscore —
# confirm the intended name before any downstream code depends on it.
horse_peds = create_table.create_peds(
    save_filename="peds.prediction.csv",
    html_path_list=htmls_horse_peds,
)
horse_peds


  0%|          | 0/539 [00:00<?, ?it/s]

Unnamed: 0,horse_id,ped_0,ped_1,ped_2,ped_3,ped_4,ped_5,ped_6,ped_7,ped_8,...,ped_52,ped_53,ped_54,ped_55,ped_56,ped_57,ped_58,ped_59,ped_60,ped_61
0,2022105560,2014106083,2001103038,000a00033a,000a0012bf,000a000f2b,000a007459,000a008c1e,000a0019b6,000a008c1d,...,000a008d20,000a000db7,000a008d1f,000a0119aa,000a001205,000a000e04,000a007c8d,000a0119a9,000a002214,000a0119a8
0,2022102356,000a01b93e,000a012aad,000a0122f5,000a010542,000a001cd0,000a01008f,000a012327,000a00193d,000a00fb5d,...,000a007fbe,000a000dcd,000a007fbd,000a00b1f7,000a001bbf,000a0012a7,000a0091b1,000a00b1f8,000a00224d,000a00b1fb
0,2022104586,2011100655,2004103328,1995108676,000a0019b4,000a0012cb,000a008c0e,000a00a4b9,000a0000d3,000a00a4b8,...,000a0087a5,000a000e0e,000a007c3b,000a010ac2,000a00193d,000a00193c,000a008ae3,000a0085f9,000a0017b8,000a0085f6
0,2022106586,000a014589,000a0103ba,000a001c1d,000a001607,000a000e46,000a007e0c,000a00931f,000a0012bf,000a009315,...,000a007e0c,000a000f87,000a007d1c,000a009924,000a000f14,000a001181,000a006963,000a00885f,000a000e46,000a006f14
0,2022106108,000a013bba,000a011d00,000a0000d3,000a000e04,000a000f8c,000a00702e,000a000491,000a000e55,000a006f25,...,000a0003a2,000a001cb4,000a00922c,000a010ce1,000a001d7e,000a001607,000a009961,000a00a99e,000a00185d,000a009ac9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,2021105772,2008103552,2001103460,000a001d7e,000a001607,000a000e46,000a007e0c,000a009961,000a001676,000a0084c9,...,000a009961,000a001676,000a0084c9,000a0107a7,000a001fb5,000a00185d,000a00940f,000a0107a6,000a010752,000a008e6b
0,2017105392,2007103143,2001103460,000a001d7e,000a001607,000a000e46,000a007e0c,000a009961,000a001676,000a0084c9,...,000a000071,000a0021eb,000a00a3ba,1996107386,000a00033a,000a0012bf,000a008c1e,000a006496,000a001676,000a009bb6
0,2021101440,2014106201,2001103460,000a001d7e,000a001607,000a000e46,000a007e0c,000a009961,000a001676,000a0084c9,...,000a008e05,000a000ded,000a008e04,000a009f48,000a001785,000a000efe,000a00852e,000a0094f5,000a000f44,000a0094f4
0,2021107279,000a014577,000a011d00,000a0000d3,000a000e04,000a000f8c,000a00702e,000a000491,000a000e55,000a006f25,...,000a00884d,000a001847,000a00884c,000a010be5,000a001607,000a000e46,000a007e0c,000a00930d,000a000e04,000a008785
