# 役割
本番運用時の予測を行うnotebook。モデルの学習（`train.ipynb`）が完了している必要がある。

# インポート

In [1]:
import preprocessing
from feature_producing_prediction import PredictionFeatureCreator
import predictioning
import pandas as pd
from pathlib import Path
%load_ext autoreload

In [17]:
%autoreload

# 事前準備
**当日出走馬が確定した時点**で実行できる

In [7]:
# The training population used at prediction time is kept under
# common/data/prediction_population/.
POPULATION_DIR = preprocessing.COMMON_DATA_DIR / "prediction_population"

In [8]:
# Preprocess the past-results table of the horses running today.
horse_results_preprocessed = preprocessing.process_horse_results(
    population_dir=POPULATION_DIR,
    population_filename = "population.csv",
    input_filename="horse_results_prediction.csv",
    output_filename="horse_results_prediction.csv"
)
# Preprocess the pedigree table of the horses running today.
# NOTE(review): the keyword below is spelled `populaton_filename`, unlike
# `population_filename` in the call above — confirm it matches the actual
# parameter name of preprocessing.process_peds before renaming either side.
peds_preprocessed = preprocessing.process_peds(
    population_dir=POPULATION_DIR,
    populaton_filename= "population.csv",
    input_filename="peds_prediction.csv",
    output_filename="peds_prediction.csv"
)

In [4]:
# Sanity checks on the preprocessed tables:
#   1. number of past-result rows duplicated on (horse_id, date)
#   2. per-column count of missing values in the pedigree table
(
    horse_results_preprocessed.duplicated(subset=["horse_id", "date"]).sum(),
    peds_preprocessed.isna().sum(),
)


(np.int64(0),
 horse_id    0
 sire_id     0
 bms_id      0
 dtype: int64)

# 当日の予測処理
レース直前（出走直前）に実行する

In [41]:
# Build the feature table for one race; the race_id is hard-coded in this cell.
# NOTE(review): weight=True presumably enables the horse-weight features
# ('weight', 'weight_diff') — confirm against PredictionFeatureCreator.
pfc = PredictionFeatureCreator(race_id="202505021011", weight = True)
pfc.create_features()
# update_odds_and_popularity() is awaited; its return value is kept as `features`.
features = await pfc.update_odds_and_popularity()

agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


In [42]:
# Predict the single race with the "latest" model and calibration model.
# (To use the base files instead, pass "model.pkl" / "calibration_model.pkl".)
prediction = predictioning.predict(
    features,
    config_filepath="config.yaml",
    model_filename="model_latest.pkl",
    calibration_model_filename="calibration_model_latest.pkl",
    sort_col="expect_return_calibrated",
)
prediction

Unnamed: 0,race_id,umaban,tansho_odds,popularity,pred,pred_calibrated,expect_return,expect_return_calibrated
11,202505021011,12,15.1,6.0,0.086599,0.087001,1.30764,1.313715
10,202505021011,11,315.1,18.0,0.002484,0.004042,0.782771,1.273646
5,202505021011,6,78.7,12.0,0.012066,0.01312,0.949561,1.032507
0,202505021011,1,3.6,2.0,0.217735,0.23049,0.783847,0.829764
4,202505021011,5,3.6,2.0,0.210494,0.23049,0.75778,0.829764
8,202505021011,9,3.2,1.0,0.274215,0.256983,0.877488,0.822346
2,202505021011,3,18.5,7.0,0.043826,0.042511,0.810774,0.786462
6,202505021011,7,29.2,8.0,0.026873,0.026786,0.784694,0.782143
16,202505021011,17,186.1,14.0,0.004737,0.004042,0.881629,0.752223
9,202505021011,10,42.2,9.0,0.016519,0.015267,0.697084,0.644275


In [36]:
# Predict the same single race with the `*_wr` model, calibration model and
# config, sorting on the uncalibrated expected return this time.
# (To use the base files instead, pass "model.pkl" / "calibration_model.pkl".)
prediction = predictioning.predict(
    features,
    config_filepath="config_wr.yaml",
    model_filename="model_wr.pkl",
    calibration_model_filename="calibration_model_wr.pkl",
    sort_col="expect_return",
)
prediction

Unnamed: 0,race_id,umaban,tansho_odds,popularity,pred,pred_calibrated,expect_return,expect_return_calibrated
17,202505021011,18,12.1,4.0,0.095485,0.082759,1.155368,1.001379
11,202505021011,12,16.5,7.0,0.067037,0.042679,1.106107,0.704209
8,202505021011,9,3.1,1.0,0.296057,0.267045,0.917777,0.827841
15,202505021011,16,91.2,13.0,0.009393,0.006039,0.856687,0.550725
14,202505021011,15,14.1,5.0,0.060425,0.042679,0.851994,0.601778
13,202505021011,14,55.9,11.0,0.01521,0.010357,0.850245,0.578941
5,202505021011,6,57.4,12.0,0.014602,0.010357,0.838168,0.594476
1,202505021011,2,190.1,17.0,0.004215,0.003851,0.801356,0.732092
2,202505021011,3,15.3,6.0,0.050208,0.042679,0.76818,0.652993
0,202505021011,1,4.0,2.0,0.191381,0.209756,0.765524,0.839024


# 一括で１日分のレースを予測する

In [15]:
post_time = await predictioning.scrape_race_time_table(kaisai_date="20250525")
post_time

Unnamed: 0,race_id,race_name,post_time
0,202504010801,3歳未勝利,09:50
1,202508021001,3歳未勝利,09:55
2,202505021001,3歳未勝利,10:05
3,202504010802,3歳未勝利,10:15
4,202508021002,3歳未勝利,10:20
5,202505021002,3歳未勝利,10:30
6,202504010803,3歳未勝利,10:40
7,202508021003,3歳未勝利,10:50
8,202505021003,3歳未勝利,11:00
9,202504010804,4歳以上障害未勝利,11:10


In [16]:
# Distinct race ids of the day, in order of first appearance in the timetable.
# Despite the name this is the array returned by pandas, not a Python list.
race_id_list = pd.unique(post_time["race_id"])

In [17]:
# features_dfにfeaturesを格納してつなげる
features_df = []
for race_id in race_id_list:
    pfc = PredictionFeatureCreator(race_id=race_id, weight = False)
    pfc.create_features()
    features = await pfc.update_odds_and_popularity()
    features_df.append(features)
# features_dfを結合
features_df = pd.concat(features_df)


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


agg_horse_n_races_relative:   0%|          | 0/5 [00:00<?, ?it/s]

running agg_interval()...
fetching shubuta page html...


agg_horse_per_course_len:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_ground_state_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_class:   0%|          | 0/6 [00:00<?, ?it/s]

agg_horse_per_race_type:   0%|          | 0/6 [00:00<?, ?it/s]

running agg_jockey()...
running agg_trainer()...
running agg_sire()...
running cross_feature()...
merging all features...


  pd.read_html(html)[0][["馬番", "単勝"]]


In [18]:
# Inspect the concatenated feature table for all races of the day.
features_df

Unnamed: 0,date_x,race_id,horse_id,jockey_id,trainer_id,umaban,wakuban,impost,sex,age,...,wakuban_around,umaban_around,month,sin_date,cos_date,month_sex,sin_date_sex,cos_date_sex,tansho_odds,popularity
0,2025-05-25,202504010801,2022100263,1117,1081,1,1,57.0,0,3,...,,,5,0.602988,-0.79775,-5.0,-0.602988,0.79775,6.3,3.0
1,2025-05-25,202504010801,2022105627,1187,1112,2,1,53.0,1,3,...,,,5,0.602988,-0.79775,5.0,0.602988,-0.79775,29.5,9.0
2,2025-05-25,202504010801,2022100260,689,1190,3,2,57.0,0,3,...,,,5,0.602988,-0.79775,-5.0,-0.602988,0.79775,10.6,6.0
3,2025-05-25,202504010801,2022105593,1185,1146,4,2,55.0,1,3,...,,,5,0.602988,-0.79775,5.0,0.602988,-0.79775,9.1,5.0
4,2025-05-25,202504010801,2022100759,1176,1112,5,3,55.0,1,3,...,,,5,0.602988,-0.79775,5.0,0.602988,-0.79775,102.9,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11,2025-05-25,202505021012,2021106797,1077,1020,12,6,55.0,0,4,...,,,5,0.602988,-0.79775,-5.0,-0.602988,0.79775,10.2,4.0
12,2025-05-25,202505021012,2018104778,1150,1052,13,7,54.0,0,7,...,,,5,0.602988,-0.79775,-5.0,-0.602988,0.79775,48.3,14.0
13,2025-05-25,202505021012,2019101875,5115,1119,14,7,57.0,0,6,...,,,5,0.602988,-0.79775,-5.0,-0.602988,0.79775,15.8,9.0
14,2025-05-25,202505021012,2020101036,1186,1121,15,8,56.0,0,5,...,,,5,0.602988,-0.79775,-5.0,-0.602988,0.79775,13.2,7.0


In [19]:
# Batch prediction with the base model files.
# NOTE(review): the recorded run of this cell ends in
#   KeyError: "['weight', 'weight_diff'] not in index"
# so this model/config apparently needs the weight columns, which features_df
# (built with weight=False) presumably lacks. Either rebuild features_df with
# weight=True or use the `*_wr` model files instead — confirm which is intended.
prediction = predictioning.predict(
    features_df,
    model_filename="model.pkl",
    calibration_model_filename="calibration_model.pkl",
    config_filepath="config.yaml",
    sort_col="expect_return_calibrated",
)
# Within each race, order the horses by calibrated expected return, highest first.
sorted_predictions = prediction.groupby("race_id").apply(
    lambda x: x.sort_values(by="expect_return_calibrated", ascending=False)
)
sorted_predictions

KeyError: "['weight', 'weight_diff'] not in index"

In [20]:
# Minimum (uncalibrated) expected return for a horse to be kept as a candidate.
MIN_EXPECT_RETURN = 1.2

# Filter into a new name instead of overwriting `sorted_predictions`, so the
# full sorted table stays available and this cell can be re-run or re-tuned
# without re-running the prediction cell.
bet_candidates = sorted_predictions[
    sorted_predictions["expect_return"] >= MIN_EXPECT_RETURN
]
bet_candidates

Unnamed: 0_level_0,Unnamed: 1_level_0,race_id,umaban,tansho_odds,popularity,pred,pred_calibrated,expect_return,expect_return_calibrated
race_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
202503010102,5,202503010102,6,9.5,3.0,0.13408,0.117207,1.273761,1.113466
202503010105,4,202503010105,5,17.9,7.0,0.068072,0.077586,1.218496,1.388793
202503010110,6,202503010110,7,16.1,7.0,0.084244,0.088832,1.356324,1.430203
202506030507,7,202506030507,8,227.3,13.0,0.005583,0.001544,1.269062,0.351042
202509020510,13,202509020510,14,110.3,16.0,0.011399,0.010695,1.257296,1.179679
202509020512,8,202509020512,9,71.4,11.0,0.01738,0.016355,1.240938,1.167757


In [31]:
# 予測
prediction = predictioning.predict(
    features_df,
    model_filename="model_wr.pkl",
    calibration_model_filename="calibration_model_wr.pkl",
    config_filepath="config_wr.yaml",
    # model_filename="model.pkl",   
    # calibration_model_filename="calibration_model.pkl",
    sort_col="expect_return_calibrated",    
)
sorted_predictions = prediction.groupby("race_id").apply(
    lambda x: x.sort_values(by="expect_return_calibrated", ascending=False)
)
sorted_predictions

  sorted_predictions = prediction.groupby("race_id").apply(


Unnamed: 0_level_0,Unnamed: 1_level_0,race_id,umaban,tansho_odds,popularity,pred,pred_calibrated,expect_return,expect_return_calibrated
race_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
202504010801,12,202504010801,13,3.7,1.0,0.245050,0.252654,0.906687,0.934820
202504010801,3,202504010801,4,9.1,5.0,0.111481,0.094488,1.014481,0.859843
202504010801,10,202504010801,11,29.9,10.0,0.026594,0.027473,0.795158,0.821429
202504010801,1,202504010801,2,29.5,9.0,0.025643,0.027473,0.756474,0.810440
202504010801,11,202504010801,12,11.8,7.0,0.073475,0.066465,0.867009,0.784290
...,...,...,...,...,...,...,...,...,...
202508021012,3,202508021012,4,4.2,2.0,0.175092,0.190476,0.735386,0.800000
202508021012,8,202508021012,9,21.3,9.0,0.035454,0.035088,0.755180,0.747368
202508021012,6,202508021012,7,7.9,4.0,0.108327,0.094488,0.855783,0.746457
202508021012,0,202508021012,1,10.0,6.0,0.082685,0.066465,0.826853,0.664653


In [32]:
# Selection thresholds: keep horses whose uncalibrated expected return is at
# least 1.0 AND whose predicted win probability is at least 0.03 (3%).
MIN_EXPECT_RETURN_WR = 1.0
MIN_PRED_WR = 0.03

# Filter into a new name instead of overwriting `sorted_predictions`, so the
# full sorted table stays available and the thresholds can be re-tuned
# without re-running the prediction cell.
bet_candidates_wr = sorted_predictions[
    (sorted_predictions["expect_return"] >= MIN_EXPECT_RETURN_WR)
    & (sorted_predictions["pred"] >= MIN_PRED_WR)
]
bet_candidates_wr

Unnamed: 0_level_0,Unnamed: 1_level_0,race_id,umaban,tansho_odds,popularity,pred,pred_calibrated,expect_return,expect_return_calibrated
race_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
202504010801,3,202504010801,4,9.1,5.0,0.111481,0.094488,1.014481,0.859843
202504010804,7,202504010804,8,4.9,2.0,0.212171,0.252654,1.03964,1.238004
202504010807,0,202504010807,1,21.4,10.0,0.047811,0.042679,1.023148,0.913337
202504010808,3,202504010808,4,10.1,5.0,0.106732,0.094488,1.077994,0.954331
202504010811,1,202504010811,2,19.0,11.0,0.054641,0.042679,1.038175,0.810907
202505021004,14,202505021004,15,30.7,8.0,0.035623,0.035088,1.093616,1.077193
202505021006,5,202505021006,6,2.9,1.0,0.345375,0.43,1.001588,1.247
202505021006,6,202505021006,7,19.3,8.0,0.056992,0.042679,1.099938,0.823711
202505021006,8,202505021006,9,17.6,6.0,0.060229,0.042679,1.060027,0.751156
202505021008,7,202505021008,8,13.0,6.0,0.078183,0.066465,1.016383,0.864048
