In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import seaborn as sns

module_path = (Path().resolve().parent/ "Modules")
sys.path.append(str(module_path))

pd.set_option("display.max_columns", None)
plt.rcParams["font.family"] = "IPAexGothic"

import my_modules, model_tuner, features, prob_calculator # 自作モジュール

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [2]:
df_pred = pd.read_csv("df_pred.csv")
df_pred["id_for_fold"] = df_pred["race_id"] // 100
df_pred.head()

Unnamed: 0,year,month,day,horse,pred_class_1,pred_class_2,pred_class_3,target,race_id,waku_num,id_for_fold
0,2025,2,2,ラファールドール,0.033841,0.039362,0.041342,0,202502020501020205,4,2025020205010202
1,2025,2,2,ロードインフェルノ,0.044336,0.060592,0.082243,0,202502020501020204,3,2025020205010202
2,2025,2,2,タイセイアーサー,0.023447,0.035971,0.049787,0,202502020501020207,5,2025020205010202
3,2025,2,2,ブリスキー,0.096036,0.068693,0.058486,0,202502020501020202,2,2025020205010202
4,2025,2,2,マルチライセンス,0.018028,0.025876,0.034216,0,202502020501020203,3,2025020205010202


In [33]:
from prob_calculator import calc_waku_pred

tmp = calc_waku_pred(df_pred)

2025042605020107
2025052404010709


In [34]:
tmp[tmp["id_for_fold"] == 2025052404010709]

Unnamed: 0,waku_pair,pred,id_for_fold,target,wakuren_confirmed_odds
37836,枠1-1,0.0,2025052404010709,0,0.0
37837,枠1-2,0.00194,2025052404010709,0,331.6
37838,枠1-3,0.010935,2025052404010709,0,216.0
37839,枠1-4,0.003499,2025052404010709,0,303.1
37840,枠1-5,0.003726,2025052404010709,0,280.3
37841,枠1-6,0.002085,2025052404010709,0,370.3
37842,枠1-7,0.002866,2025052404010709,0,409.7
37843,枠1-8,0.008513,2025052404010709,0,116.8
37844,枠2-2,0.002216,2025052404010709,0,415.1
37845,枠2-3,0.046412,2025052404010709,0,49.0


### これらのデータからkelly基準を作成する予定

In [37]:
tmp.head()

Unnamed: 0,waku_pair,pred,id_for_fold,target,wakuren_confirmed_odds
0,枠1-1,0.0,2025020205010202,0,0.0
1,枠1-2,0.01541,2025020205010202,0,21.6
2,枠1-3,0.013782,2025020205010202,0,46.3
3,枠1-4,0.012742,2025020205010202,0,47.0
4,枠1-5,0.023061,2025020205010202,0,39.5


In [48]:
def calc_kelly(df, KELLY_FRACTION=0.5):
    df = df.copy()
    # オッズの最小値を1に指定
    df["wakuren_confirmed_odds"] = np.clip(df["wakuren_confirmed_odds"], 1, None)

    p = df["pred"]
    q = 1 - p
    b = df["wakuren_confirmed_odds"] - 1
    df["kelly_criterion"] = p - (q / b)

    display(df.head(50))
    return df

kelly_df = calc_kelly(tmp)

Unnamed: 0,waku_pair,pred,id_for_fold,target,wakuren_confirmed_odds,kelly_criterion
0,枠1-1,0.0,2025020205010202,0,1.0,-inf
1,枠1-2,0.01541,2025020205010202,0,21.6,-0.032385
2,枠1-3,0.013782,2025020205010202,0,46.3,-0.007989
3,枠1-4,0.012742,2025020205010202,0,47.0,-0.00872
4,枠1-5,0.023061,2025020205010202,0,39.5,-0.002314
5,枠1-6,0.055661,2025020205010202,0,6.6,-0.112971
6,枠1-7,0.02884,2025020205010202,0,18.1,-0.027953
7,枠1-8,0.024452,2025020205010202,0,14.9,-0.045731
8,枠2-2,0.0,2025020205010202,0,1.0,-inf
9,枠2-3,0.01343,2025020205010202,0,74.9,8e-05


In [50]:
kelly_df[kelly_df["kelly_criterion"] > 0].head(50)

Unnamed: 0,waku_pair,pred,id_for_fold,target,wakuren_confirmed_odds,kelly_criterion
9,枠2-3,0.01343,2025020205010202,0,74.9,8e-05
11,枠2-5,0.02183,2025020205010202,0,59.5,0.005109
13,枠2-7,0.026939,2025020205010202,0,38.2,0.000782
15,枠3-3,0.002343,2025020205010202,0,454.7,0.000144
16,枠3-4,0.010523,2025020205010202,0,107.0,0.001188
17,枠3-5,0.01956,2025020205010202,0,92.8,0.00888
18,枠3-6,0.047966,2025020205010202,0,23.1,0.004887
19,枠3-7,0.024769,2025020205010202,0,74.7,0.011537
22,枠4-5,0.018083,2025020205010202,0,88.5,0.006861
23,枠4-6,0.044301,2025020205010202,0,22.7,0.00026


In [43]:
id = tmp.id_for_fold.unique()[1]
tmp[tmp.id_for_fold == id]

Unnamed: 0,waku_pair,pred,id_for_fold,target,wakuren_confirmed_odds
36,枠1-1,0.0,2025020208010202,0,0.0
37,枠1-2,0.000212,2025020208010202,0,468.5
38,枠1-3,0.005014,2025020208010202,0,151.8
39,枠1-4,0.000105,2025020208010202,0,636.9
40,枠1-5,0.008854,2025020208010202,0,100.7
41,枠1-6,0.007634,2025020208010202,0,111.9
42,枠1-7,0.002172,2025020208010202,0,258.2
43,枠1-8,0.008907,2025020208010202,0,88.1
44,枠2-2,0.0,2025020208010202,0,0.0
45,枠2-3,0.00289,2025020208010202,0,260.3


In [44]:
race_data = df_pred[df_pred.id_for_fold == id][["horse", "waku_num", "pred_class_1", "pred_class_2"]].sort_values("waku_num")
race_data

Unnamed: 0,horse,waku_num,pred_class_1,pred_class_2
16,タマモアオゾラ,1,0.008787,0.020374
22,ラヴリーチェリー,2,0.005203,0.011652
20,ホウショウマリス,3,0.144972,0.16474
19,ヒカリスペクトル,4,0.002674,0.00558
17,テイエムダイタカ,5,0.050159,0.09643
24,バシリス,5,0.213489,0.176064
14,バトンロード,6,0.030422,0.056497
21,キングコロネット,6,0.194789,0.181051
15,チュンビーム,7,0.006318,0.014583
23,ドゥドゥ,7,0.051308,0.085572
