## モジュールのインポート

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import math

## 変数の定義

In [2]:
kaisai_date = "20260118"
year = kaisai_date[:4]

INPUT_DIR = Path(".", "list")
DATA_DIR = Path(".", "data", year, kaisai_date)
INDEX_DIR = Path(".", "index", kaisai_date)
RESULT_DIR = Path(".", "result", year)

INDEX_DIR.mkdir(exist_ok=True, parents=True)

## ファイル読み込み

In [3]:
jockey_list_df = pd.read_csv(INPUT_DIR / "jockey_list.csv")
sire_list_df = pd.read_csv(INPUT_DIR / "sire_list.csv")
trainer_list_df = pd.read_csv(INPUT_DIR / "trainer_list.csv")

base_df_preprocessed = pd.read_csv(DATA_DIR / f"preprocessed_data_{kaisai_date}.csv")

## 前処理

In [4]:
base_df_preprocessed = base_df_preprocessed.drop(["距離", "馬場状態"], axis=1)

In [5]:
# 各DFのindexを設定
jockey_list_df = jockey_list_df.set_index("騎手名")
sire_list_df = sire_list_df.set_index("種牡馬名")
trainer_list_df = trainer_list_df.set_index("調教師名")

In [6]:
# カラム名を一部変更
jockey_list_df = jockey_list_df.rename(columns={'1〜3勝C': '自己条件'})
sire_list_df = sire_list_df.rename(columns={'1〜3勝C': '自己条件', '短縮': '距離短縮', '延長': '距離延長'})
trainer_list_df = trainer_list_df.rename(columns={'1〜3勝C': '自己条件'})

In [7]:
sire_list_df["重賞"] = sire_list_df["OP"]

## 指数の参照とDataFrame化

### 騎手

In [8]:
# 騎手の各指数を参照
jockey_index_df = []
columns_name = ["場所", "クラス", "種別", "距離区分", "回り", "道悪判定"]

for i in range(len(base_df_preprocessed)):
    jockey_columns_list = []
    place = base_df_preprocessed.iloc[i]["場所"]
    R = base_df_preprocessed.iloc[i]["R"]
    umaban = base_df_preprocessed.iloc[i]["馬番"]
    horse_name = base_df_preprocessed.iloc[i]["馬名"]
    jockey_name = base_df_preprocessed.iloc[i]["騎手名"]
    if len(jockey_name) == 3:
        jockey_name = jockey_name + "　"
    elif len(jockey_name) == 2:
        jockey_name = jockey_name + "　　"
    jockey_index_list = [place, R, umaban, horse_name, jockey_name]
    
    for j in columns_name:
        l = base_df_preprocessed.iloc[i][j]
        jockey_columns_list.append(l)
    try:
        for k in jockey_columns_list:
            m = jockey_list_df.loc[jockey_name, k]
            # NaNを無視して値を追加
            if not math.isnan(m):  # NaNでない場合のみ追加
                jockey_index_list.append(m)
    except KeyError:
        jockey_index_list.append(np.nan)
    
    jockey_index_df.append(jockey_index_list)

jockey_index_df = pd.DataFrame(jockey_index_df)
jockey_index_df.columns = [
    "場所",
    "R",
    "馬番",
    "馬名",
    "騎手名",
    "騎手妙味度_1",
    "騎手妙味度_2",
    "騎手妙味度_3",
    "騎手妙味度_4",
    "騎手妙味度_5",
    "騎手妙味度_6",
]

In [9]:
# 各行の平均値を算出（NaNは無視して計算）
ave_list = []
for i in range(len(jockey_index_df)):
    num_list = []
    for j in range(5, 11):
        x = jockey_index_df.iloc[i, j]
        num_list.append(x)
    ave = round(np.nanmean(num_list), 3)  # 小数点第2位までを出力
    ave_list.append(ave)

jockey_index_df["騎手妙味度"] = ave_list

  ave = round(np.nanmean(num_list), 3)  # 小数点第2位までを出力


In [10]:
jockey_index_df["騎手妙味度順位"] = jockey_index_df.groupby([
    "場所", "R"
])["騎手妙味度"].rank(method="min", ascending=False)

In [11]:
jockey_index_df.to_csv(INDEX_DIR / f"jockey_index_{kaisai_date}.csv", index=False)

### 種牡馬

In [12]:
# 種牡馬の各指数を参照
sire_index_df = []
columns_name = ["場所", "クラス", "種別", "年齢", "距離区分", "回り", "距離変遷", "道悪判定"]

for i in range(len(base_df_preprocessed)):
    sire_columns_list = []
    place = base_df_preprocessed.iloc[i]["場所"]
    R = base_df_preprocessed.iloc[i]["R"]
    umaban = base_df_preprocessed.iloc[i]["馬番"]
    horse_name = base_df_preprocessed.iloc[i]["馬名"]
    sire_name = base_df_preprocessed.iloc[i]["種牡馬名"]
    sire_index_list = [place, R, umaban, horse_name, sire_name]
    
    for j in columns_name:
        l = base_df_preprocessed.iloc[i][j]
        sire_columns_list.append(l)
    try:
        for k in sire_columns_list:
            m = sire_list_df.loc[sire_name, k]
            # NaNを無視して値を追加
            if not math.isnan(m):  # NaNでない場合のみ追加
                sire_index_list.append(m)
    except KeyError:
        sire_index_list.append(np.nan)
    
    sire_index_df.append(sire_index_list)

sire_index_df = pd.DataFrame(sire_index_df)
sire_index_df.columns = [
    "場所",
    "R",
    "馬番",
    "馬名",
    "種牡馬名",
    "種牡馬妙味度_1",
    "種牡馬妙味度_2",
    "種牡馬妙味度_3",
    "種牡馬妙味度_4",
    "種牡馬妙味度_5",
    "種牡馬妙味度_6",
    "種牡馬妙味度_7",
    "種牡馬妙味度_8",
]

In [13]:
# 各行の平均値を算出（NaNは無視して計算）
ave_list = []
for i in range(len(sire_index_df)):
    num_list = []
    for j in range(5, 13):
        x = sire_index_df.iloc[i, j]
        num_list.append(x)
    ave = round(np.nanmean(num_list), 3)  # 小数点第2位までを出力
    ave_list.append(ave)

sire_index_df["種牡馬妙味度"] = ave_list

  ave = round(np.nanmean(num_list), 3)  # 小数点第2位までを出力


In [14]:
sire_index_df["種牡馬妙味度順位"] = sire_index_df.groupby([
    "場所", "R"
])["種牡馬妙味度"].rank(method="min", ascending=False)

In [15]:
sire_index_df.to_csv(INDEX_DIR / f"sire_index_{kaisai_date}.csv", index=False)

### 調教師

In [16]:
trainer_index_df = []
columns_name = ["場所", "クラス", "種別", "年齢", "距離区分", "回り", "臨戦過程", "道悪判定"]

for i in range(len(base_df_preprocessed)):
    trainer_columns_list = []
    place = base_df_preprocessed.iloc[i]["場所"]
    R = base_df_preprocessed.iloc[i]["R"]
    umaban = base_df_preprocessed.iloc[i]["馬番"]
    horse_name = base_df_preprocessed.iloc[i]["馬名"]
    trainer_name = base_df_preprocessed.iloc[i]["調教師名"]
    if len(trainer_name) == 3:
        trainer_name = trainer_name + "　"
    elif len(trainer_name) == 2:
        trainer_name = trainer_name + "　　"
    trainer_index_list = [place, R, umaban, horse_name, trainer_name]
    
    for j in columns_name:
        l = base_df_preprocessed.iloc[i][j]
        trainer_columns_list.append(l)
    try:
        for k in trainer_columns_list:
            m = trainer_list_df.loc[trainer_name, k]
            # NaNを無視して値を追加
            if not math.isnan(m):  # NaNでない場合のみ追加
                trainer_index_list.append(m)
    except KeyError:
        trainer_index_list.append(np.nan)
    
    trainer_index_df.append(trainer_index_list)

trainer_index_df = pd.DataFrame(trainer_index_df)
trainer_index_df.columns = [
    "場所",
    "R",
    "馬番",
    "馬名",
    "調教師名",
    "調教師妙味度_1",
    "調教師妙味度_2",
    "調教師妙味度_3",
    "調教師妙味度_4",
    "調教師妙味度_5",
    "調教師妙味度_6",
    "調教師妙味度_7",
    "調教師妙味度_8",
]

In [17]:
# 各行の平均値を算出（NaNは無視して計算）
ave_list = []
for i in range(len(trainer_index_df)):
    num_list = []
    for j in range(5, 13):
        x = trainer_index_df.iloc[i, j]
        num_list.append(x)
    ave = round(np.nanmean(num_list), 3)  # 小数点第2位までを出力
    ave_list.append(ave)

trainer_index_df["調教師妙味度"] = ave_list

  ave = round(np.nanmean(num_list), 3)  # 小数点第2位までを出力


In [18]:
trainer_index_df["調教師妙味度順位"] = trainer_index_df.groupby([
    "場所", "R"
])["調教師妙味度"].rank(method="min", ascending=False)

In [19]:
trainer_index_df.to_csv(INDEX_DIR / f"trainer_index_{kaisai_date}.csv", index=False)

## データ結合と総合妙味度の算出

In [20]:
index_results_df = jockey_index_df.merge(sire_index_df)
index_results_df = index_results_df.merge(trainer_index_df)

In [21]:
index_results_df = index_results_df[[
    "場所",
    "R",
    "馬番",
    "馬名",
    "騎手妙味度",
    "騎手妙味度順位",
    "種牡馬妙味度",
    "種牡馬妙味度順位",
    "調教師妙味度",
    "調教師妙味度順位"
]]
index_results_df

Unnamed: 0,場所,R,馬番,馬名,騎手妙味度,騎手妙味度順位,種牡馬妙味度,種牡馬妙味度順位,調教師妙味度,調教師妙味度順位
0,中山,1,1,イッツソーブライト,92.2,14.0,,,82.667,16.0
1,中山,1,2,キタノトゥシャイン,90.4,15.0,101.333,2.0,96.833,6.0
2,中山,1,3,プラチナムディスク,100.2,2.0,80.800,11.0,96.667,8.0
3,中山,1,4,ヘイマー,,,89.714,10.0,90.500,13.0
4,中山,1,5,グレイシエーション,98.4,8.0,96.857,6.0,91.000,12.0
...,...,...,...,...,...,...,...,...,...,...
346,京都,12,7,サニーサルサ,89.6,9.0,98.000,7.0,101.500,6.0
347,京都,12,8,デビューフライト,102.0,3.0,89.600,11.0,98.167,8.0
348,京都,12,9,ジョワイユノエル,104.0,2.0,105.000,2.0,104.143,5.0
349,京都,12,10,アフェシス,117.2,1.0,104.857,3.0,97.667,9.0


In [22]:
index_results_df["妙味度合計"]= \
    index_results_df["騎手妙味度"] + index_results_df["種牡馬妙味度"] + index_results_df["調教師妙味度"]

In [23]:
index_results_df["総合妙味度"] = round(index_results_df["妙味度合計"] / 3, 3)

In [24]:
index_results_df["総合妙味度順位"] = index_results_df.groupby([
    "場所", "R"
])["総合妙味度"].rank(method="min", ascending=False)

In [25]:
index_results_df.to_csv(RESULT_DIR / f"results_index_{kaisai_date}.csv", index=False, encoding="shift-jis")