# ボートレース場ごとの着順予想モデル

対象：2025年12月1日～31日のデータを使用して、ボートレース場ごとに着順予想モデルを構築。
GradientBoostingRegressor と GradientBoostingClassifier を比較します。

## セットアップ

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

## データ変形関数の定義

In [2]:
def reshape_programs(df):
    """
    Programs を艇単位に変形
    各艇について、レース情報と艇固有情報（選手、モーター、ボート）を1行に
    """
    frames = []
    race_cols = ['レースコード', 'レース日', 'レース場', 'レース回']
    
    for frame in range(1, 7):
        prefix = f'{frame}枠_'
        cols = [c for c in df.columns if c.startswith(prefix)]
        if cols:
            tmp = df[race_cols + cols].copy()
            tmp.columns = race_cols + [c[len(prefix):] for c in cols]
            tmp['枠'] = frame
            frames.append(tmp)
    
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

def reshape_previews(df):
    """
    Previews を艇単位に変形
    レース情報と艇固有情報（展示タイム、チルト調整など）を1行に
    """
    frames = []
    
    # レース共通属性を抽出
    race_cols = ['レースコード', 'レース日', 'レース場', 'レース回']
    race_attrs = ['風速(m)', '風向', '波の高さ(cm)', '天候', '気温(℃)', '水温(℃)']
    
    # 各艇について処理
    for boat in range(1, 7):
        prefix = f'艇{boat}_'
        boat_cols = [c for c in df.columns if c.startswith(prefix)]
        if boat_cols:
            tmp = df[race_cols + race_attrs + boat_cols].copy()
            # 艇固有列をリネーム
            boat_col_names = [c[len(prefix):] for c in boat_cols]
            tmp.columns = race_cols + race_attrs + boat_col_names
            tmp['艇番'] = boat
            frames.append(tmp)
    
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

def reshape_results(df):
    """
    Results を艇単位に変形
    着順情報を艇番とマッチングして1行に集約
    """
    # 1着～6着の情報を艇番でマッピング
    result_list = []
    
    for idx, row in df.iterrows():
        race_code = row['レースコード']
        
        for place in range(1, 7):
            boat_col = f'{place}着_艇番'
            if boat_col in df.columns and pd.notna(row[boat_col]):
                boat_num = int(row[boat_col])
                result_list.append({
                    'レースコード': race_code,
                    '艇番': boat_num,
                    '着順': place
                })
    
    return pd.DataFrame(result_list) if result_list else pd.DataFrame()

print('Reshape functions ready')

Reshape functions ready


## 2025年12月のデータで学習

### 1. データ読み込み

In [3]:
# Load data for 2025-12
cwd = Path.cwd()
repo_root = cwd if (cwd / 'data').exists() else cwd.parent.parent

year = '2025'
month = '12'
date_list = [f'{day:02d}' for day in range(1, 32)]

print(f"Loading data for {year}-{month}...")

all_data = {}
for date_str in date_list:
    prog_path = repo_root / 'data' / 'programs' / year / month / f'{date_str}.csv'
    prev_path = repo_root / 'data' / 'previews' / year / month / f'{date_str}.csv'
    res_path = repo_root / 'data' / 'results' / year / month / f'{date_str}.csv'
    
    if prog_path.exists() and prev_path.exists() and res_path.exists():
        all_data[date_str] = {
            'programs': pd.read_csv(prog_path),
            'previews': pd.read_csv(prev_path),
            'results': pd.read_csv(res_path)
        }
        print(f"✓ {date_str}")

print(f"\nLoaded {len(all_data)} days")

Loading data for 2025-12...
✓ 01
✓ 02
✓ 03
✓ 04
✓ 05
✓ 06
✓ 07
✓ 08
✓ 09
✓ 10
✓ 11
✓ 12
✓ 13
✓ 14
✓ 15
✓ 16
✓ 17
✓ 18
✓ 19
✓ 20
✓ 21
✓ 22
✓ 23
✓ 24
✓ 25
✓ 26
✓ 27
✓ 28
✓ 29
✓ 30
✓ 31

Loaded 31 days


### 2. データ統合

In [4]:
combined_data = []

for date_str, data in all_data.items():
    try:
        prog = reshape_programs(data['programs'])
        prev = reshape_previews(data['previews'])
        res = reshape_results(data['results'])
        
        if prev.empty or prog.empty or res.empty:
            print(f'✗ {date_str}: Empty dataframe')
            continue
        
        # Step 1: Merge previews + programs
        # レースコードと艇番の両方でマッチング
        # 重複する列を事前に処理
        prog_cols = set(prog.columns)
        prev_cols = set(prev.columns)
        overlap_cols = prog_cols & prev_cols - {'レースコード', '艇番'}
        
        # 重複する列を prog から削除（prev を優先）
        prog_to_merge = prog.drop(columns=list(overlap_cols))
        
        merged = prev.merge(
            prog_to_merge,
            on=['レースコード', '艇番'],
            how='left'
        )
        
        if merged.empty:
            print(f'✗ {date_str}: No match between previews and programs')
            continue
        
        # Step 2: Merge with results
        merged = merged.merge(
            res[['レースコード', '艇番', '着順']],
            on=['レースコード', '艇番'],
            how='left'
        )
        
        merged['日付'] = date_str
        combined_data.append(merged)
        
        # Count features (columns not in metadata)
        metadata_cols = {'レースコード', 'レース日', 'レース場', 'レース回', '艇番', '枠', '日付', '着順'}
        feature_count = len([c for c in merged.columns if c not in metadata_cols])
        print(f'✓ {date_str}: {merged.shape} (features: {feature_count})')
    except Exception as e:
        print(f'✗ {date_str}: {type(e).__name__}: {str(e)[:80]}')

if combined_data:
    final_data = pd.concat(combined_data, ignore_index=True)
    print(f'\nFinal: {final_data.shape}')
    print(f'Dates: {final_data["日付"].nunique()}')
    print(f'Stadiums: {final_data["レース場"].nunique()}')
else:
    print('No data merged')

✓ 01: (432, 47) (features: 39)
✓ 02: (360, 47) (features: 39)
✓ 03: (360, 47) (features: 39)
✓ 04: (576, 47) (features: 39)
✓ 05: (648, 47) (features: 39)
✓ 06: (576, 47) (features: 39)
✓ 07: (504, 47) (features: 39)
✓ 08: (504, 47) (features: 39)
✓ 09: (660, 47) (features: 39)
✓ 10: (642, 47) (features: 39)
✓ 11: (504, 47) (features: 39)
✓ 12: (504, 47) (features: 39)
✓ 13: (648, 47) (features: 39)
✓ 14: (720, 47) (features: 39)
✓ 15: (660, 47) (features: 39)
✓ 16: (498, 47) (features: 39)
✓ 17: (564, 47) (features: 39)
✓ 18: (516, 47) (features: 39)
✓ 19: (492, 47) (features: 39)
✓ 20: (552, 47) (features: 39)
✓ 21: (504, 47) (features: 39)
✓ 22: (720, 47) (features: 39)
✓ 23: (720, 47) (features: 39)
✓ 24: (1008, 47) (features: 39)
✓ 25: (648, 47) (features: 39)
✓ 26: (648, 47) (features: 39)
✓ 27: (636, 47) (features: 39)
✓ 28: (642, 47) (features: 39)
✓ 29: (714, 47) (features: 39)
✓ 30: (846, 47) (features: 39)
✓ 31: (792, 47) (features: 39)

Final: (18798, 47)
Dates: 31
Stadiums

### 3. 特徴量準備

In [5]:
exclude_cols = {'レースコード', 'レース日', 'レース場', 'レース回', '枠', '艇番', '日付', '着順'}
# カテゴリカル列を除外
categorical_cols = {'風向', '天候'}

numeric_cols = []
for col in final_data.columns:
    if col not in exclude_cols and col not in categorical_cols:
        try:
            # 実際に数値に変換できるかテスト
            pd.to_numeric(final_data[col], errors='coerce')
            numeric_cols.append(col)
        except:
            pass

# Handle NaN values - fill with median for each column
X = final_data[numeric_cols].copy()

# Convert to numeric with coercion (converts non-numeric to NaN)
for col in X.columns:
    X[col] = pd.to_numeric(X[col], errors='coerce')

# Fill NaN with median
for col in X.columns:
    median_val = X[col].median()
    if pd.isna(median_val):
        # If all values are NaN, use 0
        X[col].fillna(0, inplace=True)
    else:
        X[col].fillna(median_val, inplace=True)

# Verify no NaN remains
total_nan = X.isna().sum().sum()
print(f'Total NaN count after filling: {total_nan}')

y = final_data['着順']
stadiums = sorted(final_data['レース場'].unique())

print(f'\nFeatures: {len(numeric_cols)}')
print(f'Samples: {len(X)}')
print(f'Stadiums: {len(stadiums)}')
print(f'Target missing: {y.isna().sum()}')

Total NaN count after filling: 0

Features: 37
Samples: 18798
Stadiums: 21
Target missing: 584


### 4. モデル学習

In [6]:
results_summary = []
errors = []

for stadium in stadiums:
    mask = final_data['レース場'] == stadium
    X_std = X[mask].reset_index(drop=True)
    y_std = y[mask].reset_index(drop=True)
    
    # Remove missing targets
    valid = y_std.notna()
    X_std = X_std[valid].reset_index(drop=True)
    y_std = y_std[valid].reset_index(drop=True)
    
    if len(X_std) < 10:
        print(f'{stadium}: insufficient data ({len(X_std)} samples)')
        continue
    
    X_train, X_test, y_train, y_test = train_test_split(
        X_std, y_std, test_size=0.3, random_state=42
    )
    
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)
    
    # GBR
    try:
        gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
        gbr.fit(X_train_s, y_train)
        mae = mean_absolute_error(y_test, gbr.predict(X_test_s))
        results_summary.append({'stadium': stadium, 'model': 'GBR', 'mae': mae, 'samples': len(X_std)})
    except Exception as e:
        errors.append(f'Stadium {stadium} GBR: {type(e).__name__}: {str(e)[:50]}')
    
    # GBC
    try:
        gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
        gbc.fit(X_train_s, y_train)
        acc = accuracy_score(y_test, gbc.predict(X_test_s))
        results_summary.append({'stadium': stadium, 'model': 'GBC', 'acc': acc, 'samples': len(X_std)})
    except Exception as e:
        errors.append(f'Stadium {stadium} GBC: {type(e).__name__}: {str(e)[:50]}')

if results_summary:
    results_df = pd.DataFrame(results_summary)
    print('Training completed')
    print(results_df)
else:
    print('No results')
    
if errors:
    print('\nErrors:')
    for err in errors:
        print(f'  {err}')

Training completed
    stadium model       mae  samples       acc
0         1   GBR  1.394577      371       NaN
1         1   GBC       NaN      371  0.160714
2         2   GBR  1.187518     1494       NaN
3         2   GBC       NaN     1494  0.242762
4         3   GBR  1.158531     1113       NaN
5         3   GBC       NaN     1113  0.311377
6         4   GBR  1.383277      401       NaN
7         4   GBC       NaN      401  0.223140
8         5   GBR  1.242250     1354       NaN
9         5   GBC       NaN     1354  0.280098
10        6   GBR  1.140908     1141       NaN
11        6   GBC       NaN     1141  0.268222
12        7   GBR  1.160620     1563       NaN
13        7   GBC       NaN     1563  0.304904
14        8   GBR  1.242461     1476       NaN
15        8   GBC       NaN     1476  0.270880
16        9   GBR  1.177126     1241       NaN
17        9   GBC       NaN     1241  0.268097
18       10   GBR  1.150523     1106       NaN
19       10   GBC       NaN     1106  0.3

In [7]:
# Store models and scaler info
models_dict = {}

for stadium in stadiums:
    mask = final_data['レース場'] == stadium
    X_std = X[mask].reset_index(drop=True)
    y_std = y[mask].reset_index(drop=True)
    
    # Remove missing targets
    valid = y_std.notna()
    X_std = X_std[valid].reset_index(drop=True)
    y_std = y_std[valid].reset_index(drop=True)
    
    if len(X_std) < 10:
        continue
    
    X_train, X_test, y_train, y_test = train_test_split(
        X_std, y_std, test_size=0.3, random_state=42
    )
    
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)
    
    # Train and save GBC model
    gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
    gbc.fit(X_train_s, y_train)
    
    models_dict[stadium] = {
        'model': gbc,
        'scaler': scaler,
        'features': numeric_cols
    }

# Save models
model_save_path = repo_root / 'models' / 'stadium_models.pkl'
model_save_path.parent.mkdir(parents=True, exist_ok=True)

with open(model_save_path, 'wb') as f:
    pickle.dump(models_dict, f)

print(f'Saved {len(models_dict)} models to {model_save_path}')

Saved 21 models to /Users/mahiguch/dev/boatrace/data/models/stadium_models.pkl


## 2026年1月1日のデータで予想

In [8]:
# Load data for 2026-01-01
test_date = '2026-01-01'
year_test = '2026'
month_test = '01'
day_test = '01'

prog_path = repo_root / 'data' / 'programs' / year_test / month_test / f'{day_test}.csv'
prev_path = repo_root / 'data' / 'previews' / year_test / month_test / f'{day_test}.csv'
res_path = repo_root / 'data' / 'results' / year_test / month_test / f'{day_test}.csv'

# Check if files exist
if not all([prog_path.exists(), prev_path.exists(), res_path.exists()]):
    print(f'Missing files for {test_date}:')
    print(f'  programs: {prog_path.exists()}')
    print(f'  previews: {prev_path.exists()}')
    print(f'  results: {res_path.exists()}')
else:
    prog_test = pd.read_csv(prog_path)
    prev_test = pd.read_csv(prev_path)
    res_test = pd.read_csv(res_path)
    
    print(f'Loaded data for {test_date}:')
    print(f'  Programs: {prog_test.shape}')
    print(f'  Previews: {prev_test.shape}')
    print(f'  Results: {res_test.shape}')

Loaded data for 2026-01-01:
  Programs: (154, 177)
  Previews: (118, 53)
  Results: (153, 100)


### 2. データ変形とマージ

In [9]:
# Reshape and merge test data
prog_reshaped = reshape_programs(prog_test)
prev_reshaped = reshape_previews(prev_test)
res_reshaped = reshape_results(res_test)

print(f'Reshaped:')
print(f'  Programs: {prog_reshaped.shape}')
print(f'  Previews: {prev_reshaped.shape}')
print(f'  Results: {res_reshaped.shape}')

# Debug: Check columns
print(f'\nDebug columns:')
print(f'  "レース場" in previews: {"レース場" in prev_reshaped.columns}')
print(f'  "レース場" in programs: {"レース場" in prog_reshaped.columns}')

# Handle overlapping columns before merge
prog_cols = set(prog_reshaped.columns)
prev_cols = set(prev_reshaped.columns)
overlap_cols = (prog_cols & prev_cols) - {'レースコード', '艇番'}

print(f'  Overlapping columns (to remove from programs): {overlap_cols}')

# Remove overlapping columns from programs (keep previews version)
prog_to_merge = prog_reshaped.drop(columns=list(overlap_cols))

# Merge without suffixes since we removed overlap
test_data = prev_reshaped.merge(
    prog_to_merge,
    on=['レースコード', '艇番'],
    how='left'
)

test_data = test_data.merge(
    res_reshaped[['レースコード', '艇番', '着順']],
    on=['レースコード', '艇番'],
    how='left'
)

# Verify columns
print(f'\nMerged test data: {test_data.shape}')
print(f'Contains レース場: {"レース場" in test_data.columns}')
if 'レース場' in test_data.columns:
    print(f'Unique stadiums: {test_data["レース場"].nunique()}')
print(f'Actual results available: {test_data["着順"].notna().sum()} rows')

Reshaped:
  Programs: (924, 33)
  Previews: (708, 17)
  Results: (900, 3)

Debug columns:
  "レース場" in previews: True
  "レース場" in programs: True
  Overlapping columns (to remove from programs): {'レース回', 'レース場', 'レース日'}

Merged test data: (708, 46)
Contains レース場: True
Unique stadiums: 10
Actual results available: 687 rows


### 3. 予想実行

In [10]:
# Load saved models
with open(model_save_path, 'rb') as f:
    models_dict = pickle.load(f)

print(f'Loaded {len(models_dict)} models')

# Prepare features for prediction
X_test_pred = test_data[numeric_cols].copy()
for col in X_test_pred.columns:
    X_test_pred[col] = pd.to_numeric(X_test_pred[col], errors='coerce')

for col in X_test_pred.columns:
    median_val = X_test_pred[col].median()
    if pd.isna(median_val):
        X_test_pred[col].fillna(0, inplace=True)
    else:
        X_test_pred[col].fillna(median_val, inplace=True)

# Function to generate Sanrentan prediction from probabilities
def predict_sanrentan(model, scaler, X_row, stadiums_set):
    """
    各艇の確率を計算して、上位3艇を選び三連単を生成
    """
    X_scaled = scaler.transform(X_row)
    
    # predict_proba で各着順の確率を取得
    proba = model.predict_proba(X_scaled)[0]
    classes = model.classes_
    
    # 確率をソート
    prob_dict = {cls: prob for cls, prob in zip(classes, proba)}
    sorted_probs = sorted(prob_dict.items(), key=lambda x: x[1], reverse=True)
    
    # 上位3つの着順を抽出
    top_3 = sorted_probs[:3]
    
    return top_3

# Make predictions for Sanrentan
sanrentan_predictions = []

for idx, row in test_data.iterrows():
    stadium = row['レース場']
    boat_num = row['艇番']
    
    # Skip if no model for this stadium
    if stadium not in models_dict:
        sanrentan_predictions.append(None)
        continue
    
    model_info = models_dict[stadium]
    model = model_info['model']
    scaler = model_info['scaler']
    
    # Prepare features
    X_row = X_test_pred.iloc[idx:idx+1]
    
    try:
        top_3 = predict_sanrentan(model, scaler, X_row, stadium)
        # 艇番を確率でソート
        top_boats = [int(boat) for boat, _ in top_3]
        sanrentan_predictions.append(tuple(top_boats))
    except:
        sanrentan_predictions.append(None)

test_data['予想三連単'] = sanrentan_predictions

print(f'\nSanrentan predictions completed: {test_data["予想三連単"].notna().sum()} rows')
print(f'Sample predictions:')
print(test_data[['レースコード', '艇番', '予想三連単']].head(10))

Loaded 21 models

Sanrentan predictions completed: 708 rows
Sample predictions:
         レースコード  艇番      予想三連単
0  202601010601   1  (1, 2, 6)
1  202601010602   1  (2, 5, 1)
2  202601010603   1  (1, 2, 3)
3  202601010604   1  (1, 3, 4)
4  202601010605   1  (6, 2, 1)
5  202601010606   1  (1, 3, 2)
6  202601010607   1  (2, 1, 3)
7  202601010608   1  (1, 2, 3)
8  202601010609   1  (1, 2, 3)
9  202601010610   1  (1, 2, 5)


### 4. 的中率の計算（三連単）

In [11]:
# レースコード単位で三連単の着順を取得
def get_actual_sanrentan(race_results):
    """
    レースの結果から実際の三連単を取得
    race_results: レースの1着～3着の艇番
    """
    if len(race_results) >= 3:
        return tuple(race_results[:3])
    return None

# レースコードごとに実績三連単を計算
race_actual_sanrentan = {}
for race_code in test_data['レースコード'].unique():
    race_mask = test_data['レースコード'] == race_code
    race_subset = test_data[race_mask].sort_values('着順')
    
    # 1着～3着の艇番を取得
    sanrentan = tuple(race_subset[race_subset['着順'].notna()].head(3)['艇番'].astype(int).values)
    if len(sanrentan) == 3:
        race_actual_sanrentan[race_code] = sanrentan

print(f'Race-level Sanrentan results: {len(race_actual_sanrentan)} races')

# レース単位での予想を集計
race_predictions = {}
for race_code in test_data['レースコード'].unique():
    race_mask = test_data['レースコード'] == race_code
    race_subset = test_data[race_mask]
    
    # 最初の行から予想三連単を取得（すべての艇で同じレースなので最初の行でよい）
    if race_subset['予想三連単'].notna().any():
        race_predictions[race_code] = race_subset['予想三連単'].iloc[0]

print(f'Race-level predictions: {len(race_predictions)} races')

# 的中判定
sanrentan_matches = []
for race_code in race_actual_sanrentan.keys():
    if race_code in race_predictions:
        actual = race_actual_sanrentan[race_code]
        predicted = race_predictions[race_code]
        
        is_match = (predicted == actual)
        sanrentan_matches.append({
            'レースコード': race_code,
            '予想三連単': predicted,
            '実績三連単': actual,
            '的中': is_match
        })

if sanrentan_matches:
    sanrentan_df = pd.DataFrame(sanrentan_matches)
    correct = sanrentan_df['的中'].sum()
    total = len(sanrentan_df)
    accuracy = correct / total if total > 0 else 0
    
    print(f'\n三連単的中率: {correct}/{total} = {accuracy:.2%}')
    print(f'\n結果詳細:')
    print(sanrentan_df.to_string(index=False))
else:
    print('的中判定可能なデータなし')

Race-level Sanrentan results: 116 races
Race-level predictions: 118 races

三連単的中率: 5/116 = 4.31%

結果詳細:
      レースコード     予想三連単     実績三連単    的中
202601010601 (1, 2, 6) (1, 3, 2) False
202601010602 (2, 5, 1) (5, 4, 3) False
202601010603 (1, 2, 3) (1, 3, 2) False
202601010604 (1, 3, 4) (2, 5, 3) False
202601010605 (6, 2, 1) (2, 3, 6) False
202601010606 (1, 3, 2) (1, 6, 5) False
202601010607 (2, 1, 3) (2, 1, 3)  True
202601010608 (1, 2, 3) (4, 2, 1) False
202601010609 (1, 2, 3) (5, 2, 3) False
202601010610 (1, 2, 5) (1, 2, 5)  True
202601010611 (1, 3, 2) (1, 2, 3) False
202601010612 (1, 2, 3) (3, 5, 1) False
202601010701 (1, 2, 3) (1, 2, 3)  True
202601010702 (1, 4, 3) (1, 2, 4) False
202601010703 (3, 1, 5) (4, 6, 3) False
202601010704 (2, 4, 3) (5, 6, 3) False
202601010705 (5, 1, 6) (6, 5, 4) False
202601010706 (1, 2, 4) (1, 5, 3) False
202601010707 (1, 2, 4) (1, 4, 3) False
202601010708 (1, 4, 3) (1, 2, 6) False
202601010709 (1, 2, 4) (5, 4, 3) False
202601010710 (3, 1, 4) (1, 6, 3) False

In [12]:
### 5. 推定結果を CSV に出力

# レース単位で三連単の予想を整形して出力
output_records = []

for race_code in sorted(race_predictions.keys()):
    predicted_sanrentan = race_predictions[race_code]
    if predicted_sanrentan is not None:
        output_records.append({
            'レースコード': race_code,
            '予想1着': predicted_sanrentan[0],
            '予想2着': predicted_sanrentan[1],
            '予想3着': predicted_sanrentan[2]
        })

if output_records:
    output_df = pd.DataFrame(output_records)
    
    # 出力ディレクトリを作成
    output_dir = repo_root / 'data' / 'estimate' / year_test / month_test
    output_dir.mkdir(parents=True, exist_ok=True)
    
    # ファイル名
    output_path = output_dir / f'{day_test}.csv'
    
    # CSV に出力
    output_df.to_csv(output_path, index=False)
    
    print(f'Output saved to: {output_path}')
    print(f'Total predictions: {len(output_df)}')
    print(f'\nSample output:')
    print(output_df.head(10))
else:
    print('No predictions to output')

Output saved to: /Users/mahiguch/dev/boatrace/data/data/estimate/2026/01/01.csv
Total predictions: 118

Sample output:
         レースコード  予想1着  予想2着  予想3着
0  202601010601     1     2     6
1  202601010602     2     5     1
2  202601010603     1     2     3
3  202601010604     1     3     4
4  202601010605     6     2     1
5  202601010606     1     3     2
6  202601010607     2     1     3
7  202601010608     1     2     3
8  202601010609     1     2     3
9  202601010610     1     2     5


In [15]:
# Create detailed comparison results in the same format as confirmation file
results_list = []

for race_code in sorted(race_actual_sanrentan.keys()):
    if race_code in race_predictions:
        predicted = race_predictions[race_code]
        actual = race_actual_sanrentan[race_code]
        
        # Extract each position
        pred_1st, pred_2nd, pred_3rd = predicted[0], predicted[1], predicted[2]
        actual_1st, actual_2nd, actual_3rd = actual[0], actual[1], actual[2]
        
        # Check matches (○ or ×)
        match_1st = '○' if pred_1st == actual_1st else '×'
        match_2nd = '○' if pred_2nd == actual_2nd else '×'
        match_3rd = '○' if pred_3rd == actual_3rd else '×'
        match_all = '○' if (pred_1st == actual_1st and pred_2nd == actual_2nd and pred_3rd == actual_3rd) else '×'
        
        results_list.append({
            'レースコード': race_code,
            '予想1着': pred_1st,
            '予想2着': pred_2nd,
            '予想3着': pred_3rd,
            '実際1着': actual_1st,
            '実際2着': actual_2nd,
            '実際3着': actual_3rd,
            '1着的中': match_1st,
            '2着的中': match_2nd,
            '3着的中': match_3rd,
            '全的中': match_all
        })

if results_list:
    results_df = pd.DataFrame(results_list)
    
    # Calculate statistics
    total_races = len(results_df)
    match_1st_count = (results_df['1着的中'] == '○').sum()
    match_2nd_count = (results_df['2着的中'] == '○').sum()
    match_3rd_count = (results_df['3着的中'] == '○').sum()
    match_all_count = (results_df['全的中'] == '○').sum()
    
    print('=== 的中率レポート ===')
    print(f'レース数: {total_races}')
    print(f'1着的中: {match_1st_count}/{total_races} ({match_1st_count/total_races:.1%})')
    print(f'2着的中: {match_2nd_count}/{total_races} ({match_2nd_count/total_races:.1%})')
    print(f'3着的中: {match_3rd_count}/{total_races} ({match_3rd_count/total_races:.1%})')
    print(f'三連単的中: {match_all_count}/{total_races} ({match_all_count/total_races:.1%})')
    
    print(f'\n=== 詳細結果 ===')
    print(results_df.to_string(index=False))
else:
    print('No results to display')

=== 的中率レポート ===
レース数: 116
1着的中: 51/116 (44.0%)
2着的中: 30/116 (25.9%)
3着的中: 21/116 (18.1%)
三連単的中: 5/116 (4.3%)

=== 詳細結果 ===
      レースコード  予想1着  予想2着  予想3着  実際1着  実際2着  実際3着 1着的中 2着的中 3着的中 全的中
202601010601     1     2     6     1     3     2    ○    ×    ×   ×
202601010602     2     5     1     5     4     3    ×    ×    ×   ×
202601010603     1     2     3     1     3     2    ○    ×    ×   ×
202601010604     1     3     4     2     5     3    ×    ×    ×   ×
202601010605     6     2     1     2     3     6    ×    ×    ×   ×
202601010606     1     3     2     1     6     5    ○    ×    ×   ×
202601010607     2     1     3     2     1     3    ○    ○    ○   ○
202601010608     1     2     3     4     2     1    ×    ○    ×   ×
202601010609     1     2     3     5     2     3    ×    ○    ○   ×
202601010610     1     2     5     1     2     5    ○    ○    ○   ○
202601010611     1     3     2     1     2     3    ○    ×    ×   ×
202601010612     1     2     3     3     5     1    ×    ×   