# Programs のみを用いた着順予想モデル

対象：2016年～2025年のデータを使用して、ボートレース場ごとに着順予想モデルを構築。
- データソース: programs のみ（風向、天候は使用しない）
- 学習対象: 日次が3日目以降のレースのみ
- モデル: GradientBoostingClassifier（レース場ごと）
- 予想・的中率検証: 2026年1月1日～30日

## セットアップ

In [1]:
from pathlib import Path
import calendar
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
import warnings
warnings.filterwarnings('ignore')

print('Setup complete')

Setup complete


## データ変形関数の定義

In [2]:
def reshape_programs(df):
    """
    Programs を艇単位に変形
    各艇について、レース情報と艇固有情報（選手、モーター、ボート）を1行に
    """
    frames = []
    race_cols = ['レースコード', '日次', 'レース日', 'レース場', 'レース回']
    
    for frame in range(1, 7):
        prefix = f'{frame}枠_'
        cols = [c for c in df.columns if c.startswith(prefix)]
        if cols:
            tmp = df[race_cols + cols].copy()
            tmp.columns = race_cols + [c[len(prefix):] for c in cols]
            tmp['枠'] = frame
            frames.append(tmp)
    
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

def reshape_results(df):
    """
    Results を艇単位に変形
    着順情報を艇番とマッチングして1行に集約
    新形式: 〇着_艇番 というカラム形式に対応
    """
    result_list = []
    
    for idx, row in df.iterrows():
        race_code = row['レースコード']
        
        # Try to find boat numbers for each position
        for place in range(1, 7):
            boat_col = f'{place}着_艇番'
            
            # Check if column exists
            if boat_col not in df.columns:
                continue
                
            boat_num = row[boat_col]
            
            # Skip if boat number is NaN or invalid
            if pd.isna(boat_num):
                continue
            
            # Handle both int and float types
            try:
                boat_num = int(boat_num)
                if boat_num < 1 or boat_num > 6:
                    continue
                    
                result_list.append({
                    'レースコード': race_code,
                    '艇番': boat_num,
                    '着順': place
                })
            except (ValueError, TypeError):
                continue
    
    return pd.DataFrame(result_list) if result_list else pd.DataFrame()

def extract_day_number(day_str):
    """
    日次文字列から数値を抽出
    '第1日' -> 1, '第2日' -> 2, etc.
    """
    if pd.isna(day_str):
        return np.nan
    day_str = str(day_str)
    if '第' in day_str and '日' in day_str:
        try:
            return int(day_str.replace('第', '').replace('日', ''))
        except:
            return np.nan
    return np.nan

print('Reshape functions ready')
# Stadium name to number mapping

# Stadium code from レースコード to standard stadium number (1-24)
RACE_CODE_TO_STADIUM = {
    0: 17,   # 唐津
    6: 6,    # 浜名湖
    7: 7,    # 蒲郡
    8: 8,    # 常滑
    9: 9,    # 津
    10: 10,  # 三国
    16: 13,  # 丸亀
    19: 21,  # 徳山
    20: 20,  # 下関
    22: 19,  # 芦屋
    24: 18,  # 大村
}

def extract_stadium_from_race_code(race_code):
    """
    レースコードから競艇場番号を抽出
    レースコード形式: YYYYMMDDCCRRwhere CC is stadium code (positions 8-10)
    """
    if pd.isna(race_code):
        return np.nan
    race_code_str = str(race_code)
    if len(race_code_str) >= 10:
        try:
            stadium_code = int(race_code_str[8:10])
            return RACE_CODE_TO_STADIUM.get(stadium_code, np.nan)
        except:
            return np.nan
    return np.nan

print('Stadium extraction from race code ready')

# Stadium name to standard number mapping (1-24)
# Note: びわこ can be written as 琵琶湖 in some data versions
STADIUM_NAME_TO_NUMBER = {
    'ボートレース桐生': 1,
    'ボートレース戸田': 2,
    'ボートレース江戸川': 3,
    'ボートレース平和島': 4,
    'ボートレース多摩川': 5,
    'ボートレース浜名湖': 6,
    'ボートレース蒲郡': 7,
    'ボートレース常滑': 8,
    'ボートレース津': 9,
    'ボートレース三国': 10,
    'ボートレースびわこ': 11,
    'ボートレース琵琶湖': 11,  # Alternative name for びわこ
    'ボートレース住之江': 12,
    'ボートレース尼崎': 13,
    'ボートレース鳴門': 14,
    'ボートレース丸亀': 15,
    'ボートレース児島': 16,
    'ボートレース宮島': 17,
    'ボートレース徳山': 18,
    'ボートレース下関': 19,
    'ボートレース若松': 20,
    'ボートレース芦屋': 21,
    'ボートレース福岡': 22,
    'ボートレース唐津': 23,
    'ボートレース大村': 24,
}

def map_stadium_name_to_number(stadium_name):
    """
    競艇場の名前から標準番号（1-24）に変換
    """
    if pd.isna(stadium_name):
        return np.nan
    stadium_name = str(stadium_name).strip()
    return STADIUM_NAME_TO_NUMBER.get(stadium_name, np.nan)

print('Stadium name to number mapping ready')

Reshape functions ready
Stadium extraction from race code ready
Stadium name to number mapping ready


## 2016～2025年のデータで学習

### 1. データ読み込み（2016～2025年）

In [3]:
cwd = Path.cwd()
repo_root = cwd if (cwd / 'data').exists() else cwd.parent.parent

print(f'Current working directory: {cwd}')
print(f'Repository root: {repo_root}')

# Load data for 2016-2025
all_data = {}
years = [str(y) for y in range(2016, 2026)]

for year in years:
    for month in range(1, 13):
        # Get the number of days in this month
        _, max_day = calendar.monthrange(int(year), month)
        for day in range(1, max_day + 1):
            month_str = f'{month:02d}'
            day_str = f'{day:02d}'
            prog_path = repo_root / 'data' / 'programs' / year / month_str / f'{day_str}.csv'
            res_path = repo_root / 'data' / 'results' / year / month_str / f'{day_str}.csv'
            
            if prog_path.exists() and res_path.exists():
                date_key = f'{year}-{month_str}-{day_str}'
                try:
                    all_data[date_key] = {
                        'programs': pd.read_csv(prog_path),
                        'results': pd.read_csv(res_path)
                    }
                except Exception as e:
                    print(f'Error loading {date_key}: {e}')

print(f'Loaded {len(all_data)} days (2016-2025)')

Current working directory: /Users/mahiguch/dev/boatrace/data/docs/notebooks
Repository root: /Users/mahiguch/dev/boatrace/data
Loaded 3649 days (2016-2025)


### 2. データ統合（stadium 1-24のみ）

In [4]:
combined_data = []
errors = []

print(f'Processing {len(all_data)} days...')
processed_count = 0
skipped_count = 0

for date_str, data in all_data.items():
    try:
        prog = reshape_programs(data['programs'])
        res = reshape_results(data['results'])
        
        if prog.empty or res.empty:
            continue
        
        # Extract day number
        prog['日次数'] = prog['日次'].apply(extract_day_number)
        
        # Map stadium name to number
        prog['レース場'] = prog['レース場'].apply(map_stadium_name_to_number)
        
        # Remove rows with unknown stadium
        prog = prog[prog['レース場'].notna()].reset_index(drop=True)
        
        if prog.empty:
            skipped_count += 1
            continue
        
        # Merge with results using レースコード
        merged = prog.merge(
            res[['レースコード', '艇番', '着順']],
            on=['レースコード', '艇番'],
            how='left'
        )
        
        combined_data.append(merged)
        processed_count += 1
        
    except Exception as e:
        errors.append((date_str, type(e).__name__))

print(f'✓ Processed {processed_count} days successfully')
if skipped_count > 0:
    print(f'⚠ Skipped {skipped_count} days (no mapped stadiums)')
if errors:
    print(f'✗ Errors: {len(errors)}')

if combined_data:
    final_data = pd.concat(combined_data, ignore_index=True)
    print(f'\n✓ Final: {final_data.shape}')
    print(f'✓ Unique dates: {final_data["レース日"].nunique()}')
    stadiums = sorted(final_data["レース場"].dropna().unique())
    print(f'✓ Stadiums: {[int(s) for s in stadiums]}')
    print(f'✓ Stadium count: {len(stadiums)}')
    print(f'✓ Target missing: {final_data["着順"].isna().sum()} rows')
else:
    print('\n✗ ERROR: No data merged!')

Processing 3649 days...
✓ Processed 3649 days successfully

✓ Final: (3309152, 36)
✓ Unique dates: 3649
✓ Stadiums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
✓ Stadium count: 24
✓ Target missing: 807114 rows


### 3. 特徴量準備

In [5]:
# Check if final_data exists
if 'final_data' not in locals():
    print('\n' + '='*70)
    print('ERROR: final_data not defined')
    print('='*70)
    print('\nPossible causes:')
    print('1. Cell-7 failed to load data (all_data is empty)')
    print('2. Cell-8 failed to merge data (combined_data is empty)')
    print('3. Stadium filter excluded all rows')
    print('\nAction: Run cell-7 and cell-8 again and check their output above.')
    print('='*70)
    raise NameError('final_data not defined - check cells 7 and 8')

print('✓ final_data loaded successfully')

if 'final_data' not in locals():
    print('ERROR: final_data not defined. Check cell-7 and cell-8.')
    print('This usually means data loading or merging failed.')
    raise NameError('final_data not defined')

exclude_cols = {
    'レースコード', '日次', 'レース日', 'レース場', 'レース回',
    '艇番', '登録番号', '選手名', '支部',
    '枠', '着順', '日次数',
    'モーター番号', 'ボート番号'  # IDs, not features
}

categorical_cols = {'級別'}

numeric_cols = []
for col in final_data.columns:
    if col not in exclude_cols and col not in categorical_cols:
        try:
            pd.to_numeric(final_data[col], errors='coerce')
            numeric_cols.append(col)
        except:
            pass

X = final_data[numeric_cols].copy()

for col in X.columns:
    X[col] = pd.to_numeric(X[col], errors='coerce')

for col in X.columns:
    median_val = X[col].median()
    if pd.isna(median_val):
        X[col].fillna(0, inplace=True)
    else:
        X[col].fillna(median_val, inplace=True)

if '級別' in final_data.columns:
    le_grade = LabelEncoder()
    X['級別_encoded'] = le_grade.fit_transform(final_data['級別'].fillna('未知'))

X['日次数'] = final_data['日次数'].fillna(1).astype(int)

total_nan = X.isna().sum().sum()
print(f'Total NaN count after filling: {total_nan}')

y = final_data['着順']
stadiums = sorted(final_data['レース場'].unique())

print(f'\nFeatures: {len(X.columns)}')
print(f'Samples: {len(X)}')
print(f'Stadiums: {len(stadiums)}')
print(f'Target missing: {y.isna().sum()}')

✓ final_data loaded successfully
Total NaN count after filling: 0

Features: 23
Samples: 3309152
Stadiums: 24
Target missing: 807114


### 4. モデル学習

In [6]:
results_summary = []

for stadium in stadiums:
    mask = final_data['レース場'] == stadium
    X_std = X[mask].reset_index(drop=True)
    y_std = y[mask].reset_index(drop=True)
    
    # Remove missing targets
    valid = y_std.notna()
    X_std = X_std[valid].reset_index(drop=True)
    y_std = y_std[valid].reset_index(drop=True)
    
    if len(X_std) < 10:
        print(f'Stadium {int(stadium)}: insufficient data ({len(X_std)} samples)')
        continue
    
    X_train, X_test, y_train, y_test = train_test_split(
        X_std, y_std, test_size=0.3, random_state=42
    )
    
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)
    
    # GBC
    try:
        gbc = GradientBoostingClassifier(
            n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
        )
        gbc.fit(X_train_s, y_train)
        acc = accuracy_score(y_test, gbc.predict(X_test_s))
        results_summary.append({'stadium': int(stadium), 'accuracy': acc, 'samples': len(X_std)})
    except Exception as e:
        print(f'Stadium {int(stadium)} error: {type(e).__name__}: {str(e)[:50]}')

if results_summary:
    results_df = pd.DataFrame(results_summary)
    print('\n=== モデル学習結果 ===')
    print(results_df.to_string(index=False))
    print(f'\n成功: {len(results_df)} / {len(stadiums)} スタジアム')
else:
    print('学習失敗')


=== モデル学習結果 ===
 stadium  accuracy  samples
       1  0.245026   135549
       2  0.239704   136862
       3  0.228728   124337
       4  0.240525   127345
       5  0.237856   120633
       6  0.245498   141780
       7  0.244620   137082
       8  0.247745   141529
       9  0.242493   123547
      10  0.254109   133840
      11  0.251832    66869
      12  0.237324   135299
      13  0.179033    64288
      14  0.178766    69290
      15  0.182146    74903
      16  0.189448    70379
      17  0.187979    70821
      18  0.188289    67341
      19  0.193120    61239
      20  0.186136    70734
      21  0.193711    67316
      22  0.184274    56383
      23  0.248160   128626
      24  0.238895   176046

成功: 24 / 24 スタジアム


### 5. モデル保存

In [7]:
# Store models and scaler info
models_dict = {}

for stadium in stadiums:
    mask = final_data['レース場'] == stadium
    X_std = X[mask].reset_index(drop=True)
    y_std = y[mask].reset_index(drop=True)
    
    # Remove missing targets
    valid = y_std.notna()
    X_std = X_std[valid].reset_index(drop=True)
    y_std = y_std[valid].reset_index(drop=True)
    
    if len(X_std) < 10:
        continue
    
    X_train, X_test, y_train, y_test = train_test_split(
        X_std, y_std, test_size=0.3, random_state=42
    )
    
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    
    # Train GBC model
    gbc = GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
    )
    gbc.fit(X_train_s, y_train)
    
    models_dict[stadium] = {
        'model': gbc,
        'scaler': scaler,
        'features': list(X.columns)
    }

# Save models
model_save_path = repo_root / 'models' / 'program_models.pkl'
model_save_path.parent.mkdir(parents=True, exist_ok=True)

with open(model_save_path, 'wb') as f:
    pickle.dump(models_dict, f)

print(f'✓ 保存: {len(models_dict)} モデルを {model_save_path} に保存')

✓ 保存: 24 モデルを /Users/mahiguch/dev/boatrace/data/models/program_models.pkl に保存


## 2026年1月1日～30日のデータで予想

### 1. テストデータ読み込み

In [8]:
# Load data for 2026-01
test_data_list = []

year_test = '2026'
month_test = '01'
month_num = int(month_test)
year_num = int(year_test)

_, max_day = calendar.monthrange(year_num, month_num)

for day in range(1, max_day + 1):
    day_str = f'{day:02d}'
    prog_path = repo_root / 'data' / 'programs' / year_test / month_test / f'{day_str}.csv'
    res_path = repo_root / 'data' / 'results' / year_test / month_test / f'{day_str}.csv'
    
    if prog_path.exists() and res_path.exists():
        try:
            prog_test = pd.read_csv(prog_path)
            res_test = pd.read_csv(res_path)
            test_data_list.append((day_str, prog_test, res_test))
        except Exception as e:
            print(f'Error loading {year_test}-{month_test}-{day_str}: {e}')

print(f'✓ Loaded {len(test_data_list)} days for 2026-01')

✓ Loaded 30 days for 2026-01


### 2. テストデータの変形とマージ

In [9]:
# Reshape and merge test data
test_combined = []

for day, prog_test, res_test in test_data_list:
    prog_reshaped = reshape_programs(prog_test)
    res_reshaped = reshape_results(res_test)
    
    if prog_reshaped.empty or res_reshaped.empty:
        continue
    
    # Extract day number
    prog_reshaped['日次数'] = prog_reshaped['日次'].apply(extract_day_number)
    
    # Map stadium name to number
    prog_reshaped['レース場'] = prog_reshaped['レース場'].apply(map_stadium_name_to_number)
    
    # Remove rows with unknown stadium
    prog_reshaped = prog_reshaped[prog_reshaped['レース場'].notna()].reset_index(drop=True)
    
    if prog_reshaped.empty:
        continue
    
    # Merge
    test_data = prog_reshaped.merge(
        res_reshaped[['レースコード', '艇番', '着順']],
        on=['レースコード', '艇番'],
        how='left'
    )
    
    test_combined.append(test_data)

if test_combined:
    test_data = pd.concat(test_combined, ignore_index=True)
    print(f'✓ Test data merged: {test_data.shape}')
    stadiums = sorted(test_data["レース場"].dropna().unique())
    print(f'✓ Unique stadiums: {[int(s) for s in stadiums]}')
    print(f'✓ Actual results available: {test_data["着順"].notna().sum()} rows')
else:
    print('✗ No test data available')

✓ Test data merged: (30441, 36)
✓ Unique stadiums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
✓ Actual results available: 21441 rows


### 3. 予想実行

In [10]:
# Load saved models
with open(model_save_path, 'rb') as f:
    models_dict = pickle.load(f)

print(f'Loaded {len(models_dict)} models')
print(f'Stadiums with models: {sorted(models_dict.keys())}')

# Get expected features from the first model
first_stadium = list(models_dict.keys())[0]
expected_features = models_dict[first_stadium]['features']
print(f'Expected features from saved model: {expected_features}')
print(f'Number of features: {len(expected_features)}')

# Check test data stadiums
test_stadiums = test_data['レース場'].unique()
print(f'\nStadiums in test data: {sorted(test_stadiums)}')
print(f'Stadiums NOT in models: {set(test_stadiums) - set(models_dict.keys())}')

# Check first few rows
print(f'\nFirst 10 rows details:')
for idx in range(min(10, len(test_data))):
    row = test_data.iloc[idx]
    print(f'  Row {idx}: レースコード={row["レースコード"]}, レース場={row["レース場"]}, 日次={row["日次"]}, 日次数={row["日次数"]}')

# Prepare features for prediction using the SAME features as training
# Build features in the same order as training
X_test_pred = pd.DataFrame(index=test_data.index)

# Add numeric features
for col in numeric_cols:
    if col in test_data.columns:
        X_test_pred[col] = pd.to_numeric(test_data[col], errors='coerce')
    else:
        print(f'Warning: {col} not in test_data, filling with 0')
        X_test_pred[col] = 0.0

# Fill NaN with median from training data
for col in numeric_cols:
    median_val = X[col].median()
    X_test_pred[col].fillna(median_val, inplace=True)

# Add encoded categorical
if '級別_encoded' in expected_features:
    if '級別' in test_data.columns:
        X_test_pred['級別_encoded'] = le_grade.transform(test_data['級別'].fillna('未知'))
    else:
        X_test_pred['級別_encoded'] = 0

# Add day number
if '日次数' in expected_features:
    X_test_pred['日次数'] = test_data['日次数'].fillna(1).astype(int)

# Ensure columns are in the EXACT same order as expected
X_test_pred = X_test_pred[expected_features]

print(f'\nX_test_pred shape: {X_test_pred.shape}')
print(f'X_test_pred columns match expected: {list(X_test_pred.columns) == expected_features}')

# Function to generate prediction
def predict_sanrentan(model, scaler, X_row):
    X_scaled = scaler.transform(X_row)
    proba = model.predict_proba(X_scaled)[0]
    classes = model.classes_
    
    prob_dict = {cls: prob for cls, prob in zip(classes, proba)}
    sorted_probs = sorted(prob_dict.items(), key=lambda x: x[1], reverse=True)
    
    top_3 = sorted_probs[:3]
    return top_3

# Make predictions for Sanrentan
sanrentan_predictions = []
errors_log = []
success_count = 0
no_model_count = 0

for idx, row in test_data.iterrows():
    stadium = row['レース場']
    
    # Skip if no model for this stadium
    if stadium not in models_dict:
        sanrentan_predictions.append(None)
        no_model_count += 1
        if no_model_count <= 5:  # Log first 5 missing stadiums
            errors_log.append(f'Row {idx}: No model for stadium {stadium}')
        continue
    
    model_info = models_dict[stadium]
    model = model_info['model']
    scaler = model_info['scaler']
    
    # Prepare features
    X_row = X_test_pred.iloc[idx:idx+1]
    
    try:
        top_3 = predict_sanrentan(model, scaler, X_row)
        top_boats = [int(boat) for boat, _ in top_3]
        sanrentan_predictions.append(tuple(top_boats))
        success_count += 1
    except Exception as e:
        if len(errors_log) < 10:  # Log first 10 errors
            errors_log.append(f'Row {idx} (stadium {stadium}): {type(e).__name__}: {str(e)[:80]}')
        sanrentan_predictions.append(None)

test_data['予想三連単'] = sanrentan_predictions

print(f'\n\n=== 予想結果 ===')
print(f'成功: {success_count}')
print(f'モデルなし: {no_model_count}')
print(f'エラー: {test_data["予想三連単"].isna().sum() - no_model_count}')
if errors_log:
    print(f'\nログ:')
    for err in errors_log:
        print(f'  {err}')
print(f'\nサンプル予想:')
print(test_data[['レースコード', '艇番', 'レース場', '日次数', '予想三連単']].head(15))

Loaded 24 models
Stadiums with models: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24)]
Expected features from saved model: ['年齢', '体重', '全国勝率', '全国2連対率', '当地勝率', '当地2連対率', 'モーター2連対率', 'ボート2連対率', '今節成績_1-1', '今節成績_1-2', '今節成績_2-1', '今節成績_2-2', '今節成績_3-1', '今節成績_3-2', '今節成績_4-1', '今節成績_4-2', '今節成績_5-1', '今節成績_5-2', '今節成績_6-1', '今節成績_6-2', '早見', '級別_encoded', '日次数']
Number of features: 23

Stadiums in test data: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int6

### 4. 的中率の計算（三連単）

In [11]:
# レースコード単位で三連単の着順を取得
race_actual_sanrentan = {}
for race_code in test_data['レースコード'].unique():
    race_mask = test_data['レースコード'] == race_code
    race_subset = test_data[race_mask].sort_values('着順')
    
    # 1着～3着の艇番を取得
    sanrentan = tuple(race_subset[race_subset['着順'].notna()].head(3)['艇番'].astype(int).values)
    if len(sanrentan) == 3:
        race_actual_sanrentan[race_code] = sanrentan

print(f'Race-level Sanrentan results: {len(race_actual_sanrentan)} races')

# レース単位での予想を集計
race_predictions = {}
for race_code in test_data['レースコード'].unique():
    race_mask = test_data['レースコード'] == race_code
    race_subset = test_data[race_mask]
    
    # 最初の行から予想三連単を取得
    if race_subset['予想三連単'].notna().any():
        race_predictions[race_code] = race_subset['予想三連単'].iloc[0]

print(f'Race-level predictions: {len(race_predictions)} races')

# 的中判定
sanrentan_matches = []
for race_code in race_actual_sanrentan.keys():
    if race_code in race_predictions:
        actual = race_actual_sanrentan[race_code]
        predicted = race_predictions[race_code]
        
        is_match = (predicted == actual)
        sanrentan_matches.append({
            'レースコード': race_code,
            '予想三連単': predicted,
            '実績三連単': actual,
            '的中': is_match
        })

if sanrentan_matches:
    sanrentan_df = pd.DataFrame(sanrentan_matches)
    correct = sanrentan_df['的中'].sum()
    total = len(sanrentan_df)
    accuracy = correct / total if total > 0 else 0
    
    print(f'\n三連単的中率: {correct}/{total} = {accuracy:.2%}')
else:
    print('的中判定可能なデータなし')

Race-level Sanrentan results: 3554 races
Race-level predictions: 4988 races

三連単的中率: 108/3554 = 3.04%


### 5. 詳細結果（1着、2着、3着別の的中率）

In [12]:
# Create detailed comparison results
results_list = []

for race_code in sorted(race_actual_sanrentan.keys()):
    if race_code in race_predictions:
        predicted = race_predictions[race_code]
        actual = race_actual_sanrentan[race_code]
        
        # Extract each position
        pred_1st, pred_2nd, pred_3rd = predicted[0], predicted[1], predicted[2]
        actual_1st, actual_2nd, actual_3rd = actual[0], actual[1], actual[2]
        
        # Check matches
        match_1st = '○' if pred_1st == actual_1st else '×'
        match_2nd = '○' if pred_2nd == actual_2nd else '×'
        match_3rd = '○' if pred_3rd == actual_3rd else '×'
        match_all = '○' if (pred_1st == actual_1st and pred_2nd == actual_2nd and pred_3rd == actual_3rd) else '×'
        
        results_list.append({
            'レースコード': race_code,
            '予想1着': pred_1st,
            '予想2着': pred_2nd,
            '予想3着': pred_3rd,
            '実際1着': actual_1st,
            '実際2着': actual_2nd,
            '実際3着': actual_3rd,
            '1着的中': match_1st,
            '2着的中': match_2nd,
            '3着的中': match_3rd,
            '全的中': match_all
        })

if results_list:
    results_df = pd.DataFrame(results_list)
    
    # Calculate statistics
    total_races = len(results_df)
    match_1st_count = (results_df['1着的中'] == '○').sum()
    match_2nd_count = (results_df['2着的中'] == '○').sum()
    match_3rd_count = (results_df['3着的中'] == '○').sum()
    match_all_count = (results_df['全的中'] == '○').sum()
    
    print('=== 的中率レポート（Programs のみモデル）===')
    print(f'レース数: {total_races}')
    print(f'1着的中: {match_1st_count}/{total_races} ({match_1st_count/total_races:.1%})')
    print(f'2着的中: {match_2nd_count}/{total_races} ({match_2nd_count/total_races:.1%})')
    print(f'3着的中: {match_3rd_count}/{total_races} ({match_3rd_count/total_races:.1%})')
    print(f'三連単的中: {match_all_count}/{total_races} ({match_all_count/total_races:.1%})')
else:
    print('No results to display')

=== 的中率レポート（Programs のみモデル）===
レース数: 3554
1着的中: 1133/3554 (31.9%)
2着的中: 694/3554 (19.5%)
3着的中: 653/3554 (18.4%)
三連単的中: 108/3554 (3.0%)


### 6. 推定結果を CSV に出力

In [13]:
# レース単位で三連単の予想を整形して出力
output_records = []

for race_code in sorted(race_predictions.keys()):
    predicted_sanrentan = race_predictions[race_code]
    if predicted_sanrentan is not None:
        output_records.append({
            'レースコード': race_code,
            '予想1着': predicted_sanrentan[0],
            '予想2着': predicted_sanrentan[1],
            '予想3着': predicted_sanrentan[2]
        })

if output_records:
    output_df = pd.DataFrame(output_records)
    
    # 出力ディレクトリを作成
    output_dir = repo_root / 'data' / 'estimate' / '2026' / '01'
    output_dir.mkdir(parents=True, exist_ok=True)
    
    # ファイル名
    output_path = output_dir / 'program_estimate.csv'
    
    # CSV に出力
    output_df.to_csv(output_path, index=False)
    
    print(f'Output saved to: {output_path}')
    print(f'Total predictions: {len(output_df)}')
else:
    print('No predictions to output')

Output saved to: /Users/mahiguch/dev/boatrace/data/data/estimate/2026/01/program_estimate.csv
Total predictions: 4988
