# Programs のみを用いた着順予想モデル

対象：2016年～2025年のデータを使用して、ボートレース場ごとに着順予想モデルを構築。
- データソース: programs のみ（風向、天候は使用しない）
- 学習対象: 日次が3日目以降のレースのみ
- モデル: GradientBoostingClassifier（レース場ごと）
- 予想・的中率検証: 2026年1月1日～30日

## セットアップ

In [1]:
from pathlib import Path
import calendar
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
import warnings
warnings.filterwarnings('ignore')

print('Setup complete')

Setup complete


## データ変形関数の定義

In [2]:
def reshape_programs(df):
    """
    Convert a race-level Programs table into a boat-level table.

    For every lane number 1-6, the columns whose name starts with
    '<lane>枠_' are selected, the prefix is stripped, and the shared race
    columns are carried along. The lane number is stored in a new '枠'
    column. Lanes without any matching column are ignored.

    Returns the per-lane pieces stacked on a fresh index, or an empty
    DataFrame when no lane columns are present.
    """
    shared_cols = ['レースコード', '日次', 'レース日', 'レース場', 'レース回']
    per_lane = []

    for lane in range(1, 7):
        tag = f'{lane}枠_'
        lane_cols = [name for name in df.columns if name.startswith(tag)]
        if not lane_cols:
            continue

        part = df[shared_cols + lane_cols].copy()
        # Drop the '<lane>枠_' prefix so every lane shares the same column names.
        part.columns = shared_cols + [name[len(tag):] for name in lane_cols]
        part['枠'] = lane
        per_lane.append(part)

    if not per_lane:
        return pd.DataFrame()
    return pd.concat(per_lane, ignore_index=True)

def reshape_results(df):
    """
    Convert a race-level Results table into one row per (race, boat).

    The input is expected to carry a 'レースコード' column plus any of the
    '<place>着_艇番' columns (place 1-6) holding the boat number that
    finished in that place.

    Returns a DataFrame with columns 'レースコード', '艇番', '着順', ordered
    race by race and, within a race, by finishing place. Entries are
    skipped when the boat number is missing, not convertible to an int,
    or outside 1-6. An empty DataFrame is returned when nothing usable is
    found.
    """
    # Resolve the available place columns once instead of once per row.
    place_cols = [
        (place, f'{place}着_艇番')
        for place in range(1, 7)
        if f'{place}着_艇番' in df.columns
    ]
    if df.empty or not place_cols:
        return pd.DataFrame()

    # Read column-wise: iterating rows with iterrows() builds a Series per
    # row and coerces mixed int/float rows to float, which would turn an
    # integer race code into a float.
    race_codes = df['レースコード'].tolist()
    boat_values = [df[col].tolist() for _, col in place_cols]

    result_list = []
    for row_pos, race_code in enumerate(race_codes):
        for (place, _), values in zip(place_cols, boat_values):
            raw = values[row_pos]
            if pd.isna(raw):
                continue
            try:
                boat_num = int(raw)
            except (ValueError, TypeError, OverflowError):
                # Non-numeric text or +/-inf: not a usable boat number.
                continue
            if boat_num < 1 or boat_num > 6:
                continue
            result_list.append({
                'レースコード': race_code,
                '艇番': boat_num,
                '着順': place
            })

    return pd.DataFrame(result_list) if result_list else pd.DataFrame()

def extract_day_number(day_str):
    """
    Extract the numeric day from a day-of-series label.

    '第1日' -> 1, '第2日' -> 2, etc.

    Returns NaN when the value is missing, does not contain both '第' and
    '日', or when the remaining text is not an integer.
    """
    if pd.isna(day_str):
        return np.nan
    text = str(day_str)
    if '第' in text and '日' in text:
        try:
            return int(text.replace('第', '').replace('日', ''))
        except ValueError:
            # Only a failed int() conversion is expected here; anything
            # else (e.g. KeyboardInterrupt) should propagate.
            return np.nan
    return np.nan

print('Reshape functions ready')

# Two-digit stadium code embedded in レースコード -> stadium number.
# NOTE(review): only 11 codes are listed, and several targets disagree with
# STADIUM_NAME_TO_NUMBER defined later in this file (e.g. 唐津 is 17 here but
# 23 there; 大村 is 18 here but 24 there). Confirm which numbering is
# intended before relying on this table.
RACE_CODE_TO_STADIUM = {
    0: 17,   # 唐津
    6: 6,    # 浜名湖
    7: 7,    # 蒲郡
    8: 8,    # 常滑
    9: 9,    # 津
    10: 10,  # 三国
    16: 13,  # 丸亀
    19: 21,  # 徳山
    20: 20,  # 下関
    22: 19,  # 芦屋
    24: 18,  # 大村
}

def extract_stadium_from_race_code(race_code):
    """
    Extract the stadium number from a race code.

    Race code format: YYYYMMDDCCRR, where CC is the stadium code
    (string slice [8:10]). The code is looked up in RACE_CODE_TO_STADIUM.

    Returns NaN when the value is missing, shorter than 10 characters,
    has a non-numeric stadium code, or the code is not in the table.
    """
    if pd.isna(race_code):
        return np.nan
    race_code_str = str(race_code)
    if len(race_code_str) >= 10:
        try:
            stadium_code = int(race_code_str[8:10])
        except ValueError:
            # Non-numeric characters in the stadium-code position.
            return np.nan
        return RACE_CODE_TO_STADIUM.get(stadium_code, np.nan)
    return np.nan

print('Stadium extraction from race code ready')

# Lookup table: stadium display name -> standard stadium number (1-24).
# びわこ also appears spelled as 琵琶湖 in some data versions, so both
# spellings point at 11.
STADIUM_NAME_TO_NUMBER = {
    'ボートレース桐生': 1,
    'ボートレース戸田': 2,
    'ボートレース江戸川': 3,
    'ボートレース平和島': 4,
    'ボートレース多摩川': 5,
    'ボートレース浜名湖': 6,
    'ボートレース蒲郡': 7,
    'ボートレース常滑': 8,
    'ボートレース津': 9,
    'ボートレース三国': 10,
    'ボートレースびわこ': 11,
    'ボートレース琵琶湖': 11,  # alternate spelling of びわこ
    'ボートレース住之江': 12,
    'ボートレース尼崎': 13,
    'ボートレース鳴門': 14,
    'ボートレース丸亀': 15,
    'ボートレース児島': 16,
    'ボートレース宮島': 17,
    'ボートレース徳山': 18,
    'ボートレース下関': 19,
    'ボートレース若松': 20,
    'ボートレース芦屋': 21,
    'ボートレース福岡': 22,
    'ボートレース唐津': 23,
    'ボートレース大村': 24,
}

def map_stadium_name_to_number(stadium_name):
    """
    Translate a stadium name into its standard number (1-24).

    Surrounding whitespace is ignored. Missing values and names that are
    not in STADIUM_NAME_TO_NUMBER yield NaN.
    """
    if not pd.isna(stadium_name):
        key = str(stadium_name).strip()
        if key in STADIUM_NAME_TO_NUMBER:
            return STADIUM_NAME_TO_NUMBER[key]
    return np.nan

print('Stadium name to number mapping ready')

Reshape functions ready
Stadium extraction from race code ready
Stadium name to number mapping ready


## 2016～2025年のデータで学習

### 1. データ読み込み（2016～2025年）

In [3]:
# Locate the repository root: the current directory if it contains 'data',
# otherwise two levels up.
cwd = Path.cwd()
if (cwd / 'data').exists():
    repo_root = cwd
else:
    repo_root = cwd.parent.parent

print(f'Current working directory: {cwd}')
print(f'Repository root: {repo_root}')

# Read every day of 2016-2025 for which both a programs CSV and a results
# CSV exist. Keys of all_data are 'YYYY-MM-DD' strings.
all_data = {}
years = [str(y) for y in range(2016, 2026)]

for year in years:
    for month in range(1, 13):
        month_str = f'{month:02d}'
        # monthrange() returns (weekday of first day, number of days).
        max_day = calendar.monthrange(int(year), month)[1]

        for day in range(1, max_day + 1):
            day_str = f'{day:02d}'
            prog_path = repo_root / 'data' / 'programs' / year / month_str / f'{day_str}.csv'
            res_path = repo_root / 'data' / 'results' / year / month_str / f'{day_str}.csv'

            if not (prog_path.exists() and res_path.exists()):
                continue

            date_key = f'{year}-{month_str}-{day_str}'
            try:
                programs_df = pd.read_csv(prog_path)
                results_df = pd.read_csv(res_path)
            except Exception as e:
                # A day is stored only when both files were read successfully.
                print(f'Error loading {date_key}: {e}')
            else:
                all_data[date_key] = {
                    'programs': programs_df,
                    'results': results_df
                }

print(f'Loaded {len(all_data)} days (2016-2025)')

Current working directory: /Users/mahiguch/dev/boatrace/data/docs/notebooks
Repository root: /Users/mahiguch/dev/boatrace/data
Loaded 3649 days (2016-2025)


### 2. データ統合（stadium 1-24のみ）

In [4]:
# Build one boat-level table per day (programs + results + actual previews)
# and concatenate all days into `final_data`.
combined_data = []
errors = []          # (date, exception type name) for days that failed entirely
preview_errors = []  # (date, exception type name) for days whose previews could not be merged

# Per-boat preview measurements; source columns are named '艇<boat>_<field>'.
preview_fields = ['展示タイム', 'コース', 'スタート展示', 'チルト調整']

print(f'Processing {len(all_data)} days...')
processed_count = 0
skipped_count = 0

for date_str, data in all_data.items():
    try:
        prog = reshape_programs(data['programs'])
        res = reshape_results(data['results'])

        if prog.empty or res.empty:
            continue

        # Extract day number
        prog['日次数'] = prog['日次'].apply(extract_day_number)

        # Map stadium name to number
        prog['レース場'] = prog['レース場'].apply(map_stadium_name_to_number)

        # Remove rows with unknown stadium
        prog = prog[prog['レース場'].notna()].reset_index(drop=True)

        if prog.empty:
            skipped_count += 1
            continue

        # Left merge keeps boats without a recorded finishing position
        # (着順 stays NaN for them).
        merged = prog.merge(
            res[['レースコード', '艇番', '着順']],
            on=['レースコード', '艇番'],
            how='left'
        )

        # Previews are optional: merge them when the file exists.
        year, month, day = date_str.split('-')
        prev_path = repo_root / 'data' / 'previews' / year / month / f'{day}.csv'

        if prev_path.exists():
            try:
                prev_df = pd.read_csv(prev_path)

                # Expand the race-level preview rows to one row per boat.
                prev_expanded = []
                for _, race_row in prev_df.iterrows():
                    race_code = race_row['レースコード']

                    for boat in range(1, 7):
                        boat_row = {'レースコード': race_code}
                        for field in preview_fields:
                            source_col = f'艇{boat}_{field}'
                            if source_col in prev_df.columns:
                                boat_row[field] = race_row[source_col]
                        boat_row['艇番'] = boat
                        prev_expanded.append(boat_row)

                if prev_expanded:
                    prev_boat_df = pd.DataFrame(prev_expanded)
                    merged = merged.merge(
                        prev_boat_df,
                        on=['レースコード', '艇番'],
                        how='left'
                    )
            except Exception as e:
                # Keep the day without preview columns, but record the
                # failure instead of hiding it.
                preview_errors.append((date_str, type(e).__name__))

        combined_data.append(merged)
        processed_count += 1

    except Exception as e:
        errors.append((date_str, type(e).__name__))

print(f'✓ Processed {processed_count} days successfully')
if skipped_count > 0:
    print(f'⚠ Skipped {skipped_count} days (no mapped stadiums)')
if preview_errors:
    print(f'⚠ Preview merge failed for {len(preview_errors)} days (kept without previews)')
if errors:
    print(f'✗ Errors: {len(errors)}')

if combined_data:
    final_data = pd.concat(combined_data, ignore_index=True)
    print(f'\n✓ Final: {final_data.shape}')
    print(f'✓ Unique dates: {final_data["レース日"].nunique()}')
    stadiums = sorted(final_data["レース場"].dropna().unique())
    print(f'✓ Stadiums: {[int(s) for s in stadiums]}')
    print(f'✓ Stadium count: {len(stadiums)}')
    print(f'✓ Target missing: {final_data["着順"].isna().sum()} rows')

    # Check if previews columns were added
    if '展示タイム' in final_data.columns:
        print(f'✓ Exhibition times: {final_data["展示タイム"].notna().sum()} rows')
    if 'コース' in final_data.columns:
        print(f'✓ Courses: {final_data["コース"].notna().sum()} rows')
    if 'スタート展示' in final_data.columns:
        print(f'✓ Start timing: {final_data["スタート展示"].notna().sum()} rows')
    if 'チルト調整' in final_data.columns:
        print(f'✓ Tilt adjustment: {final_data["チルト調整"].notna().sum()} rows')
else:
    print('\n✗ ERROR: No data merged!')

Processing 3649 days...
✓ Processed 3649 days successfully

✓ Final: (3309152, 40)
✓ Unique dates: 3649
✓ Stadiums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
✓ Stadium count: 24
✓ Target missing: 807114 rows
✓ Exhibition times: 2492690 rows
✓ Courses: 2490951 rows
✓ Start timing: 2490530 rows
✓ Tilt adjustment: 2494231 rows


### 3. 特徴量準備

In [5]:
# Build the training feature matrix X and target y from final_data.
# Fail early with a readable message when the merge cell did not produce it.
if 'final_data' not in locals():
    print('\n' + '='*70)
    print('ERROR: final_data not defined')
    print('='*70)
    print('\nPossible causes:')
    print('1. Cell-7 failed to load data (all_data is empty)')
    print('2. Cell-8 failed to merge data (combined_data is empty)')
    print('3. Stadium filter excluded all rows')
    print('\nAction: Run cell-7 and cell-8 again and check their output above.')
    print('='*70)
    raise NameError('final_data not defined - check cells 7 and 8')

print('✓ final_data loaded successfully')

# Identifier / label columns that must not be used as model inputs.
exclude_cols = {
    'レースコード', '日次', 'レース日', 'レース場', 'レース回',
    '艇番', '登録番号', '選手名', '支部',
    '枠', '着順', '日次数',
    'モーター番号', 'ボート番号'  # IDs, not features
}

categorical_cols = {'級別'}

numeric_cols = []
for col in final_data.columns:
    if col not in exclude_cols and col not in categorical_cols:
        try:
            pd.to_numeric(final_data[col], errors='coerce')
            numeric_cols.append(col)
        except (TypeError, ValueError):
            # Column cannot be interpreted as numeric at all: leave it out.
            pass

X = final_data[numeric_cols].copy()

for col in X.columns:
    X[col] = pd.to_numeric(X[col], errors='coerce')

# Impute missing values with the column median (0 when the column is all
# NaN). Assign the result back instead of calling fillna(inplace=True) on
# X[col]: the chained in-place form is not guaranteed to update X under
# pandas Copy-on-Write.
for col in X.columns:
    median_val = X[col].median()
    fill_value = 0 if pd.isna(median_val) else median_val
    X[col] = X[col].fillna(fill_value)

if '級別' in final_data.columns:
    le_grade = LabelEncoder()
    X['級別_encoded'] = le_grade.fit_transform(final_data['級別'].fillna('未知'))

X['日次数'] = final_data['日次数'].fillna(1).astype(int)

# Add interaction features based on previews. They are computed from the raw
# (un-imputed) final_data columns; rows without a value get 0.
if '展示タイム' in X.columns:
    # Exhibition time ranking within each race
    X['展示タイム順位'] = (
        final_data.groupby('レースコード')['展示タイム'].rank(method='min').fillna(0)
    )

    # Exhibition time difference (from fastest in race)
    fastest_time = final_data.groupby('レースコード')['展示タイム'].transform('min')
    X['展示タイム差'] = (final_data['展示タイム'] - fastest_time).fillna(0)

if 'コース' in X.columns:
    # Course interaction with player strength
    X['コース×全国勝率'] = (
        final_data['コース'] * final_data['全国勝率'].fillna(0)
    ).fillna(0)

if 'スタート展示' in X.columns:
    # Start timing ranking within each race
    X['スタート展示順位'] = (
        final_data.groupby('レースコード')['スタート展示'].rank(method='min').fillna(0)
    )

    # Start timing difference (from the smallest value in the race)
    fastest_start = final_data.groupby('レースコード')['スタート展示'].transform('min')
    X['スタート展示差'] = (final_data['スタート展示'] - fastest_start).fillna(0)

if 'チルト調整' in X.columns:
    # Tilt adjustment as binary feature (1 when the value is > 0, else 0)
    X['チルト調整_有'] = (final_data['チルト調整'] > 0).astype(int)

    # Interaction: frame × tilt adjustment
    if '枠' in final_data.columns:
        X['枠×チルト調整'] = (
            final_data['枠'] * final_data['チルト調整'].fillna(0)
        ).fillna(0)

total_nan = X.isna().sum().sum()
print(f'Total NaN count after filling: {total_nan}')

y = final_data['着順']
stadiums = sorted(final_data['レース場'].unique())

print(f'\nFeatures: {len(X.columns)}')
print(f'Samples: {len(X)}')
print(f'Stadiums: {len(stadiums)}')
print(f'Target missing: {y.isna().sum()}')
print(f'\nFeatures with previews:')
if '展示タイム' in X.columns:
    print(f'  ✓ 展示タイム')
if 'コース' in X.columns:
    print(f'  ✓ コース')
if 'スタート展示' in X.columns:
    print(f'  ✓ スタート展示')
if 'チルト調整' in X.columns:
    print(f'  ✓ チルト調整')
print(f'\nInteraction features:')
if '展示タイム順位' in X.columns:
    print(f'  ✓ 展示タイム順位（相互作用）')
if '展示タイム差' in X.columns:
    print(f'  ✓ 展示タイム差（相互作用）')
if 'コース×全国勝率' in X.columns:
    print(f'  ✓ コース×全国勝率（相互作用）')
if 'スタート展示順位' in X.columns:
    print(f'  ✓ スタート展示順位（相互作用）')
if 'スタート展示差' in X.columns:
    print(f'  ✓ スタート展示差（相互作用）')
if 'チルト調整_有' in X.columns:
    print(f'  ✓ チルト調整_有（相互作用）')
if '枠×チルト調整' in X.columns:
    print(f'  ✓ 枠×チルト調整（相互作用）')

✓ final_data loaded successfully
Total NaN count after filling: 0

Features: 34
Samples: 3309152
Stadiums: 24
Target missing: 807114

Features with previews:
  ✓ 展示タイム
  ✓ コース
  ✓ スタート展示
  ✓ チルト調整

Interaction features:
  ✓ 展示タイム順位（相互作用）
  ✓ 展示タイム差（相互作用）
  ✓ コース×全国勝率（相互作用）
  ✓ スタート展示順位（相互作用）
  ✓ スタート展示差（相互作用）
  ✓ チルト調整_有（相互作用）
  ✓ 枠×チルト調整（相互作用）


### 4. モデル学習

In [6]:
# Fit one GradientBoostingClassifier per stadium on a 70/30 random split and
# record the hold-out accuracy of the predicted finishing position.
results_summary = []

for stadium in stadiums:
    # Rows of this stadium whose finishing position is known.
    usable = (final_data['レース場'] == stadium) & y.notna()
    X_std = X[usable].reset_index(drop=True)
    y_std = y[usable].reset_index(drop=True)

    if len(X_std) < 10:
        print(f'Stadium {int(stadium)}: insufficient data ({len(X_std)} samples)')
        continue

    X_train, X_test, y_train, y_test = train_test_split(
        X_std, y_std, test_size=0.3, random_state=42
    )

    # The scaler is fitted on the training part only and then applied to
    # the hold-out part.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    # GBC
    try:
        gbc = GradientBoostingClassifier(
            n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
        )
        gbc.fit(X_train_s, y_train)
        predicted = gbc.predict(X_test_s)
        acc = accuracy_score(y_test, predicted)
        results_summary.append({'stadium': int(stadium), 'accuracy': acc, 'samples': len(X_std)})
    except Exception as e:
        print(f'Stadium {int(stadium)} error: {type(e).__name__}: {str(e)[:50]}')

if not results_summary:
    print('学習失敗')
else:
    results_df = pd.DataFrame(results_summary)
    print('\n=== モデル学習結果 ===')
    print(results_df.to_string(index=False))
    print(f'\n成功: {len(results_df)} / {len(stadiums)} スタジアム')


=== モデル学習結果 ===
 stadium  accuracy  samples
       1  0.299053   135549
       2  0.282910   136862
       3  0.286687   124337
       4  0.286279   127345
       5  0.306494   120633
       6  0.306672   141780
       7  0.319708   137082
       8  0.325938   141529
       9  0.321112   123547
      10  0.321578   133840
      11  0.318180    66869
      12  0.306332   135299
      13  0.279048    64288
      14  0.291480    69290
      15  0.293133    74903
      16  0.303874    70379
      17  0.324187    70821
      18  0.313815    67341
      19  0.298661    61239
      20  0.314547    70734
      21  0.312355    67316
      22  0.286669    56383
      23  0.303955   128626
      24  0.328076   176046

成功: 24 / 24 スタジアム


### 5. モデル保存

In [7]:
# Store models and scaler info
# Re-fit one model per stadium (same split and hyper-parameters as the
# evaluation cell) and keep it together with its scaler and feature list.
models_dict = {}

for stadium in stadiums:
    # Rows of this stadium whose finishing position is known.
    usable = (final_data['レース場'] == stadium) & y.notna()
    X_std = X[usable].reset_index(drop=True)
    y_std = y[usable].reset_index(drop=True)

    if len(X_std) < 10:
        continue

    X_train, X_test, y_train, y_test = train_test_split(
        X_std, y_std, test_size=0.3, random_state=42
    )

    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)

    # Train GBC model
    gbc = GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
    )
    gbc.fit(X_train_s, y_train)

    entry = {
        'model': gbc,
        'scaler': scaler,
        'features': list(X.columns)
    }
    models_dict[stadium] = entry

# Persist all stadium models in a single pickle under <repo_root>/models.
model_save_path = repo_root / 'models' / 'program_models.pkl'
model_save_path.parent.mkdir(parents=True, exist_ok=True)

with open(model_save_path, 'wb') as f:
    pickle.dump(models_dict, f)

print(f'✓ 保存: {len(models_dict)} モデルを {model_save_path} に保存')

✓ 保存: 24 モデルを /Users/mahiguch/dev/boatrace/data/models/program_models.pkl に保存


## 2026年1月1日～30日のデータで予想

### 1. テストデータ読み込み

In [8]:
# Read every day of 2026-01 that has both a programs CSV and a results CSV.
# Each entry of test_data_list is (day as 'DD', programs frame, results frame).
test_data_list = []

year_test = '2026'
month_test = '01'
month_num = int(month_test)
year_num = int(year_test)

# monthrange() returns (weekday of first day, number of days).
max_day = calendar.monthrange(year_num, month_num)[1]

for day in range(1, max_day + 1):
    day_str = f'{day:02d}'
    prog_path = repo_root / 'data' / 'programs' / year_test / month_test / f'{day_str}.csv'
    res_path = repo_root / 'data' / 'results' / year_test / month_test / f'{day_str}.csv'

    if not (prog_path.exists() and res_path.exists()):
        continue

    try:
        prog_test = pd.read_csv(prog_path)
        res_test = pd.read_csv(res_path)
    except Exception as e:
        print(f'Error loading {year_test}-{month_test}-{day_str}: {e}')
    else:
        test_data_list.append((day_str, prog_test, res_test))

print(f'✓ Loaded {len(test_data_list)} days for 2026-01')

✓ Loaded 31 days for 2026-01


### 2. テストデータの変形とマージ

In [9]:
# Reshape the test days to boat level, attach actual results (for scoring)
# and the predicted previews, then concatenate everything into `test_data`.
test_combined = []

# Per-boat preview measurements; source columns are named '艇<boat>_<field>'.
preview_fields = ['展示タイム', 'コース', 'スタート展示', 'チルト調整']

for day, prog_test, res_test in test_data_list:
    prog_reshaped = reshape_programs(prog_test)
    res_reshaped = reshape_results(res_test)

    if prog_reshaped.empty or res_reshaped.empty:
        continue

    # Extract day number
    prog_reshaped['日次数'] = prog_reshaped['日次'].apply(extract_day_number)

    # Map stadium name to number
    prog_reshaped['レース場'] = prog_reshaped['レース場'].apply(map_stadium_name_to_number)

    # Remove rows with unknown stadium
    prog_reshaped = prog_reshaped[prog_reshaped['レース場'].notna()].reset_index(drop=True)

    if prog_reshaped.empty:
        continue

    # Merge with results (left merge: boats without a result keep NaN 着順).
    # A separate name is used for the per-day frame so it cannot be confused
    # with the concatenated `test_data` built after the loop.
    day_data = prog_reshaped.merge(
        res_reshaped[['レースコード', '艇番', '着順']],
        on=['レースコード', '艇番'],
        how='left'
    )

    # Load and merge predicted previews. The path follows the year/month
    # chosen in the loading cell instead of repeating the literals.
    pred_prev_path = (
        repo_root / 'data' / 'prediction-preview' / year_test / month_test / f'{day}.csv'
    )

    if pred_prev_path.exists():
        try:
            pred_prev_df = pd.read_csv(pred_prev_path)

            # Expand the race-level prediction rows to one row per boat.
            pred_expanded = []
            for _, race_row in pred_prev_df.iterrows():
                race_code = race_row['レースコード']

                for boat in range(1, 7):
                    boat_row = {'レースコード': race_code}
                    for field in preview_fields:
                        source_col = f'艇{boat}_{field}'
                        if source_col in pred_prev_df.columns:
                            boat_row[field] = race_row[source_col]
                    boat_row['艇番'] = boat
                    pred_expanded.append(boat_row)

            if pred_expanded:
                pred_boat_df = pd.DataFrame(pred_expanded)
                day_data = day_data.merge(
                    pred_boat_df,
                    on=['レースコード', '艇番'],
                    how='left'
                )
        except Exception as e:
            print(f'Warning: Could not merge predicted previews for day {day}: {e}')

    test_combined.append(day_data)

if test_combined:
    test_data = pd.concat(test_combined, ignore_index=True)
    print(f'✓ Test data merged: {test_data.shape}')
    stadiums = sorted(test_data["レース場"].dropna().unique())
    print(f'✓ Unique stadiums: {[int(s) for s in stadiums]}')
    print(f'✓ Actual results available: {test_data["着順"].notna().sum()} rows')

    # Check predicted previews columns
    if '展示タイム' in test_data.columns:
        print(f'✓ Exhibition times: {test_data["展示タイム"].notna().sum()} rows')
    if 'コース' in test_data.columns:
        print(f'✓ Courses: {test_data["コース"].notna().sum()} rows')
    if 'スタート展示' in test_data.columns:
        print(f'✓ Start timing: {test_data["スタート展示"].notna().sum()} rows')
    if 'チルト調整' in test_data.columns:
        print(f'✓ Tilt adjustment: {test_data["チルト調整"].notna().sum()} rows')
else:
    print('✗ No test data available')

✓ Test data merged: (31353, 40)
✓ Unique stadiums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
✓ Actual results available: 22128 rows
✓ Exhibition times: 31353 rows
✓ Courses: 31353 rows
✓ Start timing: 31353 rows
✓ Tilt adjustment: 31353 rows


### 3. テストデータの相互作用特徴量追加

In [10]:
# Add the same interaction features to test data as training.
# Results are assigned back to the column rather than using
# test_data[col].fillna(..., inplace=True): the chained in-place form is not
# guaranteed to update test_data under pandas Copy-on-Write.
if '展示タイム' in test_data.columns:
    # Exhibition time ranking within each race
    test_data['展示タイム順位'] = (
        test_data.groupby('レースコード')['展示タイム'].rank(method='min').fillna(0)
    )

    # Exhibition time difference (from fastest in race)
    fastest_time = test_data.groupby('レースコード')['展示タイム'].transform('min')
    test_data['展示タイム差'] = (test_data['展示タイム'] - fastest_time).fillna(0)

if 'コース' in test_data.columns:
    # Course interaction with player strength
    test_data['コース×全国勝率'] = (
        test_data['コース'] * test_data['全国勝率'].fillna(0)
    ).fillna(0)

if 'スタート展示' in test_data.columns:
    # Start timing ranking within each race
    test_data['スタート展示順位'] = (
        test_data.groupby('レースコード')['スタート展示'].rank(method='min').fillna(0)
    )

    # Start timing difference (from the smallest value in the race)
    fastest_start = test_data.groupby('レースコード')['スタート展示'].transform('min')
    test_data['スタート展示差'] = (test_data['スタート展示'] - fastest_start).fillna(0)

if 'チルト調整' in test_data.columns:
    # Tilt adjustment as binary feature (1 when the value is > 0, else 0)
    test_data['チルト調整_有'] = (test_data['チルト調整'] > 0).astype(int)

    # Interaction: frame × tilt adjustment
    if '枠' in test_data.columns:
        test_data['枠×チルト調整'] = (
            test_data['枠'] * test_data['チルト調整'].fillna(0)
        ).fillna(0)

print('✓ Interaction features added to test data')
print(f'  Test data shape: {test_data.shape}')
if '展示タイム順位' in test_data.columns:
    print(f'  Exhibition time rank: available')
if '展示タイム差' in test_data.columns:
    print(f'  Exhibition time diff: available')
if 'コース×全国勝率' in test_data.columns:
    print(f'  Course × Win rate: available')
if 'スタート展示順位' in test_data.columns:
    print(f'  Start timing rank: available')
if 'スタート展示差' in test_data.columns:
    print(f'  Start timing diff: available')
if 'チルト調整_有' in test_data.columns:
    print(f'  Tilt adjustment flag: available')
if '枠×チルト調整' in test_data.columns:
    print(f'  Frame × Tilt: available')

✓ Interaction features added to test data
  Test data shape: (31353, 47)
  Exhibition time rank: available
  Exhibition time diff: available
  Course × Win rate: available
  Start timing rank: available
  Start timing diff: available
  Tilt adjustment flag: available
  Frame × Tilt: available


In [21]:
# Prepare test feature matrix X_test with the same columns, in the same
# order, as the training matrix X.
exclude_cols_test = {
    'レースコード', '日次', 'レース日', 'レース場', 'レース回',
    '艇番', '登録番号', '選手名', '支部',
    '枠', '着順', '日次数',
    'モーター番号', 'ボート番号', '予想三連単'
}

categorical_cols_test = {'級別'}

numeric_cols_test = []
for col in test_data.columns:
    if col not in exclude_cols_test and col not in categorical_cols_test:
        try:
            pd.to_numeric(test_data[col], errors='coerce')
            numeric_cols_test.append(col)
        except (TypeError, ValueError):
            # Column cannot be interpreted as numeric at all: leave it out.
            pass

X_test = test_data[numeric_cols_test].copy()

for col in X_test.columns:
    X_test[col] = pd.to_numeric(X_test[col], errors='coerce')

# Impute with the column median (0 when the column is all NaN). Assign back
# instead of chained fillna(inplace=True), which is not guaranteed to update
# X_test under pandas Copy-on-Write.
for col in X_test.columns:
    median_val = X_test[col].median()
    fill_value = 0 if pd.isna(median_val) else median_val
    X_test[col] = X_test[col].fillna(fill_value)

# Encode 級別 with the encoder fitted on the training data (le_grade).
# Fitting a fresh LabelEncoder on the test data would assign different
# integer codes whenever the set of grades differs between train and test,
# silently changing what the models see.
if '級別' in test_data.columns and '級別_encoded' in X.columns:
    le_grade_test = le_grade  # kept for backward compatibility with the old name
    grade_codes = {label: code for code, label in enumerate(le_grade.classes_)}
    # Grades never seen in training fall back to the '未知' code when it
    # exists, otherwise to -1.
    unknown_code = grade_codes.get('未知', -1)
    X_test['級別_encoded'] = (
        test_data['級別'].fillna('未知').map(grade_codes).fillna(unknown_code).astype(int)
    )

X_test['日次数'] = test_data['日次数'].fillna(1).astype(int)

# CRITICAL: Reorder X_test columns to match training X columns exactly
X_test = X_test[X.columns]

print('✓ Test feature matrix created')
print(f'  Shape: {X_test.shape}')
print(f'  Features: {len(X_test.columns)}')
print(f'  Column order matches training: {list(X_test.columns) == list(X.columns)}')

✓ Test feature matrix created
  Shape: (31353, 34)
  Features: 34
  Column order matches training: True


### 4. 予測生成

In [22]:
# Diagnostic dump: shapes, feature-name agreement between train and test,
# NaN counts, and a few sample races.
print('=== Debug Info ===')
print(f'\ntest_data shape: {test_data.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'test_data columns: {test_data.shape[1]}')
print(f'X_test columns: {X_test.shape[1]}')
print(f'\nX_test features: {list(X_test.columns)}')
print(f'\nTraining X features: {list(X.columns)}')

# Compare the feature names as sets (order is not considered here).
training_features = set(X.columns)
test_features = set(X_test.columns)

missing_in_test = training_features - test_features
extra_in_test = test_features - training_features

if missing_in_test:
    print(f'\n⚠ Missing in test: {missing_in_test}')
if extra_in_test:
    print(f'\n⚠ Extra in test: {extra_in_test}')
if not (missing_in_test or extra_in_test):
    print(f'\n✓ Features match perfectly')

# NaN counts in the raw test table
print(f'\ntest_data NaN count:')
print(f'  レース場: {test_data["レース場"].isna().sum()}')
print(f'  着順: {test_data["着順"].isna().sum()}')

# NaN counts in the feature matrix, ten largest first
print(f'\nX_test NaN count:')
print(f'  Total: {X_test.isna().sum().sum()}')
print(f'  Per column (top 10):')
nan_counts = X_test.isna().sum().sort_values(ascending=False).head(10)
for col, count in nan_counts.items():
    print(f'    {col}: {count}')

# First five race codes with their stadium and number of boats
print(f'\nSample race codes in test_data:')
sample_races = test_data['レースコード'].unique()[:5]
for race in sample_races:
    race_rows = test_data[test_data['レースコード'] == race]
    stadium = race_rows['レース場'].iloc[0]
    print(f'  {race}: stadium={stadium}, boats={len(race_rows)}')

=== Debug Info ===

test_data shape: (31353, 48)
X_test shape: (31353, 34)
test_data columns: 48
X_test columns: 34

X_test features: ['年齢', '体重', '全国勝率', '全国2連対率', '当地勝率', '当地2連対率', 'モーター2連対率', 'ボート2連対率', '今節成績_1-1', '今節成績_1-2', '今節成績_2-1', '今節成績_2-2', '今節成績_3-1', '今節成績_3-2', '今節成績_4-1', '今節成績_4-2', '今節成績_5-1', '今節成績_5-2', '今節成績_6-1', '今節成績_6-2', '早見', '展示タイム', 'コース', 'スタート展示', 'チルト調整', '級別_encoded', '日次数', '展示タイム順位', '展示タイム差', 'コース×全国勝率', 'スタート展示順位', 'スタート展示差', 'チルト調整_有', '枠×チルト調整']

Training X features: ['年齢', '体重', '全国勝率', '全国2連対率', '当地勝率', '当地2連対率', 'モーター2連対率', 'ボート2連対率', '今節成績_1-1', '今節成績_1-2', '今節成績_2-1', '今節成績_2-2', '今節成績_3-1', '今節成績_3-2', '今節成績_4-1', '今節成績_4-2', '今節成績_5-1', '今節成績_5-2', '今節成績_6-1', '今節成績_6-2', '早見', '展示タイム', 'コース', 'スタート展示', 'チルト調整', '級別_encoded', '日次数', '展示タイム順位', '展示タイム差', 'コース×全国勝率', 'スタート展示順位', 'スタート展示差', 'チルト調整_有', '枠×チルト調整']

✓ Features match perfectly

test_data NaN count:
  レース場: 0
  着順: 9225

X_test NaN count:
  Total: 0
  Per column (top 10):
    年齢: 

In [23]:
# Load trained models.
# NOTE: pickle.load can execute arbitrary code from the file. Only load the
# pickle written by this notebook's save cell, never a file from an
# untrusted source.
with open(model_save_path, 'rb') as f:
    models_dict = pickle.load(f)

print(f'✓ Loaded {len(models_dict)} program models')

# Generate a trifecta (三連単) prediction per race:
# race_code -> (boat predicted 1st, boat predicted 2nd, boat predicted 3rd)
predictions_dict = {}
successful_races = 0
failed_races = []  # (race_code, exception type name) for races that raised

# groupby visits each race once; building a boolean mask per race would
# rescan the whole table for every race. sort=False keeps first-appearance
# order, matching iteration over unique().
for race_code, race_data in test_data.groupby('レースコード', sort=False):
    # X_test was built from test_data and shares its index.
    race_X = X_test.loc[race_data.index]

    if race_data.empty or race_X.empty:
        continue

    stadium = int(race_data['レース場'].iloc[0])

    # Check if stadium is in models
    if stadium not in models_dict:
        continue

    model_info = models_dict[stadium]
    model = model_info['model']
    scaler = model_info['scaler']

    try:
        # Convert to numpy array to avoid sklearn feature name issues
        X_race_scaled = scaler.transform(race_X.values)

        # Predict the finishing position (1-6) of all boats in one call.
        predicted_positions = model.predict(X_race_scaled)
        boat_predictions = {
            int(boat_num): int(pred_pos)
            for boat_num, pred_pos in zip(race_data['艇番'], predicted_positions)
        }

        # Sort boats by predicted finishing position. sorted() is stable,
        # so boats with the same predicted position keep their row order.
        sorted_boats = sorted(boat_predictions.items(), key=lambda x: x[1])

        # Take the three boats with the best predicted positions
        if len(sorted_boats) >= 3:
            sanrentan = (sorted_boats[0][0], sorted_boats[1][0], sorted_boats[2][0])
            predictions_dict[race_code] = sanrentan
            successful_races += 1
    except Exception as e:
        # Skip this race, but record the failure so it is visible.
        failed_races.append((race_code, type(e).__name__))

# Calculate stats
total_races = len(test_data['レースコード'].unique())
valid_predictions = successful_races / total_races if total_races > 0 else 0

print(f'\nPrediction Results:')
print(f'  Total races: {total_races}')
print(f'  Successful: {successful_races}')
print(f'  Success rate: {valid_predictions:.1%}')
if failed_races:
    print(f'  Failed: {len(failed_races)} (first: {failed_races[0]})')

# Add sanrentan predictions to test_data
test_data['予想三連単'] = test_data['レースコード'].map(predictions_dict)

print(f'\n✓ Generated predictions for {valid_predictions:.0%} of races')

✓ Loaded 24 program models

Prediction Results:
  Total races: 5128
  Successful: 5128
  Success rate: 100.0%

✓ Generated predictions for 100% of races


### 4. 的中率の計算（三連単）

In [24]:
# Actual trifecta per race: the boats that finished 1st, 2nd and 3rd, in order.
# Only rows whose finishing position is exactly 1, 2 or 3 are considered, and a
# race is kept only when each of those positions appears exactly once. Taking
# "the three lowest recorded positions" instead would score a race with a
# missing podium record against the wrong boats.
# Assumes 着順 holds numeric positions — TODO confirm against the results loader.
podium_positions = [1, 2, 3]
placed_rows = test_data.loc[
    test_data['着順'].isin(podium_positions), ['レースコード', '着順', '艇番']
]

race_actual_sanrentan = {}
for race_code, race_subset in placed_rows.groupby('レースコード', sort=False):
    podium = race_subset.sort_values('着順')
    if podium['着順'].tolist() != podium_positions:
        continue  # incomplete or duplicated podium: cannot be judged
    race_actual_sanrentan[race_code] = tuple(podium['艇番'].astype(int).tolist())

print(f'Race-level Sanrentan results: {len(race_actual_sanrentan)} races')

# Predicted trifecta per race. Every boat row of a race carries the same
# prediction, so the first row with a prediction is enough.
predicted_rows = test_data[test_data['予想三連単'].notna()].drop_duplicates(subset='レースコード')
race_predictions = dict(zip(predicted_rows['レースコード'], predicted_rows['予想三連単']))

print(f'Race-level predictions: {len(race_predictions)} races')

# Hit judgement: a hit requires all three boats in the exact finishing order.
sanrentan_matches = [
    {
        'レースコード': race_code,
        '予想三連単': race_predictions[race_code],
        '実績三連単': actual,
        '的中': race_predictions[race_code] == actual,
    }
    for race_code, actual in race_actual_sanrentan.items()
    if race_code in race_predictions
]

if sanrentan_matches:
    sanrentan_df = pd.DataFrame(sanrentan_matches)
    correct = sanrentan_df['的中'].sum()
    total = len(sanrentan_df)
    accuracy = correct / total if total > 0 else 0

    print(f'\n三連単的中率: {correct}/{total} = {accuracy:.2%}')
else:
    print('的中判定可能なデータなし')

Race-level Sanrentan results: 3660 races
Race-level predictions: 5128 races

三連単的中率: 245/3660 = 6.69%


### 5. 詳細結果（1着、2着、3着別の的中率）

In [25]:
# Build one comparison row per race that has both an actual and a predicted
# trifecta, marking each position (and the full trifecta) as hit '○' or miss '×'.
results_list = []

for race_code in sorted(race_actual_sanrentan):
    if race_code not in race_predictions:
        continue

    predicted = race_predictions[race_code]
    actual = race_actual_sanrentan[race_code]

    pred_1st, pred_2nd, pred_3rd = predicted[0], predicted[1], predicted[2]
    actual_1st, actual_2nd, actual_3rd = actual[0], actual[1], actual[2]

    # Position-wise comparison; the trifecta hits only when all three agree.
    hit_1st = pred_1st == actual_1st
    hit_2nd = pred_2nd == actual_2nd
    hit_3rd = pred_3rd == actual_3rd

    results_list.append({
        'レースコード': race_code,
        '予想1着': pred_1st,
        '予想2着': pred_2nd,
        '予想3着': pred_3rd,
        '実際1着': actual_1st,
        '実際2着': actual_2nd,
        '実際3着': actual_3rd,
        '1着的中': '○' if hit_1st else '×',
        '2着的中': '○' if hit_2nd else '×',
        '3着的中': '○' if hit_3rd else '×',
        '全的中': '○' if (hit_1st and hit_2nd and hit_3rd) else '×'
    })

if not results_list:
    print('No results to display')
else:
    results_df = pd.DataFrame(results_list)

    # Hit counts per position and for the full trifecta.
    total_races = len(results_df)
    match_1st_count = results_df['1着的中'].eq('○').sum()
    match_2nd_count = results_df['2着的中'].eq('○').sum()
    match_3rd_count = results_df['3着的中'].eq('○').sum()
    match_all_count = results_df['全的中'].eq('○').sum()

    print('=== 的中率レポート（Programs のみモデル）===')
    print(f'レース数: {total_races}')
    print(f'1着的中: {match_1st_count}/{total_races} ({match_1st_count/total_races:.1%})')
    print(f'2着的中: {match_2nd_count}/{total_races} ({match_2nd_count/total_races:.1%})')
    print(f'3着的中: {match_3rd_count}/{total_races} ({match_3rd_count/total_races:.1%})')
    print(f'三連単的中: {match_all_count}/{total_races} ({match_all_count/total_races:.1%})')

=== 的中率レポート（Programs のみモデル）===
レース数: 3660
1着的中: 1969/3660 (53.8%)
2着的中: 923/3660 (25.2%)
3着的中: 723/3660 (19.8%)
三連単的中: 245/3660 (6.7%)


### 6. 推定結果を CSV に出力

In [26]:
# Flatten the per-race trifecta predictions into one CSV row per race,
# ordered by race code.
output_records = [
    {
        'レースコード': race_code,
        '予想1着': predicted_sanrentan[0],
        '予想2着': predicted_sanrentan[1],
        '予想3着': predicted_sanrentan[2]
    }
    for race_code, predicted_sanrentan in sorted(
        race_predictions.items(), key=lambda item: item[0]
    )
    if predicted_sanrentan is not None
]

if not output_records:
    print('No predictions to output')
else:
    output_df = pd.DataFrame(output_records)

    # Create the output directory if it does not exist yet.
    # NOTE(review): the recorded run wrote to ".../data/data/estimate/..." —
    # confirm that repo_root points at the intended directory.
    output_dir = repo_root / 'data' / 'estimate' / '2026' / '01'
    output_dir.mkdir(parents=True, exist_ok=True)

    # Destination file.
    output_path = output_dir / 'program_estimate.csv'

    # Write the CSV without the DataFrame index.
    output_df.to_csv(output_path, index=False)

    print(f'Output saved to: {output_path}')
    print(f'Total predictions: {len(output_df)}')

Output saved to: /Users/mahiguch/dev/boatrace/data/data/estimate/2026/01/program_estimate.csv
Total predictions: 5128


### 7. 的中率分析（レース場、レース回、日次別）

In [27]:
# Trifecta hit-rate breakdown by stadium, race number and day of the meeting.


def _hit_rate_table(details, key):
    """Return race count, hit count and hit rate of ``details`` per ``key``.

    ``details`` needs the columns 'レースコード' and '全的中' (hit marked '○').
    The result is indexed by the values of ``key`` in ascending index order.
    """
    table = details.groupby(key).agg({
        'レースコード': 'count',
        '全的中': lambda marks: (marks == '○').sum()
    }).rename(columns={'レースコード': '総レース数', '全的中': '的中数'})
    table['的中率'] = table['的中数'] / table['総レース数']
    return table.sort_index()


def _race_number_key(label):
    """Sort key that orders race labels numerically ('2R' before '10R')."""
    digits = ''.join(ch for ch in str(label) if ch.isdecimal())
    # Labels without any digit sort last, alphabetically among themselves.
    return (int(digits) if digits else float('inf'), str(label))


def _print_hit_rate_table(header, table, format_label):
    """Print ``table`` row by row; ``format_label`` renders the first column."""
    print(header)
    print('-' * 50)
    for label, row in table.iterrows():
        races = int(row['総レース数'])
        hits = int(row['的中数'])
        accuracy = row['的中率']
        print(f'{format_label(label)}| {races:6d}   | {hits:3d}   | {accuracy:.1%}')


print('=== レース場ごとの的中率 ===')
print()

# Attach stadium, race number and day number to each judged race.
results_detailed = results_df.copy()

# One row per race code, so the left merge below cannot multiply result rows.
race_info = test_data[['レースコード', 'レース場', 'レース回', '日次数']].drop_duplicates(
    subset='レースコード'
)

results_detailed = results_detailed.merge(race_info, on='レースコード', how='left')

# Per stadium
stadium_stats = _hit_rate_table(results_detailed, 'レース場')
_print_hit_rate_table(
    'レース場 | 総レース数 | 的中数 | 的中率',
    stadium_stats,
    lambda stadium: f'{int(stadium):2d}     ',
)

print()
print('=== レース回ごとの的中率 ===')
print()

# Per race number. A plain index sort is lexicographic ('10R' before '1R'),
# so reorder the rows by the numeric part of the label.
race_stats = _hit_rate_table(results_detailed, 'レース回')
race_stats = race_stats.loc[sorted(race_stats.index, key=_race_number_key)]
_print_hit_rate_table(
    'レース回 | 総レース数 | 的中数 | 的中率',
    race_stats,
    lambda race_num: f'{str(race_num):4s}  ',
)

print()
print('=== 日次ごとの的中率 ===')
print()

# Per day number
day_stats = _hit_rate_table(results_detailed, '日次数')
_print_hit_rate_table(
    '日次 | 総レース数 | 的中数 | 的中率',
    day_stats,
    lambda day_num: f'{int(day_num)}     ',
)

print()
print('=== レース場別の詳細統計 ===')
print()

# Per stadium: hit rate of each position next to the trifecta hit rate.
for stadium in sorted(results_detailed['レース場'].dropna().unique()):
    stadium_data = results_detailed[results_detailed['レース場'] == stadium]
    total = len(stadium_data)

    acc_1st = (stadium_data['1着的中'] == '○').sum() / total
    acc_2nd = (stadium_data['2着的中'] == '○').sum() / total
    acc_3rd = (stadium_data['3着的中'] == '○').sum() / total
    acc_all = (stadium_data['全的中'] == '○').sum() / total

    print(f'レース場 {int(stadium):2d}: 1着 {acc_1st:.1%} | 2着 {acc_2nd:.1%} | 3着 {acc_3rd:.1%} | 三連単 {acc_all:.1%} ({total} レース)')

=== レース場ごとの的中率 ===

レース場 | 総レース数 | 的中数 | 的中率
--------------------------------------------------
 1     |    204   |   6   | 2.9%
 2     |    180   |  10   | 5.6%
 3     |    168   |   9   | 5.4%
 4     |    203   |  10   | 4.9%
 5     |    175   |  12   | 6.9%
 6     |    192   |  14   | 7.3%
 7     |    216   |  13   | 6.0%
 8     |     84   |   4   | 4.8%
 9     |    159   |  10   | 6.3%
10     |    162   |  14   | 8.6%
11     |    172   |  18   | 10.5%
12     |    192   |  18   | 9.4%
13     |    135   |   6   | 4.4%
14     |    101   |   8   | 7.9%
15     |    144   |  13   | 9.0%
16     |    102   |   4   | 3.9%
17     |    132   |   9   | 6.8%
18     |    180   |   9   | 5.0%
19     |    192   |  15   | 7.8%
20     |     94   |   9   | 9.6%
21     |     75   |   5   | 6.7%
22     |    101   |   0   | 0.0%
23     |    194   |  10   | 5.2%
24     |    192   |  19   | 9.9%

=== レース回ごとの的中率 ===

レース回 | 総レース数 | 的中数 | 的中率
--------------------------------------------------
10R   |    314

### 8. レース場7,12,20 × レース回10R,11R,12R の詳細分析

In [28]:
# Drill-down on the stadium / race-number combinations selected below.
target_stadiums = [7, 12, 20]
target_races = ['10R', '11R', '12R']

# Keep only judged races held at a target stadium in a target race slot.
in_target_stadium = results_detailed['レース場'].isin(target_stadiums)
in_target_race = results_detailed['レース回'].isin(target_races)
filtered_data = results_detailed[in_target_stadium & in_target_race].copy()

print('=== レース場 7,12,20 × レース回 10R,11R,12R の的中率 ===')
print()

if filtered_data.empty:
    print('該当するレースがありません')
else:
    # Overall figures for the filtered subset.
    total_races = len(filtered_data)
    total_hits = (filtered_data['全的中'] == '○').sum()
    overall_accuracy = total_hits / total_races if total_races > 0 else 0

    print(f'対象レース数: {total_races}')
    print(f'三連単的中数: {total_hits}')
    print(f'三連単的中率: {overall_accuracy:.1%}')
    print()

    # One line per (stadium, race number) combination that has any race.
    print('組み合わせ別の的中率:')
    print('-' * 70)
    print('レース場 | レース回 | 総レース数 | 的中数 | 的中率 | 1着的中率 | 2着的中率 | 3着的中率')
    print('-' * 70)

    for stadium in sorted(target_stadiums):
        at_stadium = filtered_data[filtered_data['レース場'] == stadium]
        for race in sorted(target_races):
            combo_data = at_stadium[at_stadium['レース回'] == race]
            if combo_data.empty:
                continue

            total = len(combo_data)
            hits = (combo_data['全的中'] == '○').sum()
            accuracy = hits / total

            acc_1st = (combo_data['1着的中'] == '○').sum() / total
            acc_2nd = (combo_data['2着的中'] == '○').sum() / total
            acc_3rd = (combo_data['3着的中'] == '○').sum() / total

            print(f'{int(stadium):2d}     | {race:4s}  | {total:6d}     | {hits:3d}   | {accuracy:.1%}  | {acc_1st:.1%}    | {acc_2nd:.1%}    | {acc_3rd:.1%}')

    print()
    print('=== 詳細（的中したレースと外れたレースの例） ===')
    print()

    # Up to five races where the trifecta was hit.
    hits_data = filtered_data[filtered_data['全的中'] == '○']
    if not hits_data.empty:
        print('的中例（最初の5件）:')
        for _, row in hits_data.head(5).iterrows():
            print(f'  レース場 {int(row["レース場"]):2d}, {row["レース回"]}: 予想 {row["予想1着"]}-{row["予想2着"]}-{row["予想3着"]} = 実績 {row["実際1着"]}-{row["実際2着"]}-{row["実際3着"]}')

    print()

    # Up to five races where the trifecta was missed.
    misses_data = filtered_data[filtered_data['全的中'] == '×']
    if not misses_data.empty:
        print('外れた例（最初の5件）:')
        for _, row in misses_data.head(5).iterrows():
            print(f'  レース場 {int(row["レース場"]):2d}, {row["レース回"]}: 予想 {row["予想1着"]}-{row["予想2着"]}-{row["予想3着"]} ≠ 実績 {row["実際1着"]}-{row["実際2着"]}-{row["実際3着"]}')

=== レース場 7,12,20 × レース回 10R,11R,12R の的中率 ===

対象レース数: 126
三連単的中数: 13
三連単的中率: 10.3%

組み合わせ別の的中率:
----------------------------------------------------------------------
レース場 | レース回 | 総レース数 | 的中数 | 的中率 | 1着的中率 | 2着的中率 | 3着的中率
----------------------------------------------------------------------
 7     | 10R   |     18     |   2   | 11.1%  | 88.9%    | 33.3%    | 22.2%
 7     | 11R   |     18     |   3   | 16.7%  | 72.2%    | 38.9%    | 33.3%
 7     | 12R   |     18     |   0   | 0.0%  | 77.8%    | 16.7%    | 11.1%
12     | 10R   |     16     |   2   | 12.5%  | 68.8%    | 25.0%    | 18.8%
12     | 11R   |     16     |   0   | 0.0%  | 81.2%    | 18.8%    | 18.8%
12     | 12R   |     16     |   3   | 18.8%  | 75.0%    | 37.5%    | 31.2%
20     | 10R   |      8     |   1   | 12.5%  | 87.5%    | 12.5%    | 25.0%
20     | 11R   |      8     |   0   | 0.0%  | 100.0%    | 25.0%    | 0.0%
20     | 12R   |      8     |   2   | 25.0%  | 87.5%    | 37.5%    | 25.0%

=== 詳細（的中したレースと外れたレースの例） ===

的中例