In [1]:
import os
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from fttransformer_mask import FTTransformer  # あなたのモデル定義に合わせて

# ====== 設定 ======
DATA_PATH = "cardio_train.csv"  # あなたのCSVのパス
TEST_RATIO = 0.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# カテゴリ特徴と連続値特徴
categorical_features = ["gender", "cholesterol", "gluc", "smoke", "alco", "active"]
continuous_features = ["age", "height", "weight", "ap_hi", "ap_lo"]
additional_features = ["cholesterol", "gluc", "smoke", "alco", "active"]

# マスク対象は cholesterol 〜 active の5つ（インデックス 1〜5）
# マスクテンプレート（12次元）：[gender(0), c(1), g(2), s(3), a(4), ac(5), age(6), h(7), w(8), hi(9), lo(10)]
MASK_BASE = [False,False] + [True] * 5 + [False] * 5  # cholesterol〜activeがTrue

# モデルパスの手動指定
fold_to_model_path = {
    1: "./save_mask_models/best_model_epoch300_fold1_ValAcc0.732.pth",
    #2: "./save_mask_models/best_model_epoch300_fold2_ValAcc0.742.pth",
    #3: "./save_mask_models/best_model_epoch300_fold3_ValAcc0.738.pth",
    #4: "./save_mask_models/best_model_epoch300_fold4_ValAcc0.736.pth",
    #5: "./save_mask_models/best_model_epoch300_fold5_ValAcc0.739.pth",
}

# ====== データ読み込みと前処理 ======
data = pd.read_csv(DATA_PATH, sep=';')
ids = data["id"].values
X = data.drop(columns=["cardio"])
y = data["cardio"]

# テストデータを固定で20%抽出
X_trainval, X_test, y_trainval, y_test, id_trainval, id_test = train_test_split(
    X, y, ids, test_size=TEST_RATIO, random_state=42, stratify=y
)

# 1) カテゴリ特徴：LabelEncoder を trainval で fit → trainval/test で transform
encoders = {}
for col in categorical_features:
    le = LabelEncoder()
    X_trainval[col] = le.fit_transform(X_trainval[col])
    X_test[col]     = le.transform(X_test[col])
    encoders[col]   = le

# 2) 連続値特徴：StandardScaler を trainval で fit → trainval/test で transform
scaler = StandardScaler()
X_trainval[continuous_features] = scaler.fit_transform(X_trainval[continuous_features])
X_test[continuous_features]     = scaler.transform(X_test[continuous_features])


# idを除いたテストデータ
X_test = X_test.drop(columns=["id"])

# ====== 推論実行 ======
for fold, model_path in fold_to_model_path.items():
    if not os.path.exists(model_path):
        print(f"❌ fold{fold}: モデルファイルが見つかりません: {model_path}")
        continue

    print(f"▶ fold{fold}: モデル読み込み中: {model_path}")
    
    model = FTTransformer(
        categories=[X[c].nunique() for c in categorical_features],
        num_continuous=len(continuous_features),
        dim=64, depth=6, heads=8, ff_dropout=0.2, attn_dropout=0.2
    ).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    results = []
    for i in range(len(X_test)):
        row_result = {'id': int(id_test[i])}
        
        # 特徴量をテンソル化
        x_cat_vals = X_test.iloc[i][categorical_features].values
        x_cat = torch.tensor([x_cat_vals], dtype=torch.long).to(device)
        x_cont_vals = X_test.iloc[i][continuous_features].values.astype(np.float32)
        x_cont = torch.tensor([x_cont_vals], dtype=torch.float32).to(device)

        # baseのみ（5つ全てマスク）
        base_mask = torch.tensor([MASK_BASE], dtype=torch.bool).to(device)
        with torch.no_grad():
            conf = torch.sigmoid(model(x_cat, x_cont, base_mask)).item()
        row_result["base"] = conf

        # base+1特徴ずつ非マスク化して推論
        for j, feat in enumerate(additional_features):
            mask = MASK_BASE.copy()
            mask[2 + j] = False  # 該当カテゴリのみ非マスクに
            mask_tensor = torch.tensor([mask], dtype=torch.bool).to(device)
            with torch.no_grad():
                conf = torch.sigmoid(model(x_cat, x_cont, mask_tensor)).item()
            row_result[f"base+{feat}"] = conf

        # 正解ラベル
        row_result["answer"] = int(y_test.iloc[i])
        results.append(row_result)

    # ====== 結果出力 ======
    df_out = pd.DataFrame(results).sort_values(by="id")
    output_path = f"fold{fold}_confidence_output.csv"
    df_out.to_csv(output_path, index=False)
    print(f"✅ fold{fold} の結果を保存: {output_path}")

▶ fold1: モデル読み込み中: ./save_mask_models/best_model_epoch300_fold1_ValAcc0.732.pth


  x_cat = torch.tensor([x_cat_vals], dtype=torch.long).to(device)


✅ fold1 の結果を保存: fold1_confidence_output.csv
