In [None]:
# ! pip install lifelines
# ! pip install scikit-survival



In [6]:
import torch
import torch.nn.functional as F
import numpy as np
from lifelines.utils import concordance_index
from sksurv.metrics import integrated_brier_score
from sksurv.util import Surv
from torch.utils.data import DataLoader
import pandas as pd

import modules.DataAnalysis as DataAnalysis
import modules.ModelAnalysis as ModelAnalysis
import modules.DataModify as DataModify
from modules.DataSelect import DataPreprocessing

import modules.Models as Models

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
def _first_crossing_bins(pmf_np, threshold):
    """Return, per sample, the first bin whose running survival product is <= threshold.

    The running product is prod_{s<=t} (1 - sum_over_events pmf[:, :, s]).
    Samples that never reach the threshold get the last bin index (n_bins - 1).
    """
    n_samples, _, n_bins = pmf_np.shape
    if n_bins == 0:
        # Nothing to scan; mirrors the "never crossed" fallback of n_bins - 1.
        return np.full(n_samples, -1, dtype=np.int64)

    step_survival = 1 - pmf_np.sum(axis=1)                       # (B, n_bins)
    # Accumulate in float64 so long products do not lose precision.
    running = np.cumprod(step_survival.astype(np.float64), axis=1)
    crossed = running <= threshold
    # argmax on a boolean row gives the first True; rows without any True
    # fall back to the last bin.
    return np.where(crossed.any(axis=1), crossed.argmax(axis=1), n_bins - 1)


def evaluate_deephit(model, test_loader, y_train, y_test, device='cuda', threshold=0.9):
    """
    Evaluate a DeepHit-style model and print/return three metrics:
    - Concordance index (C-index)
    - Integrated Brier Score (IBS)
    - Mean absolute error (MAE) between a predicted time bin and the observed time

    The last time bin of the model output is treated as a dummy bin and is
    dropped before any metric is computed.

    Parameters
    ----------
    model : callable module
        Called as ``model(x)``; must return a 3-tuple whose 2nd and 3rd items
        are the PMF and CIF, each indexed as (batch, event, time_bin).
    test_loader : iterable
        Yields ``(x, times, events)`` batches of tensors.
    y_train : structured array
        Passed unchanged as the first argument of ``integrated_brier_score``.
    y_test : structured array
        Not used by this function; the test labels are rebuilt from the
        ``times`` / ``events`` yielded by ``test_loader``. Kept so existing
        callers keep working.
    device : str
        Device the inputs are moved to before the forward pass.
    threshold : float
        A sample's predicted time is the first bin at which the running
        product of ``1 - sum_events(pmf)`` falls to this value or below.

    Returns
    -------
    (c_index, ibs, mae, pred_times)
        ``pred_times`` is an integer array of predicted bin indices.

    Notes
    -----
    Column ``t`` of the survival matrix is evaluated at time ``t``, i.e. the
    observed times are assumed to be expressed in bin units -- TODO confirm
    against the dataset.
    """
    model.eval()
    risk_batches = []
    surv_batches = []
    time_batches = []
    event_batches = []
    pred_time_batches = []

    with torch.no_grad():
        for x, times, events in test_loader:
            x = x.to(device)
            _, pmf, cif = model(x)  # each (B, num_events, time_bins)

            # Drop the trailing dummy time bin -> (B, num_events, time_bins-1)
            pmf = pmf[:, :, :-1]
            cif = cif[:, :, :-1]

            # Overall survival: 1 minus the cumulative incidence of all events.
            surv_batches.append((1 - cif.sum(dim=1)).cpu())   # (B, time_bins-1)
            # Risk score: total event probability mass over the kept bins.
            risk_batches.append(pmf.sum(dim=(1, 2)).cpu())    # (B,)
            time_batches.append(times.cpu())
            event_batches.append(events.cpu())

            # Predicted time bin for every sample of the batch.
            pred_time_batches.append(_first_crossing_bins(pmf.cpu().numpy(), threshold))

    # Tensor -> NumPy
    risk_score = torch.cat(risk_batches).numpy()
    survival = torch.cat(surv_batches).numpy()
    times = torch.cat(time_batches).numpy()
    events = torch.cat(event_batches).numpy()
    pred_times = np.concatenate(pred_time_batches)

    # Concordance index. The score is negated because a larger risk should
    # correspond to a shorter time.
    c_index = concordance_index(
        event_times=times,
        predicted_scores=-risk_score,
        event_observed=events
    )

    # Integrated Brier Score.
    y_test_surv = Surv.from_arrays(
        event=events.astype(bool),
        time=times.astype(float)
    )
    observed = y_test_surv["time"]
    # scikit-survival requires every evaluation time to lie inside the test
    # follow-up range [min, max), and the estimate must have one column per
    # evaluation time. Clamp the grid to that range and to the bins that exist.
    first_bin = max(int(np.ceil(observed.min())), 0)
    last_bin = min(int(observed.max()), survival.shape[1])
    eval_times = np.arange(first_bin, last_bin)
    if eval_times.size < 2:
        raise ValueError(
            "Integrated Brier Score needs at least two evaluation times inside "
            f"the test follow-up range; got bins [{first_bin}, {last_bin})."
        )
    ibs = integrated_brier_score(
        y_train, y_test_surv, survival[:, first_bin:last_bin], eval_times
    )

    # Mean absolute error between predicted bin index and observed time.
    mae = np.mean(np.abs(pred_times - times))

    # Report
    print(f"Concordance Index (C-index): {c_index:.4f}")
    print(f"Integrated Brier Score (IBS): {ibs:.4f}")
    print(f"Mean Absolute Error (MAE) of predicted time: {mae:.4f}")

    return c_index, ibs, mae, pred_times

In [8]:
# Source CSV and the cleaned copy that the dataset class reads.
raw_csv_path = './data/test dataset.csv'
fixed_csv_path = './data/test dataset_fixed.csv'

# Read the CSV, drop its first column, and save the result without an index.
df = pd.read_csv(raw_csv_path)
df = df.drop(columns=[df.columns[0]])
df.to_csv(fixed_csv_path, index=False)

# Build the dataset and a fixed-order loader from the cleaned file.
test_file = [fixed_csv_path]
test_dataset = DataModify.CancerDataset(
    target_column='event',
    time_column='time',
    file_paths=test_file,
)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Structured survival labels for the IBS computation.
test_times = test_dataset.time.numpy()
test_events = test_dataset.target.numpy()

# Only test labels are available here; for IBS a dummy of the same format
# can be passed in place of the training labels.
y_test = Surv.from_arrays(
    event=test_events.astype(bool),
    time=test_times.astype(float),
)


In [9]:
# Checkpoint with the trained weights.
input_params_path = './parameters/deephit_model_2D_CNN.pth'

# Architecture hyper-parameters; they must match the checkpoint being loaded.
input_dim = 17            # number of input features
hidden_size = (128, 64)   # sizes of the 1st and 2nd hidden layers
time_bins = 91            # time is split into 3-month bins; 270+ months share one bin
num_events = 4            # number of event types

# Build the network on the target device.
model = Models.DeepHitSurvWithSEBlockAnd2DCNN(
    input_dim,
    hidden_size,
    time_bins,
    num_events,
    dropout=0.2,
).to(device)

# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source.
model.load_state_dict(
    torch.load(input_params_path, map_location=device)
)
model.to(device)
model.eval()  # evaluation mode; as the last expression it also displays the model

DeepHitSurvWithSEBlockAnd2DCNN(
  (se_block): Sequential(
    (0): Linear(in_features=17, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=17, bias=True)
    (3): Sigmoid()
  )
  (se_block_event): ModuleList(
    (0-3): 4 x Sequential(
      (0): Linear(in_features=64, out_features=16, bias=True)
      (1): ReLU()
      (2): Linear(in_features=16, out_features=64, bias=True)
      (3): Sigmoid()
    )
  )
  (shared): Sequential(
    (0): Linear(in_features=17, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
  )
  (heads): ModuleList(
    (0-3): 4 x Linear(in_features=64, out_features=91, bias=True)
  )
  (conv2d_block): Sequential(
    (0): Conv2d(1, 8, kernel_size=(2, 5), stride=(1, 1), padding=(1, 2))
    (1): ReLU()
    (2): Conv2d(8, 16, kernel_size=(2, 3), stride=(1, 1), padding=(0, 1))
    (3)

In [None]:
# No training labels are loaded in this notebook, so a copy of the test
# labels stands in for the `y_train` argument.
y_train_dummy = y_test.copy()

# Run the evaluation; the returned predicted times are not needed here.
c_index, ibs, mae, _ = evaluate_deephit(
    model,
    test_loader,
    y_train_dummy,
    y_test,
    device=device,
)


Concordance Index (C-index): 0.8263
Integrated Brier Score (IBS): 0.2005


In [11]:
def compute_risk_score_sigmoid(pmf, time_lambda=0.05, event_weights=None):
    """
    Collapse a per-event, per-time PMF into one 0-100 risk score per sample.

    Each entry ``pmf[b, e, t]`` is multiplied by ``exp(-time_lambda * t)`` and
    by the weight of event ``e``; the products are summed per sample, centred
    on the batch mean, and passed through ``sigmoid(.) * 100``.

    Parameters
    ----------
    pmf : torch.Tensor, shape (B, E, T)
        Per-event probability over time bins.
    time_lambda : float
        Exponential decay rate of the time weights (earlier bins weigh more
        when positive).
    event_weights : list or torch.Tensor of length E, optional
        Per-event weights. Defaults to all ones. Tensors are detached and
        cast to float32 on ``pmf``'s device.

    Returns
    -------
    torch.Tensor, shape (B,)
        Scores in (0, 100).

    Notes
    -----
    Because the batch mean is subtracted, a sample's score depends on the
    other samples passed in the same call.
    """
    B, E, T = pmf.shape
    device = pmf.device

    # Time weights: exp(-lambda * t) for t = 0..T-1.
    time_weights = torch.exp(-time_lambda * torch.arange(T, device=device))

    # Event weights.
    if event_weights is None:
        event_weights = torch.ones(E, device=device)
    elif torch.is_tensor(event_weights):
        # torch.tensor(<tensor>) would emit a copy-construct UserWarning;
        # detach + cast gives the same values without it.
        event_weights = event_weights.detach().to(device=device, dtype=torch.float32)
    else:
        event_weights = torch.tensor(event_weights, device=device, dtype=torch.float32)

    # Apply both weightings; reshape (not view) so non-contiguous weight
    # tensors are accepted too.
    weighted_pmf = pmf * time_weights.reshape(1, 1, T)
    weighted_pmf = weighted_pmf * event_weights.reshape(1, E, 1)

    # Weighted sum per sample.
    risk_score_raw = weighted_pmf.sum(dim=(1, 2))

    # Centre on the batch mean so the sigmoid input takes both signs.
    risk_score_raw = risk_score_raw - risk_score_raw.mean()

    # Sigmoid, scaled to 0-100.
    risk_score = torch.sigmoid(risk_score_raw) * 100

    return risk_score

def get_pmf_from_model(model, loader, device=device, num_time_bins=91):
    """
    Run ``model`` over ``loader`` and collect the PMF outputs with their labels.

    Parameters
    ----------
    model : callable module
        Called as ``model(x)``; must return a 3-tuple whose 2nd item is the
        PMF indexed as (batch, event, time_bin).
    loader : iterable
        Yields ``(x, times, events)`` batches of tensors.
    device : str
        Device the inputs are moved to before the forward pass.
    num_time_bins : int or None
        Keep only the first ``num_time_bins`` time bins of the PMF
        (``None`` keeps all of them). The default of 91 preserves the
        previously hard-coded slice.

    Returns
    -------
    (all_pmf, all_times, all_events)
        CPU tensors concatenated over all batches.

    NOTE(review): for a model that emits 91 bins the default keeps every bin,
    whereas ``evaluate_deephit`` drops the last (dummy) bin with ``[:, :, :-1]``.
    Pass ``num_time_bins=90`` if the dummy bin should be excluded here too --
    confirm the intended behaviour.
    """
    model.eval()
    all_pmf = []
    all_times = []
    all_events = []
    with torch.no_grad():
        for x, times, events in loader:
            x = x.to(device)
            _, pmf, _ = model(x)  # logits and CIF are not needed

            pmf = pmf[:, :, :num_time_bins]  # (batch_size, num_events, <= num_time_bins)

            # Keep everything on the CPU so the results can be concatenated
            # and converted to NumPy regardless of where the loader put them.
            all_pmf.append(pmf.cpu())
            all_times.append(times.cpu())
            all_events.append(events.cpu())
    all_pmf = torch.cat(all_pmf, dim=0)  # (num_samples, num_events, kept_bins)
    all_times = torch.cat(all_times, dim=0)
    all_events = torch.cat(all_events, dim=0)
    return all_pmf, all_times, all_events
 
# Extract PMFs and labels. The variables carry a `_train` suffix, but the
# data comes from `test_loader`.
pmf_train, times_train, events_train = get_pmf_from_model(model, test_loader)

# Per-event weights (example values).
event_weights = [2.0, 3.0, 3.0, 15.0]

# Risk scores (sigmoid, scaled to 0-100) as a NumPy array.
risk_scores = compute_risk_score_sigmoid(
    pmf_train,
    time_lambda=0.05,
    event_weights=event_weights,
).numpy()

# Overall statistics.
print("최대값:", risk_scores.max())
print("최소값:", risk_scores.min())
print("평균값:", risk_scores.mean())
print("앞 10개 값:", risk_scores[:10])

# Statistics per event label.
events_np = events_train.numpy()
unique_events = np.unique(events_np)

print("=== 라벨별 Risk Score 통계 ===")
for e in unique_events:
    mask = (events_np == e)
    scores_e = risk_scores[mask]
    # Only report labels that actually select at least one sample.
    if len(scores_e) > 0:
        print(f"\nEvent {e}:")
        print(f"  개수: {len(scores_e)}")
        print(f"  최대값: {scores_e.max():.4f}")
        print(f"  최소값: {scores_e.min():.4f}")
        print(f"  평균값: {scores_e.mean():.4f}")

ImportError: cannot import name 'ERR_IGNORE' from 'numpy.core.umath' (/opt/anaconda3/envs/nlp/lib/python3.9/site-packages/numpy/core/umath.py)