In [1]:
from glob import glob
import os

# Collect the .ndpi files of each stain folder: H&E first, then PD-L1 (22C3).
he_slide_list, pdl1_slide_list = (
    glob(pattern)
    for pattern in (
        '../../data/pdl1_wsi_anno/PD-L1(HnE)/*.ndpi',
        '../../data/pdl1_wsi_anno/PD-L1(22C3)/*.ndpi',
    )
)

# Prefix extraction helper (keeps only the part up to e.g. CODIPAI-STOP-SS-01395)
def get_prefix(filename):
    """Return the first four '-'-separated fields of a file's basename.

    Args:
        filename: Path or file name, e.g.
            '.../CODIPAI-STBX-SS-04320-S-EB-01.ndpi'.

    Returns:
        The first four fields joined with '-', e.g. 'CODIPAI-STBX-SS-04320'.
        If the basename has fewer than four fields it is returned unchanged
        (extension included).
    """
    basename = os.path.basename(filename)
    parts = basename.split('-')
    if len(parts) < 4:
        return basename
    if len(parts) == 4:
        # With exactly four fields the last one still carries the file
        # extension ('01395.ndpi'); strip it so the prefix equals the one
        # derived from longer names such as '...-01395-S-EB-01.ndpi'.
        parts[3] = os.path.splitext(parts[3])[0]
    return '-'.join(parts[:4])

# Map each prefix to its slide path, separately per stain. If several files
# share a prefix, the dict keeps only the last one encountered in the list.
he_prefixes = {get_prefix(f): f for f in he_slide_list}
pdl1_prefixes = {get_prefix(f): f for f in pdl1_slide_list}

# Prefixes present in both stains (dict key views support set intersection).
common_prefixes = he_prefixes.keys() & pdl1_prefixes.keys()

# Build the paired lists in sorted prefix order: index i of both lists refers
# to the same prefix, and the order is reproducible across runs (iterating the
# set directly would yield an arbitrary order).
sorted_prefixes = sorted(common_prefixes)
matched_he_slides = [he_prefixes[prefix] for prefix in sorted_prefixes]
matched_pdl1_slides = [pdl1_prefixes[prefix] for prefix in sorted_prefixes]

print(f"공통 파일 개수: {len(common_prefixes)}")
print(f"HE slides: {len(matched_he_slides)}")
print(f"PD-L1 slides: {len(matched_pdl1_slides)}")
print("\n매칭된 prefix 예시:")
for prefix in sorted_prefixes[:5]:
    print(f"  {prefix}")

공통 파일 개수: 77
HE slides: 77
PD-L1 slides: 77

매칭된 prefix 예시:
  CODIPAI-STBX-SS-04320
  CODIPAI-STBX-SS-04321
  CODIPAI-STBX-SS-04322
  CODIPAI-STBX-SS-04323
  CODIPAI-STBX-SS-04324


{'CODIPAI-STBX-SS-04319-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04320-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04321-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04322-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04323-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04324-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04325-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04326-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04327-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04328-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04329-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04330-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04331-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04332-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04333-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04334-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04335-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04336-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04337-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04338-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04339-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04340-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04341-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04342-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04343-S-EB-01.ndpi',
 'CODIPAI-STBX-SS-04344-S