##### 0. 필요한 라이브러리 추가 설치
###### Dicom 및 nifti 파일 저장 및 불러오기에 활용할 SimpleITK 라이브러리 설치
###### Radiomics feature 추출을 위한 pyradiomics 라이브러리와 데이터 시각화를 위한 seaborn 라이브러리 설치

In [None]:
!pip install SimpleITK pyradiomics seaborn

##### 1. 사용할 라이브러리 임포트
###### 인공지능 모델 학습을 위한 데이터 로드 및 전처리, 학습 과정에서 필요한 라이브러리 임포트

In [None]:
import os
import json
import SimpleITK as sitk
import numpy as np
import cv2
import matplotlib.pyplot as plt

##### 2. 데이터 경로
###### 사용될 데이터 경로 확인 및 설정

In [None]:
!pwd

In [None]:
# Checking data path
# Resolve the dataset directories and collect the case names of each class.
path_df = '/home/user/workdir/notices/data'
path_data = os.path.join(path_df, 'breast')
# Alternative location relative to the parent of the working directory:
# path_data = os.path.join(os.path.abspath('..'), 'data')
path_abimg = os.path.join(path_data, 'image', 'abnormal')
path_nrimg = os.path.join(path_data, 'image', 'normal')

# Print the directory tree so a wrong path is visible before any file is read.
# The loop names deliberately avoid shadowing the built-in `dir`.
for dirpath, dirnames, filenames in os.walk(path_data):
    print("path:", dirpath)
    print("dir:", dirnames)
    print("file:", filenames)
    print("----------")

## Getting data name
# os.path.splitext removes only the final extension, so a file name that
# contains additional dots still maps back to its file when '.dcm' is
# appended again further down.
abnameList = sorted(
    os.path.splitext(name)[0] for name in os.listdir(path_abimg) if name.endswith('.dcm')
)
nrnameList = sorted(
    os.path.splitext(name)[0] for name in os.listdir(path_nrimg) if name.endswith('.dcm')
)

print(f'Abnormal: {len(abnameList)}')
print(f'Normal: {len(nrnameList)}')


In [None]:
# Show the collected case names of both classes.
print(f'Abnormal name List: {abnameList}')
print(f'normal name List: {nrnameList}')

##### 3. 데이터 확인
###### - 데이터 경로에 저장된 데이터 확인
###### - 데이터에 저장된 환자의 정보값과 이미지 정보, 병변 정보 등을 시각화하여 확인

In [None]:
# Checking sample images & information
## Inspect the header of the first abnormal DICOM file.
abname = abnameList[0]
path_abdcm = os.path.join(path_abimg, f'{abname}.dcm')
abimg = sitk.ReadImage(path_abdcm)

# Print every metadata key stored in the image together with its value.
for tag in abimg.GetMetaDataKeys():
    tag_value = abimg.GetMetaData(tag)
    print(f'({tag}) = = "{tag_value}"')

# Report the image size and the name of its pixel type.
img_size = abimg.GetSize()
pixel_type = sitk.GetPixelIDValueAsString(abimg.GetPixelID())
print(f"Image Size: {img_size}")
print(f"Image PixelType: {pixel_type}")

In [None]:
## checking information from a json file
# Load the annotation file whose name matches the case stored in `abname`.
path_info = os.path.join(path_data, 'label', 'json', 'abnormal', f'{abname}.json')

# JSON text is UTF-8 by specification; naming the encoding keeps the read
# independent of the platform's default locale.
with open(path_info, 'r', encoding='utf-8') as info_json:
    patient_info = json.load(info_json)  # loaded as a dictionary

print(patient_info)
print(patient_info.keys())

In [None]:
# Display the 'patientInfo' entry of the loaded annotation dictionary.
patient_info['patientInfo']

In [None]:
## checking an image
# A normal case is shown here, so its name is kept in `nrname`; storing it in
# `abname` would overwrite the abnormal case name with a normal one.
nrname = nrnameList[9]
img_sitk = sitk.ReadImage(os.path.join(path_nrimg, f'{nrname}.dcm'))

# Convert to a NumPy array and keep the first entry along the leading axis.
img = sitk.GetArrayFromImage(img_sitk)[0]
plt.imshow(img, cmap='gray')

In [None]:
## checking sample images
# Show abnormal/normal image pairs at evenly spaced positions of the name lists.
n_sample = 5

# Clamp the step to 1: with fewer than n_sample abnormal cases the integer
# division yields 0, and range() raises ValueError for a zero step.
step = max(1, len(abnameList) // n_sample)
# Both lists are indexed with the same n, so stop at the shorter one to keep
# nrnameList[n] inside its bounds.
n_pairs = min(len(abnameList), len(nrnameList))

for n in range(0, n_pairs, step):

    abimg = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path_abimg, f'{abnameList[n]}.dcm')))[0]
    nrimg = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path_nrimg, f'{nrnameList[n]}.dcm')))[0]

    # Left panel: abnormal case
    plt.subplot(1, 2, 1)
    plt.title(f'Abnormal: {abnameList[n]}')
    plt.imshow(abimg, cmap='gray')
    plt.axis('off')

    # Right panel: normal case
    plt.subplot(1, 2, 2)
    plt.title(f'Normal: {nrnameList[n]}')
    plt.imshow(nrimg, cmap='gray')
    plt.axis('off')
    plt.show()

In [None]:
# Check the lesion type of every annotated abnormal case
# Case names are taken from the JSON annotation files; each case is sorted
# into mnList (malignant) or bnList (benign) by its 'lesion_type' value.
path_abjson = os.path.join(path_data, 'label', 'json', 'abnormal')
# os.path.splitext removes only the final extension, so names containing
# extra dots still match their file when '.json' is appended again below.
abnameList = sorted(
    os.path.splitext(name)[0] for name in os.listdir(path_abjson) if name.endswith('.json')
)

mnList = []
bnList = []

for abname in abnameList:
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(os.path.join(path_abjson, f'{abname}.json'), 'r', encoding='utf-8') as info_json:
        patient_info = json.load(info_json)  # loaded as a dictionary

    abtype = patient_info['patientInfo']['lesion_type']
    if abtype == 'malignant':
        mnList.append(abname)
    elif abtype == 'benign':
        bnList.append(abname)
    else:
        # Report cases whose lesion type is neither of the two expected values.
        print(f'Unknown: {abname}')

In [None]:
# Print the number of malignant and benign cases.
print(f'악성: {len(mnList)} | 양성: {len(bnList)}')

In [None]:
## checking malignant lesion samples
# For the first n_sample malignant cases, show the image, its label image and
# an overlay of the two side by side.
n_sample = 5
for name in mnList[:n_sample]:

    abimg = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path_abimg, f'{name}.dcm')))[0]
    ablab = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path_data, 'label', 'nii', f'{name}.nii')))

    # figsize is (width, height) in inches: one row of three panels needs a
    # wide figure, not a tall one.
    plt.figure(figsize=(30, 10))
    plt.subplot(1, 3, 1)
    plt.title(f'mn: {name}')
    plt.imshow(abimg, cmap='gray')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.title('Label')
    plt.imshow(ablab, cmap='gray')
    plt.axis('off')

    # Overlay: draw the label semi-transparently on top of the image.
    plt.subplot(1, 3, 3)
    plt.title('Overlay')
    plt.imshow(abimg, cmap='gray', alpha=0.8)
    plt.imshow(ablab, cmap='Reds', alpha=0.4)

    plt.axis('off')
    # Render each figure as soon as it is complete.
    plt.show()
    


In [None]:
## checking benign lesion samples
# For the first n_sample benign cases, show the image, its label image and
# an overlay of the two side by side.
n_sample = 5
for name in bnList[:n_sample]:

    abimg = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path_abimg, f'{name}.dcm')))[0]
    ablab = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path_data, 'label', 'nii', f'{name}.nii')))

    # figsize is (width, height) in inches: one row of three panels needs a
    # wide figure, not a tall one.
    plt.figure(figsize=(30, 10))
    plt.subplot(1, 3, 1)
    plt.title(f'bn: {name}')
    plt.imshow(abimg, cmap='gray')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.title('Label')
    plt.imshow(ablab, cmap='gray')
    plt.axis('off')

    # Overlay: draw the label semi-transparently on top of the image.
    plt.subplot(1, 3, 3)
    plt.title('Overlay')
    plt.imshow(abimg, cmap='gray', alpha=0.8)
    plt.imshow(ablab, cmap='Reds', alpha=0.4)

    plt.axis('off')
    # Render each figure as soon as it is complete.
    plt.show()
    
