In [85]:
import os
import SimpleITK as sitk
import glob
import monai
from monai.transforms import (

    AsDiscrete,
    RandAdjustContrastd,
    AsDiscreted,
    EnsureChannelFirstd,
    Compose,
    CropForegroundd,
    LoadImaged,
    Orientationd,
    RandCropByPosNegLabeld,
    SaveImaged,
    ScaleIntensityRanged,
    Spacingd,
    AddChanneld,
    SpatialPadd,
    RandRotate90d,
    RandShiftIntensityd,
    EnsureTyped,
    EnsureType,
    MapTransform,
    Resized,
    Invertd,
    ToTensord,
    NormalizeIntensityd,
    RandFlipd,
    Lambdad,
    Activations,
    AsDiscrete,
)
from monai.metrics import ROCAUCMetric
from monai.data import CacheDataset, ThreadDataLoader,DataLoader, Dataset, decollate_batch,load_decathlon_datalist
import torch
import torch.nn as  nn
from torch.nn import Linear,  Softmax
import torch.nn.functional as F
from monai.utils import first, set_determinism
from random import shuffle, seed
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
# TensorBoard event writer; events are written under ./log/tensorboard.
writer = SummaryWriter('./log/tensorboard')

# Use the 'file_system' tensor-sharing strategy for torch.multiprocessing.
# NOTE(review): presumably chosen to avoid file-descriptor limits with DataLoader workers — confirm.
torch.multiprocessing.set_sharing_strategy('file_system')
# Fix the random seed through MONAI's helper so runs are repeatable.
set_determinism(seed=1)

import pandas as pd
# Clinical table: keep the patient ID followed by six clinical variables.
# The ID must stay the first column because later lookups drop column 0.
df_raw = pd.read_csv('/app/liucd/判定_fill_df.csv')
df_cli = df_raw[['patient_ID', 'T_stage', 'HER2_status', 'NAC_classification', 'ER_percentage', 'PR_percentage', 'Ki_67']]


# Directories with the ADC volumes, one per site (syf, zunyi, shandong, yizhong, xian).
syf_adcdir = '/app/liucd/deeplearn_dec/DL_dec/data_adc/syf/Mixed'
zy_adcdir = '/app/liucd/deeplearn_dec/DL_dec/data_adc/zunyi/Mixed'
sd_adcdir = '/app/liucd/deeplearn_dec/DL_dec/data_adc/shandong/Mixed'
yizhong_adcdir = '/app/liucd/deeplearn_dec/DL_dec/data_adc/yizhong/Mixed'
xian_adcdir = '/app/liucd/deeplearn_dec/DL_dec/data_adc/xian/Mixed'

# Directories with the DCE volumes for the same sites (same layout under data/ instead of data_adc/).
syf_dcedir = '/app/liucd/deeplearn_dec/DL_dec/data/syf/Mixed'
zy_dcedir = '/app/liucd/deeplearn_dec/DL_dec/data/zunyi/Mixed'
sd_dcedir = '/app/liucd/deeplearn_dec/DL_dec/data/shandong/Mixed'
yizhong_dcedir = '/app/liucd/deeplearn_dec/DL_dec/data/yizhong/Mixed'
xian_dcedir = '/app/liucd/deeplearn_dec/DL_dec/data/xian/Mixed'

# Feature CSV that is passed to merge_df() further down in this cell.
file_path  = '/app/liucd/5center-1310-all+6status-pcr.csv'

# Each list walks its site directories in a fixed order and, within a site,
# takes the *.nii.gz files in sorted order. The ADC and DCE lists use the same
# site order so that they can be zipped together later.

# Training lists: syf followed by zunyi.
train_adcimages = [
    nii_path
    for center_dir in (syf_adcdir, zy_adcdir)
    for nii_path in sorted(glob.glob(os.path.join(center_dir, '*.nii.gz')))
]

train_dceimages = [
    nii_path
    for center_dir in (syf_dcedir, zy_dcedir)
    for nii_path in sorted(glob.glob(os.path.join(center_dir, '*.nii.gz')))
]

# Validation lists: shandong, then yizhong, then xian.
val_adcimages = [
    nii_path
    for center_dir in (sd_adcdir, yizhong_adcdir, xian_adcdir)
    for nii_path in sorted(glob.glob(os.path.join(center_dir, '*.nii.gz')))
]

val_dceimages = [
    nii_path
    for center_dir in (sd_dcedir, yizhong_dcedir, xian_dcedir)
    for nii_path in sorted(glob.glob(os.path.join(center_dir, '*.nii.gz')))
]

# Columns selected from the merge_df() output: the patient ID followed by nine
# feature columns. 'patient_id' must stay first because the per-patient lookups
# later in this cell drop column 0 to obtain the feature values.
rad_features = ['patient_id', 'dce-delta-log-sigma-5-0-mm-3D_glcm_DifferenceEntropy',
 'dce-bf-log-sigma-3-0-mm-3D_firstorder_Skewness',
 'dce-bf-log-sigma-4-0-mm-3D_gldm_DependenceEntropy',
 'dce-bf-log-sigma-5-0-mm-3D_firstorder_Skewness',
 'dce-bf-wavelet-LHL_firstorder_Mean',
 'dce-bf-wavelet-LHL_glcm_DifferenceEntropy',
 'dce-bf-wavelet-LHL_glrlm_RunEntropy',
 'dce-bf-wavelet-HLL_glszm_ZoneEntropy',
 'adc-bl-wavelet-LLL_gldm_SmallDependenceEmphasis']

def merge_df(df_raw_path):
    """
    Read a feature CSV and return it with extra "delta" feature columns.

    Every column whose name contains '-bl-' or '-bf-' is renamed to use
    '-delta-' instead of that tag, and the '-bl-' values are subtracted from
    the '-bf-' values (bf minus bl). pandas aligns the subtraction on column
    labels, so a feature that exists under only one of the two tags yields an
    all-NaN delta column rather than an error.

    Only the columns between the first and the last one are scanned for
    feature names. Assumes 'patient_id' is the first column of the CSV
    (TODO confirm against the file).

    Side effect: prints the number of rows, then the number of '-bl-' and
    '-bf-' columns found.

    Args:
        df_raw_path: path of the CSV without delta columns; it must contain a
            'patient_id' column.

    Returns:
        pandas.DataFrame with 'patient_id', the delta columns, and then every
        original column except the first one. Rows keep the CSV order.
    """
    df_raw = pd.read_csv(df_raw_path)
    print(len(df_raw))
    # Skip the first and the last column when collecting feature names.
    cols = df_raw.columns[1: -1]

    col_bl = [name for name in cols if '-bl-' in name]
    col_bf = [name for name in cols if '-bf-' in name]
    print(len(col_bl), len(col_bf))

    # Give both halves the same '-delta-' labels so the subtraction pairs them up.
    df_first = df_raw[col_bl].rename(columns=lambda name: name.replace('-bl-', '-delta-'))
    df_second = df_raw[col_bf].rename(columns=lambda name: name.replace('-bf-', '-delta-'))

    # DataFrame subtraction matches on row index and column label.
    df_delta = df_second - df_first

    df_delta.insert(loc=0, column='patient_id', value=df_raw['patient_id'])

    # Side-by-side concat on the shared row index. The former pd.merge() on
    # 'patient_id' was removed: its result was overwritten immediately, and a
    # join on repeated IDs would only have multiplied rows for nothing.
    df_merge = pd.concat([df_delta, df_raw[df_raw.columns[1:]]], axis=1)

    return df_merge

df_rad = merge_df(file_path)
df_rad = df_rad[rad_features]


def _features_for_images(image_paths, table, id_column):
    """Return one feature list per image, in the order of ``image_paths``.

    The patient ID is the fourth-from-last '_'-separated field of the path
    (e.g. ``2022_10_20_1650366_ADC2_0000_0.nii.gz`` -> 1650366). The first
    row of ``table`` whose ``id_column`` equals that ID is used, and its
    first column (the ID itself) is dropped.

    Loop variables stay inside this helper, so the module-level ``file_path``
    (the CSV path used above) is no longer overwritten by image paths.

    Args:
        image_paths: iterable of image file paths.
        table: DataFrame whose first column is the patient ID.
        id_column: name of that ID column.

    Returns:
        list of lists with the row values after the ID column.

    Raises:
        IndexError: no row matches the patient ID. Same exception type as the
            former inline loops, but the message names the patient and file.
        ValueError: the ID field of the file name is not an integer.
    """
    features = []
    for image_path in image_paths:
        patient_id = int(image_path.split('_')[-4])
        matches = table[table[id_column] == patient_id]
        if matches.empty:
            raise IndexError(
                f"patient {patient_id} (from {image_path}) has no row in column {id_column!r}"
            )
        features.append(matches.values.tolist()[0][1:])
    return features


train_clinical = _features_for_images(train_adcimages, df_cli, 'patient_ID')
val_clinical = _features_for_images(val_adcimages, df_cli, 'patient_ID')

train_rad = _features_for_images(train_adcimages, df_rad, 'patient_id')
val_rad = _features_for_images(val_adcimages, df_rad, 'patient_id')


    
def _build_records(adc_images, dce_images, rad_rows, clinical_rows):
    """Combine four parallel lists into one sample dict per patient.

    Each dict has the keys 'image_adc', 'image_dce', 'rad', 'clinical' and
    'label'. The label is the integer in the last '_'-separated field of the
    ADC file name, with the '.nii.gz' suffix removed.

    Args:
        adc_images: ADC image paths.
        dce_images: DCE image paths, expected in the same patient order.
        rad_rows: one radiomics feature list per patient.
        clinical_rows: one clinical feature list per patient.

    Returns:
        list of dicts, one per position of the input lists.

    Raises:
        ValueError: the lists differ in length (zip would silently drop the
            tail), or the ADC and DCE files at one position carry different
            patient IDs (fourth-from-last '_' field of the path).
    """
    sizes = (len(adc_images), len(dce_images), len(rad_rows), len(clinical_rows))
    if len(set(sizes)) != 1:
        raise ValueError(f"input lists differ in length (adc, dce, rad, clinical): {sizes}")

    records = []
    for image_adc, image_dce, rad, clinical in zip(adc_images, dce_images, rad_rows, clinical_rows):
        if image_adc.split('_')[-4] != image_dce.split('_')[-4]:
            raise ValueError(f"ADC/DCE patient mismatch: {image_adc} vs {image_dce}")
        records.append({
            'image_adc': image_adc,
            'image_dce': image_dce,
            'rad': rad,
            'clinical': clinical,
            'label': int(image_adc.split('_')[-1].replace('.nii.gz', '')),
        })
    return records


train_dict = _build_records(train_adcimages, train_dceimages, train_rad, train_clinical)
val_dict = _build_records(val_adcimages, val_dceimages, val_rad, val_clinical)


1317
2264 2263


In [35]:
# Spot-check: display the last training record.
train_dict[-1]

{'image_adc': '/app/liucd/deeplearn_dec/DL_dec/data_adc/zunyi/Mixed/2022_10_20_1650366_ADC2_0000_0.nii.gz',
 'image_dce': '/app/liucd/deeplearn_dec/DL_dec/data/zunyi/Mixed/2022_10_20_1650366_+C2_0000_0.nii.gz',
 'rad': [-0.426334239,
  0.315025113,
  8.35283075,
  0.068442358,
  0.121029744,
  2.725956892,
  4.15475501,
  7.113295602,
  0.737770167],
 'clinical': [3.0, 1.0, 2.0, 0.0, 0.0, 0.2],
 'label': 0}

In [38]:
# Show the clinical row(s) of patient 1650366 for comparison with a record's 'clinical' values.
df_cli[df_cli['patient_ID'] == 1650366]

Unnamed: 0,patient_ID,T_stage,HER2_status,NAC_classification,ER_percentage,PR_percentage,Ki_67
207,1650366,3,1,2,0.0,0.0,0.2


In [16]:
# Display the clinical table (the rendered output is truncated by pandas).
df_cli

Unnamed: 0,patient_ID,T_stage,HER2_status,NAC_classification,ER_percentage,PR_percentage,Ki_67
0,966939,4,2,2,0.15,0.0,0.40
1,976497,4,0,2,0.15,0.0,0.05
2,976855,4,0,2,0.15,0.0,0.20
3,978509,4,0,2,0.70,0.9,0.10
4,1001769,2,1,2,0.50,0.1,0.15
...,...,...,...,...,...,...,...
1700,1565305,4,2,1,0.00,0.0,0.15
1701,1726198,2,1,2,0.90,0.8,0.20
1702,1550326,2,2,1,0.00,0.0,0.80
1703,1636744,3,2,1,0.00,0.0,0.60


In [14]:
# Display the selected feature columns.
# NOTE(review): `df_all` is not defined in any visible cell — presumably an earlier name
# for the merge_df() result; confirm or replace it, otherwise Restart & Run All fails here.
df_all[rad_features]

Unnamed: 0,patient_id,dce-delta-log-sigma-5-0-mm-3D_glcm_DifferenceEntropy,dce-bf-log-sigma-3-0-mm-3D_firstorder_Skewness,dce-bf-log-sigma-4-0-mm-3D_gldm_DependenceEntropy,dce-bf-log-sigma-5-0-mm-3D_firstorder_Skewness,dce-bf-wavelet-LHL_firstorder_Mean,dce-bf-wavelet-LHL_glcm_DifferenceEntropy,dce-bf-wavelet-LHL_glrlm_RunEntropy,dce-bf-wavelet-HLL_glszm_ZoneEntropy,adc-bl-wavelet-LLL_gldm_SmallDependenceEmphasis
0,976497,0.019366,0.219314,9.036620,0.161089,-1.015850,3.653617,4.870243,7.230573,0.651705
1,976855,-0.588563,-1.679852,7.960455,-1.592570,0.203749,3.318455,4.639385,7.042979,0.785428
2,978509,-0.082875,-0.478516,8.776517,-0.872217,-2.779912,3.168494,4.654114,7.323049,0.709319
3,1001769,-1.203932,-0.840375,7.938833,-0.618427,0.271760,2.916017,4.251457,7.056933,0.805726
4,1006675,-0.413856,-0.623015,8.560586,-0.665192,-0.093608,2.664824,4.230114,7.189380,0.686057
...,...,...,...,...,...,...,...,...,...,...
1311,535156,-0.445091,-0.517992,6.442988,-0.442012,-0.238424,1.859339,3.910243,6.278825,0.722999
1312,576195,-0.931867,-1.136835,6.306491,-0.603338,0.079689,1.208392,3.411577,5.061482,0.666919
1313,574715,-0.356469,-1.350703,7.191078,-1.005959,-3.140468,1.668230,3.711391,6.210419,0.698343
1314,576570,-0.938677,-0.525061,6.411881,-0.302395,-0.050769,1.322478,3.650255,6.541261,0.723284


In [79]:
# Directories inspected by the following consistency checks.
# These are the same strings as xian_adcdir / xian_dcedir defined in the first cell.
adc_path = '/app/liucd/deeplearn_dec/DL_dec/data_adc/xian/Mixed'
dce_path = '/app/liucd/deeplearn_dec/DL_dec/data/xian/Mixed'

In [80]:
# Patient ID = fourth-from-last '_'-separated field of every entry in adc_path.
adc_ids = [entry.rsplit('_', 4)[-4] for entry in os.listdir(adc_path)]
len(adc_ids)

182

In [81]:
# Patient ID = fourth-from-last '_'-separated field of every entry in dce_path.
dce_ids = [entry.rsplit('_', 4)[-4] for entry in os.listdir(dce_path)]
len(dce_ids)

182

In [82]:
# IDs that have an ADC file but no DCE file.
list(filter(lambda pid: pid not in dce_ids, adc_ids))

[]

In [83]:
# IDs that have a DCE file but no ADC file.
list(filter(lambda pid: pid not in adc_ids, dce_ids))

[]

In [86]:
# Check that every validation record pairs an ADC and a DCE file of the same
# patient. Mismatching pairs are printed and tallied in `count`.
count = 0
for each_data in val_dict:
    adc_id, dce_id = (each_data[key].split('_')[-4] for key in ('image_adc', 'image_dce'))
    if adc_id == dce_id:
        continue
    print(each_data['image_adc'], each_data['image_dce'])
    count += 1

In [87]:
# Patient IDs listed in the xian CSV, as a plain Python list.
# The path is relative, so it resolves against the kernel's working directory.
xian_ids = pd.read_csv('xian182-all+6status-pcr.csv')['patient_id'].tolist()


In [88]:
# DCE file IDs (strings) whose integer value does not occur in the CSV ID list.
list(filter(lambda pid: int(pid) not in xian_ids, dce_ids))

['1648732']

In [89]:
# CSV IDs whose string form does not occur among the ADC file IDs.
list(filter(lambda pid: str(pid) not in adc_ids, xian_ids))

[16487320]

In [69]:
# Check whether ID 1803239 occurs in the CSV-derived ID list.
1803239 in xian_ids

True