# 一、构建数据集和图像预处理

In [13]:
import os

import numpy as np
import pydicom.filereader
from tqdm import trange

import torch
import torchvision
import torchvision.transforms as transforms

## 1、继承torchvision.transforms.Compose类

In [14]:
class My_Compose(transforms.Compose):
    '''Chain several single-image transforms into one callable.

    Thin subclass of ``torchvision.transforms.Compose``; each transform is
    applied in list order and the output of one is the input of the next.
    '''

    def __init__(self, transforms):
        '''Store the ordered list of transforms.

        Args:
            transforms: sequence of callables, each taking and returning an
                image.
        '''
        # Pass the transform list (not ``self``) to the base initializer; the
        # base class is what assigns ``self.transforms``.
        super().__init__(transforms)

    def __call__(self, image):
        '''Apply every stored transform to ``image`` in order and return the result.'''
        for t in self.transforms:
            image = t(image)
        return image
    
class My_ToTensor(transforms.ToTensor):
    '''Convert a 2-D numpy image into a scaled float tensor with a leading channel axis.

    The base-class initializer is inherited unchanged, so the transform is
    still constructed with no arguments.
    '''

    def __call__(self, image):
        '''Return ``to_tensor(image)``.'''
        return self.to_tensor(image)

    @staticmethod
    def to_tensor(pic):
        '''Turn an (H, W) numpy array into a (1, H, W) float tensor divided by 4096.

        Args:
            pic: numpy array indexed as ``pic[row, col]``.

        Returns:
            Float tensor holding ``pic / 4096`` with a channel axis in front.
        '''
        # Append a trailing channel axis, then move it to the front: HWC -> CHW.
        with_channel = pic[:, :, np.newaxis]
        channel_first = with_channel.transpose(2, 0, 1)
        tensor = torch.from_numpy(channel_first).contiguous()
        # NOTE(review): 4096 presumably corresponds to a 12-bit pixel range — confirm.
        return tensor.float() / 4096

class My_Normalize(transforms.Normalize):
    '''Standardize a tensor image with a fixed mean and standard deviation.'''

    def __init__(self, mean, std):
        '''Store the normalization statistics.

        Args:
            mean: mean (scalar or per-channel sequence) to subtract.
            std: standard deviation (scalar or per-channel sequence) to divide by.
        '''
        # Delegate to the base initializer so the parent state is fully set up
        # (it assigns ``self.mean`` / ``self.std``); skipping it leaves the
        # object half-initialized on torchvision versions where Normalize is
        # an ``nn.Module``.
        super().__init__(mean, std)

    def __call__(self, image):
        '''Return ``image`` normalized as ``(image - mean) / std``.'''
        return transforms.functional.normalize(image, mean=self.mean, std=self.std)

## 2、继承torch.utils.data.Dataset类

In [15]:
class Mydataset(torch.utils.data.Dataset):
    '''Paired LDCT/NDCT DICOM dataset.

    Each item reads one LDCT file and one NDCT file, applies ``transform``
    followed by ``normalize`` to both, and returns ``(LDCT, NDCT)`` when
    ``train`` is True or ``(LDCT, NDCT, LD_path)`` when ``train`` is False.
    '''

    def __init__(self, LDCT_root, NDCT_root, transform, normalize, train = True):
        '''Index the two image directories.

        Args:
            LDCT_root: directory containing the LDCT DICOM files.
            NDCT_root: directory containing the NDCT DICOM files.
            transform: callable applied first to each pixel array.
            normalize: callable applied to the output of ``transform``.
            train: if False, ``__getitem__`` also returns the LDCT file path.

        Raises:
            ValueError: if the two directories hold a different number of entries.
        '''
        super().__init__()
        self.LDCT_root = LDCT_root
        self.NDCT_root = NDCT_root
        self.transform = transform
        self.normalize = normalize
        self.train = train

        # os.listdir returns entries in arbitrary order, so sort both listings
        # to make the pairing deterministic.
        # NOTE(review): assumes corresponding LDCT/NDCT files sort into the
        # same position in their directories — confirm the naming scheme.
        LDCT_list = sorted(os.listdir(LDCT_root))
        NDCT_list = sorted(os.listdir(NDCT_root))

        # Fail immediately instead of leaving ``self.len`` undefined and
        # crashing later inside ``__len__``.
        if len(LDCT_list) != len(NDCT_list):
            raise ValueError('LDCT和NDCT图像数量不一致，请检查!')

        self.data_path = list(zip(LDCT_list, NDCT_list))
        self.len = len(self.data_path)

    def __getitem__(self, index):
        '''Return the preprocessed image pair (plus the LDCT path when not training).'''
        LD, ND = self.data_path[index]
        # os.path.join keeps the path valid on every platform.
        LD_path = os.path.join(self.LDCT_root, LD)
        ND_path = os.path.join(self.NDCT_root, ND)
        preprocessed = self.get_preprocess(LD_path, ND_path)
        if self.train:
            return preprocessed[0], preprocessed[1]
        return preprocessed[0], preprocessed[1], LD_path

    def __len__(self):
        '''Return the number of image pairs.'''
        return self.len

    def get_preprocess(self, LD_path, ND_path):
        '''Read both DICOM files and preprocess their pixel arrays.

        Returns:
            ``[LD_image, ND_image, LD_ds]`` where ``LD_ds`` is the object
            returned by ``dcmread`` for the LDCT file.
        '''
        LD_ds, LD_image = self.get_dcm_array(LD_path)
        _, ND_image = self.get_dcm_array(ND_path)

        # Scale with ``transform`` first, then standardize with ``normalize``.
        LD_image = self.normalize(self.transform(LD_image))
        ND_image = self.normalize(self.transform(ND_image))
        return [LD_image, ND_image, LD_ds]

    @staticmethod
    def get_dcm_array(path):
        '''Read a DICOM file and return ``(dataset, pixel_array as int16)``.

        NOTE(review): no rescale slope/intercept is applied here, so the array
        holds the stored pixel values as decoded by pydicom — confirm whether
        CT numbers are expected downstream.
        '''
        ds = pydicom.filereader.dcmread(path)
        return ds, (ds.pixel_array).astype(np.int16)

## 3、计算图像经过my_totensor处理后的mean和std

In [19]:
def cal_mean_and_std(root=r'E:\NBIA\Sampling\LDCT_ALL\\'):
    '''Print and return the average per-image mean and std after ``My_ToTensor``.

    Every entry of ``root`` is read as a DICOM file, converted with
    ``My_ToTensor``, and its mean and standard deviation are recorded.

    Args:
        root: directory whose entries are all DICOM files. Defaults to the
            path originally hard-coded here.

    Returns:
        ``(mean_mean, std_mean)``: the averages of the per-image means and
        per-image standard deviations. Note that ``std_mean`` is the mean of
        the per-image stds, not the std of all pixels pooled together.
    '''
    dcm_list = os.listdir(root)
    transform = My_Compose([My_ToTensor()])

    mean_list = []
    std_list = []
    for idx in trange(len(dcm_list)):
        ds = pydicom.filereader.dcmread(os.path.join(root, dcm_list[idx]))
        img = transform((ds.pixel_array).astype(np.int16))
        mean_list.append(img.mean())
        std_list.append(img.std())

    mean_sum = np.array(mean_list).sum()
    std_sum = np.array(std_list).sum()
    mean_mean = mean_sum/len(mean_list)
    std_mean = std_sum/len(std_list)

    # mean
    print('mean_len:', len(mean_list))
    print('mean_sum:', mean_sum)
    print('mean_mean:', mean_mean)

    # std
    print('\nstd_len:', len(std_list))
    print('std_sum:', std_sum)
    print('std_mean:', std_mean)
    print('\n' + '*'*50)

    return mean_mean, std_mean

## 4、测试函数

In [20]:
def test():
    '''Smoke-test ``Mydataset``: load one batch and print statistics for each image pair.'''
    LDCT_path = r'E:\Jupyter notebook\LDCT\LDCT'
    NDCT_path = r'E:\Jupyter notebook\LDCT\NDCT'

    my_totensor = My_ToTensor()
    my_normalize = My_Normalize(0.131, 0.121)
    transform = My_Compose([my_totensor])
    normalize = My_Compose([my_normalize])

    # ``Mydataset.__init__`` has no ``matrix`` parameter; passing one raised
    # TypeError, so only the supported arguments are given.
    train_set = Mydataset(LDCT_root = LDCT_path, NDCT_root = NDCT_path,
                          transform = transform,
                          normalize = normalize)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size = 2,
                                               num_workers = 0,
                                               shuffle = False,)
    dataiter = iter(train_loader)
    # Use the built-in next(); the iterator's ``.next()`` method is not
    # available on recent PyTorch releases.
    data = list(next(dataiter))
    print(len(data))
    # data[0] is the LDCT batch and data[1] the NDCT batch; walk them pairwise.
    for LD_img, Res_img in zip(data[0], data[1]):
        print('图像数量:', len(data[0]))
        print('size:', LD_img.size()       , Res_img.size())
        print('type:', LD_img.type()       , Res_img.type())
        print('max:' , LD_img.max().item() , Res_img.max().item())
        print('min:' , LD_img.min().item() , Res_img.min().item())
        print('mean:', LD_img.mean().item(), Res_img.mean().item())
        print('std:' , LD_img.std().item() , Res_img.std().item())
        print('LD_img:', LD_img)
        print('Res_img:', Res_img)

# Test

In [21]:
if __name__ == '__main__':
    # Compute the mean and std of the images after My_ToTensor processing
    cal_mean_and_std()
    # Run the dataset smoke test (disabled)
#     test()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7380/7380 [02:03<00:00, 59.80it/s]


mean_len: 7380
mean_sum: 904.16345
mean_mean: 0.12251537291984248

std_len: 7380
std_sum: 876.4656
std_mean: 0.11876227319402101

**************************************************
