# 转存cos容器

In [5]:
# Module-level session object; redictstore() below calls its upload_data().
from ti import session
ti_session = session.Session()
def redictstore(path="./100003_nii", bucket="lyb-1257812067", key_prefix="data/bisai"):
    """Upload local results to a COS bucket through the module-level session.

    Thin wrapper around ``ti_session.upload_data``.

    Args:
        path: Local path of the result file(s) to upload.
        bucket: Target bucket. Pass your own COS bucket in the matching
            region; per the original note, using the example bucket
            results in an error.
        key_prefix: COS path (key prefix) inside the bucket.

    Returns:
        Whatever ``ti_session.upload_data`` returns (presumably the remote
        location of the upload -- confirm against the SDK documentation).
        The original bound this value to an unused local and discarded it.
    """
    return ti_session.upload_data(path=path, bucket=bucket, key_prefix=key_prefix)

# 使用示例
# redictstore(path='./utils1.ipynb', key_prefix='data/files')

# 复制和移动文件到指定文件夹

In [6]:
import os,shutil
import time

def mymovefile(srcfile,dstfile,pbar=None):
    """Move ``srcfile`` to ``dstfile``, creating the destination folder if needed.

    Nothing is moved when the source is not a regular file or when the
    destination already exists; a message is reported instead.

    Args:
        srcfile: Path of the file to move.
        dstfile: Destination file path.
        pbar: Optional tqdm-style progress bar (any object exposing
            ``set_description``). When given, the "already exists" notice
            is shown as the bar description instead of being printed.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return

    if os.path.exists(dstfile):
        notice = '{} has existed!'.format(dstfile)
        # The original tested the misspelled name `pabr`, which raised
        # NameError every time the destination already existed.
        if pbar is not None:
            pbar.set_description(notice)
            # Short pause, presumably so the notice stays visible on the bar.
            time.sleep(0.1)
        else:
            print(notice)
        return

    dest_dir = os.path.dirname(dstfile)
    # dest_dir is '' for a bare file name; os.makedirs('') would raise.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)
    shutil.move(srcfile, dstfile)

def mycopyfile(srcfile,dstfile, pbar=None):
    """Copy ``srcfile`` to ``dstfile``, creating the destination folder if needed.

    Nothing is copied when the source is not a regular file or when the
    destination already exists; a message is reported instead.

    Args:
        srcfile: Path of the file to copy.
        dstfile: Destination file path.
        pbar: Optional tqdm-style progress bar (any object exposing
            ``set_description``). When given, the "already exists" notice
            is shown as the bar description instead of being printed.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return

    if os.path.exists(dstfile):
        notice = '{} has existed!'.format(dstfile)
        # The original tested the misspelled name `pabr`, which raised
        # NameError every time the destination already existed.
        if pbar is not None:
            pbar.set_description(notice)
            # Short pause, presumably so the notice stays visible on the bar.
            time.sleep(0.1)
        else:
            print(notice)
        return

    dest_dir = os.path.dirname(dstfile)
    # dest_dir is '' for a bare file name; os.makedirs('') would raise.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)
    shutil.copyfile(srcfile, dstfile)
    
    
# 使用示例
# with tqdm(total = len([*zip(complete_maskfiles, mv_maskfiles)]), desc='process...', unit='img') as pbar:
#     for (complete_maskfile, mv_maskfile) in zip(complete_maskfiles, mv_maskfiles):
#         mycopyfile(complete_maskfile, mv_maskfile, pbar)
#         pbar.update(1)
#         pbar.set_postfix(**{'processing file': complete_maskfile+'==>'+mv_maskfile})
        
#     print('done!')

# 切片变3D

In [25]:
def genprocess(path, destpath, suffix='.nii.gz'):
    """Assemble the DICOM slices under ``path`` into 3D volume file(s).

    Args:
        path: Directory containing the slices of one modality, e.g.
            ``./taop-2021/100003/0001/0001_Flair``. Its last two components
            are used as the id folder and the output file name.
        destpath: Root output directory, e.g. ``./100003``. Created if missing.
        suffix: File suffix of the written volume(s).

    Returns:
        ``(image3D, destfile)``:
        * exactly one series in ``path``: the image read by SimpleITK and
          the path string it was written to
          (``<destpath>/<id>/<name><suffix>``);
        * several series: a list of images and a parallel list of paths
          (``<destpath>/<id>/<name><index><suffix>``);
        * no series: two empty lists; nothing is written.

    Raises:
        ValueError: if ``path`` has fewer than two components.
    """
    # makedirs (unlike mkdir) also handles nested roots and existing folders.
    os.makedirs(destpath, exist_ok=True)

    # All series ids found in this folder.
    series_ids = sitk.ImageSeriesReader.GetGDCMSeriesIDs(path)
    if not series_ids:
        return [], []

    # './taop-2021/100003/0001/0001_Flair' -> ('0001', '0001_Flair').
    # Path.parts also copes with a trailing slash or OS-specific separators.
    sid, filename = Path(path).parts[-2:]
    # e.g. ./100003/0001
    destpathsid = destpath.rstrip('/') + '/' + sid
    Path(destpathsid).mkdir(parents=True, exist_ok=True)

    if len(series_ids) == 1:
        image3D = _read_series(path, series_ids[0])
        # e.g. ./100003/0001/0001_Flair.nii.gz
        destfile = destpathsid + '/' + filename + suffix
        sitk.WriteImage(image3D, destfile)
        return image3D, destfile

    # Several series in one folder: collect one volume and one path per series.
    print(sid, filename)
    print(destpathsid)
    image3D = []
    destfile = []
    for i, series_id in enumerate(series_ids):
        img3d = _read_series(path, series_id)
        # The original concatenated the int index directly (TypeError), wrote
        # the whole list instead of the current image, and skipped the per-id
        # folder it had just created.
        destfilei = f'{destpathsid}/{filename}{i}{suffix}'
        sitk.WriteImage(img3d, destfilei)
        image3D.append(img3d)
        destfile.append(destfilei)

    return image3D, destfile


def _read_series(path, series_id):
    """Read the DICOM series ``series_id`` located in ``path`` as one image."""
    series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames(path, series_id)
    reader = sitk.ImageSeriesReader()
    # Also read the per-slice metadata dictionaries and the private tags.
    reader.MetaDataDictionaryArrayUpdateOn()
    reader.LoadPrivateTagsOn()
    reader.SetFileNames(series_file_names)
    return reader.Execute()


# 使用示例
# from tqdm import tqdm
# from pathlib import Path
# import os
# def generate3D(data_path, destpath):
    
#     if not os.path.exists(destpath):
#         os.mkdir(destpath)
#     # 存储全部路径下的3D图像
#     all_3D = []
#     with tqdm(total=len(data_path), desc=f'process...', unit='num') as pbar:
#         for path in data_path:
            
#             image3D, destfile = genprocess(path, destpath)
#             all_3D.append((image3D, destfile))   
#             # print(image3D.GetSize())
#             pbar.set_postfix(**{'name of this process': path})
#             pbar.update(1)
            
#     return all_3D


# all_3D = generate3D(data_path=data_path, destpath='./100003_nii')
# # 963
# print(len(all_3D), all_3D[:10]) # img.shape =>(223, 270, 20) (W, H, S)==(X, Y, Z)

# 获取每一个id后的三种模态，保存为单独的列表data_path
# 记录重复次数


In [8]:
def modal_and_repnum(id_series):
    """Collapse consecutive duplicate series ids into paths and repeat counts.

    Each entry of ``id_series`` looks like ``'<id>_<modality>'`` (for example
    ``'0324_t1ce'``). Consecutive equal entries are grouped into one run; for
    each run the modality folder path is built under the module-level
    ``csv_root`` and the run length is recorded.

    Args:
        id_series: The ``id_series`` column of the original csv. How often an
            entry repeats corresponds to the number of regions in that
            modality folder.

    Returns:
        ``(data_path, rep_number)``: the modality path of every run and, in
        the same order, how many times that entry was repeated. Both lists
        are empty for empty input (the original returned ``([], [0])``).

    Raises:
        ValueError: if an entry does not split into exactly two parts on '_'.
    """
    from itertools import groupby

    # Folder-name suffix per modality tag; anything else maps to 'Flair'.
    folder_suffix = {'t1ce': 'CET1', 't2': 'T2'}

    data_path = []
    rep_number = []
    for entry, run in groupby(id_series):
        id_number, model = entry.split('_')
        model = folder_suffix.get(model, 'Flair')
        data_path.append(os.path.join(csv_root, id_number, id_number + '_' + model))
        rep_number.append(sum(1 for _ in run))
    return data_path, rep_number

# 使用示例


# 展示某个切片或者3d影像的基本信息

In [10]:
def imgred(filename="./taop-2021/100003/0001/0001_Flair/0.dcm", idx = None):
    """Print basic information about an image file and display one slice.

    Prints size, origin, spacing, direction and every metadata key/value of
    the image read by SimpleITK, then prints a 10x10 patch of the selected
    slice and shows the slice with matplotlib.

    Args:
        filename: Image file to read (a single slice or a 3D volume).
        idx: Index along the first array axis of the slice to show;
            ``None`` selects slice 0. The array is indexed as
            ``pixel_array[idx, rows, cols]``, so it must have at least
            three dimensions.
    """
    import matplotlib.pyplot as plt

    image = sitk.ReadImage(filename)
    print(image.GetSize())
    print(image.GetOrigin())
    print(image.GetSpacing())
    print(image.GetDirection())
    print(image.GetMetaDataKeys())
    for key in image.GetMetaDataKeys():
        print(key, ' : ', image.GetMetaData(key))

    pixel_array = sitk.GetArrayFromImage(image)
    # Identity test: `== None` would be evaluated element-wise on array-likes.
    if idx is None:
        idx = 0
    print(pixel_array.shape, '\n', pixel_array[idx, 100:110, 100:110])
    plt.figure(figsize=(9,9))
    plt.imshow(pixel_array[idx], cmap='gray')
    plt.axis('on')  # use 'off' to hide the axes
    plt.title('image')  # figure title
    plt.show()
    
# 使用示例
# # ./taop-2021/100003/0001/mask/2.nii.gz
# complete_maskfiles = []
# mv_maskfiles = []
# for i, maskfile in enumerate(mask_files):
#     complete_maskfiles.append(csv_root.rstrip('/')+maskfile)
#     mv_maskfiles.append('./100003_nii'+maskfile)
#     if i == 0:
#         mkfl = csv_root.rstrip('/')+maskfile
#         imgred(mkfl, idx=13)
    
# assert len(complete_maskfiles) == len(mask_files)
# print(complete_maskfiles[:10], mv_maskfiles[:10])

# 存储和读取二进制文件

In [None]:
def binarystore(filename='bi.pkl', content=None, mode='wb'):
    """Serialize ``content`` into ``filename`` with pickle.

    Args:
        filename: Path of the binary file to write.
        content: Object to pickle.
        mode: Mode passed to ``open``; must be a binary write mode.
    """
    import pickle

    # The context manager closes (and flushes) the file even if dump raises;
    # the original leaked the handle in that case.
    with open(filename, mode) as pick_file:
        pickle.dump(content, pick_file)


def binaryread(filename='bi.pkl', mode='rb'):
    """Load and return the object pickled in ``filename``.

    Counterpart of ``binarystore``: the content comes back in the same form
    it was written.

    Args:
        filename: Path of the pickle file to read.
        mode: Mode passed to ``open``; must be a binary read mode.

    Returns:
        The unpickled object.
    """
    # pickle was only imported inside binarystore, so import it here as well.
    import pickle

    # NOTE: unpickling can execute arbitrary code -- only read trusted files.
    # The original was missing the ':' on the def line, passed the undefined
    # name `model` to open(), and never closed the file.
    with open(filename, mode) as pickfile:
        return pickle.load(pickfile)
    
# 使用示例
# """
# 序列id_series, mask 文件列表和病灶id 以及对应的病灶类别(原csv内容对应列)
# id_series
# mask_files
# lesion_id
# class_id 

# data_path 中包含每一个病人的三种模态路径
# rep_number 中包含每一个模态重复的次数，与data_path一一对应，预示着该模态中有几个病灶区域

# complete_maskfiles：完整的mask路径列表
# mv_maskfiles：完整的移动后mask路径列表

# all_3D：生成的全部3D影像列表，格式=》[(image3D对象,对应的路径), (), (),...]

# 注意，上述路径为在虚拟环境中的路径，具体真实路径需要自己分析生成！
# """
# list_combine= [id_series, mask_files, lesion_id, class_id,
#               data_path, rep_number,
#               complete_maskfiles, mv_maskfiles,
#               all_3D
# ]

# filename = 'list_combine.pkl'
# binarystore(filename=filename, content=list_combine)
# binaryread(filename)

# 将列表按照数字排序

In [45]:
def list_sort_nicely(l):
    """Sort a list of paths in place by the first number in each file name.

    Only the last ``/``-separated component of each item is inspected, and
    only its first run of digits is used, compared as an integer (so
    ``2.dcm`` sorts before ``10.dcm``). Items with the same number keep
    their relative order. Items whose file name contains no digits are
    placed after all numbered items, ordered by file name (the original
    raised IndexError for them).

    Args:
        l: List of path strings; it is sorted in place.

    Returns:
        The same list object, now sorted.
    """
    import re

    first_number = re.compile(r'[0-9]+')

    def alphanum_key(s):
        """Build the sort key for one list item."""
        basename = s.split('/')[-1]
        match = first_number.search(basename)
        if match is None:
            # The leading 1 keeps these after every numbered item and
            # prevents int/str comparisons between the two kinds of key.
            return (1, basename)
        return (0, int(match.group()))

    l.sort(key=alphanum_key)
    return l


# Usage example
# Per the original note: glob returns entries in order on Windows but
# unordered on Linux, hence the explicit sort helper above.
from glob import glob
csv_root = './taop-2021/100003/'

# print(list_sort_nicely(glob(csv_root+'0001/'+'0001_Flair/*')))

['./taop-2021/100003/0001/mask', './taop-2021/100003/0001/0001_Flair', './taop-2021/100003/0001/0001_T2', './taop-2021/100003/0001/0001_CET1']


In [19]:
import pandas as pd
import SimpleITK as sitk

In [17]:
# Output root for the generated volumes.
destfiles = './100003_nii_test/'
# Dataset root; also read as a module-level global by modal_and_repnum().
csv_root = './taop-2021/100003/'
csv_file = csv_root+'test1_data_info.csv'
# Only the id_series column is loaded from the csv.
datas = pd.read_csv(csv_file, usecols=['id_series'])
id_series = datas['id_series'].values
print(id_series[:10])

# Build the modality paths and their repeat counts
data_path, rep_num = modal_and_repnum(id_series)
print(data_path[:10], rep_num[:10])

['0324_flair' '0324_t1ce' '0324_t2' '0325_flair' '0325_t1ce' '0325_t2'
 '0326_flair' '0326_t1ce' '0326_t2' '0327_flair']
['./taop-2021/100003/0324/0324_Flair', './taop-2021/100003/0324/0324_CET1', './taop-2021/100003/0324/0324_T2', './taop-2021/100003/0325/0325_Flair', './taop-2021/100003/0325/0325_CET1', './taop-2021/100003/0325/0325_T2', './taop-2021/100003/0326/0326_Flair', './taop-2021/100003/0326/0326_CET1', './taop-2021/100003/0326/0326_T2', './taop-2021/100003/0327/0327_Flair'] [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [26]:
from tqdm import tqdm
from pathlib import Path
import os
def generate3D(data_path, destpath):
    """Run ``genprocess`` on every entry of ``data_path`` with a progress bar.

    Args:
        data_path: Sequence of slice directories, one per modality.
        destpath: Root output directory; created when it does not exist.

    Returns:
        A list with one ``(image3D, destfile)`` tuple per entry of
        ``data_path``, in input order, as returned by ``genprocess``.
    """
    if not os.path.exists(destpath):
        os.mkdir(destpath)

    # One (volume, output path) pair per processed directory.
    collected = []
    progress = tqdm(total=len(data_path), desc='process...', unit='num')
    with progress as pbar:
        for series_dir in data_path:
            volume, written_to = genprocess(series_dir, destpath)
            collected.append((volume, written_to))
            # Show which directory was just processed, then advance the bar.
            pbar.set_postfix(**{'name of this process': series_dir})
            pbar.update(1)

    return collected


# Generate the 3D volumes
all_3D = generate3D(data_path=data_path, destpath=destfiles)
# 963 (count noted by the author; the captured output below shows 300)
print(len(all_3D), all_3D[:10]) # img.shape =>(223, 270, 20) (W, H, S)==(X, Y, Z)

process...: 100%|██████████| 300/300 [09:05<00:00,  1.82s/num, name of this process=./taop-2021/100003/0424/0424_T2]   

300 [(<SimpleITK.SimpleITK.Image; proxy of <Swig Object of type 'std::vector< itk::simple::Image >::value_type *' at 0x7efe49213810> >, './100003_nii_test/0324/0324_Flair.nii.gz'), (<SimpleITK.SimpleITK.Image; proxy of <Swig Object of type 'std::vector< itk::simple::Image >::value_type *' at 0x7efe49213ba0> >, './100003_nii_test/0324/0324_CET1.nii.gz'), (<SimpleITK.SimpleITK.Image; proxy of <Swig Object of type 'std::vector< itk::simple::Image >::value_type *' at 0x7efe4807a8d0> >, './100003_nii_test/0324/0324_T2.nii.gz'), (<SimpleITK.SimpleITK.Image; proxy of <Swig Object of type 'std::vector< itk::simple::Image >::value_type *' at 0x7efe4807a3f0> >, './100003_nii_test/0325/0325_Flair.nii.gz'), (<SimpleITK.SimpleITK.Image; proxy of <Swig Object of type 'std::vector< itk::simple::Image >::value_type *' at 0x7efe4807a990> >, './100003_nii_test/0325/0325_CET1.nii.gz'), (<SimpleITK.SimpleITK.Image; proxy of <Swig Object of type 'std::vector< itk::simple::Image >::value_type *' at 0x7efe48




In [46]:
# 创建100个验证集的空mask文件夹，存放预测结果
from glob import glob
from tqdm import tqdm
import os
# Only directories can receive a mask sub-folder; skip any plain files that
# the glob pattern also matches (os.mkdir would fail on them).
listpaths = [path for path in glob(destfiles+'*') if os.path.isdir(path)]
with tqdm(total=len(listpaths), desc='processing...', unit='个') as pbar:
    for path in listpaths:
        mask_dir = path+'/mask'
        # exist_ok makes an already-present folder a no-op, so the bar is
        # advanced for every entry. The original `continue` skipped
        # pbar.update() and left the bar incomplete on re-runs.
        os.makedirs(mask_dir, exist_ok=True)
        pbar.update(1)
        pbar.set_postfix(**{'tips: processing': mask_dir})

processing...: 100%|██████████| 100/100 [00:00<00:00, 1371.91个/s, tips: processing=./100003_nii_test/0412/mask]


In [47]:
# Upload the generated output folder to the bucket under data/bisai2.
redictstore(path=destfiles, key_prefix='data/bisai2')