# 检查shape以及label

In [None]:
import os, shutil
from onekey_algo import get_param_in_cwd
import numpy as np
import nibabel as nib
from glob import glob

# Give the files inside every sample directory canonical names derived from
# the directory name:
#   * MR-CE / MR-T2: the two *.nii.gz files become "<sample>.nii.gz" (image)
#     and "<sample>-label.nii.gz" (mask).
#   * endoscope: every file becomes "<sample><ext>".
root = get_param_in_cwd('radio_dir')
for modal in ['MR-CE', 'MR-T2', 'endoscope']:
    modal_dir = os.path.join(root, modal)
    for sample in os.listdir(modal_dir):
        sample_dir = os.path.join(modal_dir, sample)
        try:
            if modal != 'endoscope':
                # Sorted by file size, largest first: the larger file is taken
                # to be the image and the smaller one the mask. Unpacking into
                # exactly two names raises ValueError when the directory does
                # not hold exactly two *.nii.gz files; that is reported below.
                img, msk = sorted(glob(os.path.join(sample_dir, '*.nii.gz')),
                                  key=os.path.getsize, reverse=True)
                shutil.move(msk, os.path.join(sample_dir, f"{sample}-label.nii.gz"))
                shutil.move(img, os.path.join(sample_dir, f"{sample}.nii.gz"))
            else:
                # NOTE(review): every file is renamed to "<sample><ext>", so two
                # files sharing an extension would target the same name --
                # presumably each endoscope directory holds a single file;
                # confirm against the data layout.
                for f in os.listdir(sample_dir):
                    _, ext = os.path.splitext(f)
                    shutil.move(os.path.join(sample_dir, f),
                                os.path.join(sample_dir, f"{sample}{ext}"))
        except Exception as exc:
            # Best effort: report the failing sample together with the reason
            # and keep going. `Exception` (rather than a bare `except`) lets
            # Ctrl-C / SystemExit still stop the loop.
            print(modal, sample, repr(exc))

# 划分数据

In [None]:
import os
import re
import shutil
import pandas as pd
from onekey_algo.custom.components.comp2 import split_dataset4sol
from onekey_algo import get_param_in_cwd
from onekey_algo.custom.utils import print_join_info

# Load the label table. Rows whose `group` is 'train' are re-split into
# train/val below; all remaining rows are carried into every split unchanged.
data = pd.read_csv(os.path.join(get_param_in_cwd('radio_dir'), 'label.csv'))
train_data = data[data['group'] == 'train']
test_data = data[data['group'] != 'train']

# NOTE(review): split_dataset4sol is a project helper; it is consumed below as
# an iterable of (train, val) DataFrame pairs -- presumably one pair per trial
# (n_trails=10); confirm against onekey_algo.
rt = split_dataset4sol(train_data, train_data['label'], cv=False, n_trails=10, test_size=0.33, save_dir='.', shuffle=True, map_ext=True)

# Everything below is written into split_info/; make sure it exists so that
# to_csv cannot fail on a missing directory.
os.makedirs('split_info', exist_ok=True)
for idx, (train, val) in enumerate(rt):
    # Tag the validation rows on a copy instead of mutating the frame handed
    # back by the splitter (avoids SettingWithCopyWarning on sliced frames).
    val = val.assign(group='val')
    rnd = pd.concat([train, val, test_data], axis=0)
    display(rnd['group'].value_counts())
    # The CSV keeps the original IDs; it must be written before the ID rewrite.
    rnd.to_csv(f'split_info/label-RND-{idx}.csv', index=False)
    # The tab-separated lists reference IDs with '.gz' replaced by '.png'.
    rnd['ID'] = rnd['ID'].map(lambda x: x.replace('.gz', '.png'))
    is_train = rnd['group'] == 'train'
    # train-RND-*.txt holds the 'train' rows; val-RND-*.txt holds every other
    # row (the 'val' rows together with the non-train rows of label.csv).
    rnd.loc[is_train, ['ID', 'label']].to_csv(f'split_info/train-RND-{idx}.txt', sep='\t', header=False, index=False)
    rnd.loc[~is_train, ['ID', 'label']].to_csv(f'split_info/val-RND-{idx}.txt', sep='\t', header=False, index=False)
    
#     pd.merge(rnd[rnd['group'] == 'train'], samples.drop_duplicates('ID'), on='ID', how='inner')[['ID', 'label']].to_csv(f'split_info/train3d-RND-{idx}.txt', 
#                                                                                                   sep='\t', header=None, index=None)
#     pd.merge(rnd[rnd['group'] != 'train'], samples.drop_duplicates('ID'), on='ID', how='inner')[['ID', 'label']].to_csv(f'split_info/val3d-RND-{idx}.txt',
#                                                                                                   sep='\t', header=None, index=None)

# 划分json

In [None]:
# Directory holding the per-sample JSON files; the per-subset folders are
# created underneath it.
root = os.path.join(get_param_in_cwd('radio_dir'), 'json')


def split_ds(subset):
    """Copy the JSON file of every sample in ``subset`` into ``<root>/<subset>``.

    Samples are the rows of the module-level ``data`` frame whose ``group``
    column equals ``subset``. The JSON file name is the row's ``ID`` with
    '.nii.gz' removed and '.json' appended; it is read from ``root``.

    Args:
        subset: value of the ``group`` column to select; also the name of the
            destination sub-directory.
    """
    target_dir = os.path.join(root, subset)
    os.makedirs(target_dir, exist_ok=True)
    sample_ids = data.loc[data['group'] == subset, 'ID']
    for sample_id in sample_ids:
        json_name = sample_id.replace('.nii.gz', '') + '.json'
        shutil.copy(os.path.join(root, json_name), target_dir)


split_ds('train')
split_ds('test')

# 分割images, masks

In [None]:
import os, shutil
from onekey_algo import get_param_in_cwd
import numpy as np
import nibabel as nib
from glob import glob
import re

# Gather, per modality, the image of every sample into "<modal>/images" and
# its masks into "<modal>/masks" (file names containing 'label') or into a
# directory named after the second dot-separated part of the file name (file
# names containing 'infer'). Every copy is named "<sample>.nii.gz".
# NOTE(review): the output directories live next to the sample directories, so
# a second run will also list 'images', 'masks', ... as samples -- confirm
# whether re-running is expected.
root = get_param_in_cwd('radio_dir')
for modal in ['MR-CE', 'MR-T2']:
    modal_dir = os.path.join(root, modal)
    for sample in os.listdir(modal_dir):
        try:
            # Largest *.nii.gz file is taken to be the image, the rest masks.
            # An empty directory raises ValueError here and is reported below.
            img, *msks = sorted(glob(os.path.join(modal_dir, sample, '*.nii.gz')),
                                key=os.path.getsize, reverse=True)
            img_dir = os.path.join(modal_dir, 'images')
            os.makedirs(img_dir, exist_ok=True)
            shutil.copy(img, os.path.join(img_dir, f"{sample}.nii.gz"))
            for msk in msks:
                # Classify by file name only: matching against the full path
                # would also react to 'label' / 'infer' / '.' occurring in any
                # parent directory name.
                msk_name = os.path.basename(msk)
                if 'label' in msk_name:
                    msk_dir = os.path.join(modal_dir, 'masks')
                elif 'infer' in msk_name:
                    msk_dir = os.path.join(modal_dir, msk_name.split('.')[1])
                else:
                    # Without this branch the destination of the previous mask
                    # would be silently reused (or be undefined on first use).
                    print(modal, sample, f"skipped unrecognised mask: {msk_name}")
                    continue
                os.makedirs(msk_dir, exist_ok=True)
                shutil.copy(msk, os.path.join(msk_dir, f"{sample}.nii.gz"))
        except Exception as exc:
            # Best effort: report the failing sample with the reason and go on.
            # `Exception` (not a bare `except`) keeps Ctrl-C / SystemExit working.
            print(modal, sample, repr(exc))