将 metadata/raw_file_path.csv 中记录的 center - pid - modality 数据，从其原始文件路径（file path）复制到 data/0_nifti 文件夹下（每个受试者一个 sub-{pid} 子文件夹），并按照 BIDS 的命名规则命名，即 sub-{pid}_{modality}.nii.gz 格式。

其中 pid 部分的映射规则：center 为 AKH 时，映射为 AKH{pid}；center 为 Neimeng 时，映射为 NM{pid}（pid 中的连字符 “-” 会被去掉）。

In [2]:
# Define the function to convert PID to subject ID
def convert_pid_to_subject(row):
    """Build the subject label used in ``sub-{label}_{modality}.nii.gz`` names.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'center'`` and ``'PID'`` (for example a
        DataFrame row passed by ``df.apply(..., axis=1)``).

    Returns
    -------
    str or object
        ``'AKH' + PID`` for center ``'AKH'`` and ``'NM' + PID`` for center
        ``'Neimeng'``, with every ``-`` removed from the PID. For any other
        center the PID is returned unchanged.

    Raises
    ------
    ValueError
        If the center is ``'AKH'`` or ``'Neimeng'`` but the PID is missing
        (``None`` or NaN).
    """
    prefixes = {'AKH': 'AKH', 'Neimeng': 'NM'}
    center = row['center']
    pid = row['PID']

    prefix = prefixes.get(center)
    if prefix is None:
        # Unknown center: pass the PID through untouched.
        return pid

    if pid is None:
        raise ValueError(f"Missing PID for center {center!r}")
    if isinstance(pid, float):
        # pandas represents missing cells as float NaN; NaN is the only float
        # that is not equal to itself.
        if pid != pid:
            raise ValueError(f"Missing PID for center {center!r}")
        # A numeric ID column containing a gap is read as float (12345.0);
        # drop the spurious ".0" so the label matches the original ID.
        if pid.is_integer():
            pid = int(pid)

    # str() keeps numeric PIDs from crashing on .replace().
    return f"{prefix}{str(pid).replace('-', '')}"

In [3]:
import pandas as pd
import os

# Load the metadata.
# 'center' and 'PID' are identifiers, so they are read as text: this keeps
# leading zeros in numeric-looking PIDs and guarantees string methods work on
# them later, regardless of what dtype pandas would otherwise infer.
csv_path = '../../metadata/raw_file_path.csv'
df = pd.read_csv(csv_path, dtype={'center': str, 'PID': str})

# Display the first few rows
df.head()


Unnamed: 0,center,PID,pet_path,ct_path,lung_seg_path,lesion_seg_path
0,AKH,ABDALLA-ADEL-AHMED20091023,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...
1,AKH,ABT-BRIGITTE20160818,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...
2,AKH,ADAMEK-KARIN20211124,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...
3,AKH,AHMED-MOHAMED20230731,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...
4,AKH,AHMEDI-NAZIF20230111,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...


In [5]:
import gzip
import shutil
from tqdm import tqdm

# Define the destination directory
dest_dir = '../../data/0_nifti'
os.makedirs(dest_dir, exist_ok=True)

# Apply the function to create the subject_id column
# NOTE(review): the PIDs shown in df.head() look like patient names plus a
# date, and they end up verbatim in folder/file names — confirm whether the
# IDs should be pseudonymised before this data is shared.
df['subject_id'] = df.apply(convert_pid_to_subject, axis=1)

# Two rows that map to the same subject_id write to the same destination
# files, so the later row silently overwrites the earlier one. Surface that
# before copying anything.
duplicated_ids = df.loc[df['subject_id'].duplicated(keep=False), 'subject_id'].unique()
if len(duplicated_ids) > 0:
    print(
        f"Warning: {len(duplicated_ids)} subject_id value(s) appear on more than one row; "
        f"later rows will overwrite earlier files: {list(duplicated_ids)[:10]}"
    )

# Define the modalities (CSV column -> suffix used in the destination filename)
modalities = {
    'pet_path': 'pet',
    'ct_path': 'ct',
    'lung_seg_path': 'seg-lung',
    'lesion_seg_path': 'seg-lesion'
}

# Iterate over the DataFrame and copy files
for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Copying files"):
    subject_id = row['subject_id']

    # Create a subdirectory for the subject
    subject_dest_dir = os.path.join(dest_dir, f"sub-{subject_id}")
    os.makedirs(subject_dest_dir, exist_ok=True)

    for path_col, modality_name in modalities.items():
        src_path = row[path_col]

        # isfile (not exists): a path that points at a directory would make
        # shutil.copy raise instead of being reported as missing.
        if pd.isna(src_path) or not os.path.isfile(src_path):
            # tqdm.write keeps the message from breaking the progress bar.
            tqdm.write(f"Warning: Source file for {subject_id} ({modality_name}) not found or path is NaN: {src_path}")
            continue

        # Construct the destination filename
        dest_filename = f"sub-{subject_id}_{modality_name}.nii.gz"
        dest_path = os.path.join(subject_dest_dir, dest_filename)

        src_lower = str(src_path).lower()
        if src_lower.endswith('.nii'):
            # Uncompressed NIfTI: compress while copying so the content really
            # matches the .nii.gz name.
            with open(src_path, 'rb') as src_file, gzip.open(dest_path, 'wb') as dest_file:
                shutil.copyfileobj(src_file, dest_file)
        else:
            if not src_lower.endswith('.nii.gz'):
                tqdm.write(
                    f"Warning: Source file for {subject_id} ({modality_name}) does not end in "
                    f".nii or .nii.gz but is copied as .nii.gz: {src_path}"
                )
            # Copy the file
            shutil.copy(src_path, dest_path)

print("File copying complete.")
df.head()


Copying files: 100%|██████████| 1030/1030 [06:20<00:00,  2.71it/s]

File copying complete.





Unnamed: 0,center,PID,pet_path,ct_path,lung_seg_path,lesion_seg_path,subject_id
0,AKH,ABDALLA-ADEL-AHMED20091023,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...,AKHABDALLAADELAHMED20091023
1,AKH,ABT-BRIGITTE20160818,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...,AKHABTBRIGITTE20160818
2,AKH,ADAMEK-KARIN20211124,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...,AKHADAMEKKARIN20211124
3,AKH,AHMED-MOHAMED20230731,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...,AKHAHMEDMOHAMED20230731
4,AKH,AHMEDI-NAZIF20230111,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/nifti...,/home/yaobo/Project/LungCancer_Subtyping_V2.0/...,/mnt/HDD_1/FDG/LungCancer_Subtyping/data/lesio...,AKHAHMEDINAZIF20230111
