In [1]:
import datetime
import pandas as pd
import numpy as np
import os

import scipy
import nrrd
import h5py

from collections import OrderedDict 

out_path_orig = "./out_dataset/original"
out_path_untag = "./out_dataset/untagged"
out_path_cut = "./out_dataset/cut"
out_path_seg = "./out_dataset/segmented"

In [2]:
labels = pd.read_csv("data.csv")
labels.sort_values(['id', 'scan_date'], inplace=True)
labels

Unnamed: 0,id,scan_date,path,birth,tbv
135,1,01/02/2018,./raw/01/01_X_20180201.nrrd,12/01/2018,181.52
138,1,01/03/2018,./raw/01/01_X_20180301.nrrd,12/01/2018,255.22
139,1,08/02/2018,./raw/01/01_X_20180208.nrrd,12/01/2018,192.31
131,1,08/03/2018,./raw/01/01_X_20180308.nrrd,12/01/2018,261.03
134,1,16/02/2018,./raw/01/01_X_20180216.nrrd,12/01/2018,218.43
...,...,...,...,...,...
118,87,15/07/2019,./raw/87/87_X_20190715.nrrd,16/06/2019,132.57
120,87,21/06/2019,./raw/87/87_X_20190621.nrrd,16/06/2019,99.47
117,87,22/07/2019,./raw/87/87_X_20190722.nrrd,16/06/2019,149.17
115,87,26/06/2019,./raw/87/87_X_20190626.nrrd,16/06/2019,99.51


In [3]:
print(f"Total: {len(labels)}")

for _, row in labels.iterrows():
    day, month, year = row['scan_date'].split('/')
    nrrd_path = os.path.join(f"dataset/{row['id']:02}", f"{row['id']:02}_X_{year}{month}{day}.nrrd")
    mat_path = os.path.join(f"dataset/{row['id']:02}", f"{row['id']:02}_X_{year}{month}{day}VC.mat")
    
    new_path_raster = os.path.join(out_path_orig, f"{row['id']}_{row['scan_date'].replace('/', '-')}.nrrd")
    new_path_raster_cut = os.path.join(out_path_cut, f"{row['id']}_{row['scan_date'].replace('/', '-')}_cut.nrrd")
    new_path_seg = os.path.join(out_path_seg, f"{row['id']}_{row['scan_date'].replace('/', '-')}_cut_seg.nrrd")

    original_data, original_header = nrrd.read(nrrd_path)

    tbv = row['tbv']
    age = (datetime.datetime.strptime(row['scan_date'], '%d/%m/%Y') - datetime.datetime.strptime(row['birth'], '%d/%m/%Y')).days

    original_header = OrderedDict({
        'type': 'unsigned char',
        'dimension': 3,
        'sizes': original_header['sizes'],
        'spacings': original_header['spacings'],
        'kinds': ['space', 'space', 'space'],
        'labels': ['x', 'y', 'z'],
        'encoding': 'raw',
        'units': ['cm', 'cm', 'cm'],
        'tbv': row['tbv'],
        'age_days': age
    })

    nrrd.write(new_path_raster, original_data, original_header)

    try:
        mat = scipy.io.loadmat(mat_path)

        spacings = np.array(mat["Info"][0][0][2][0])
        raster = mat["Info"][0][0][3]
        segmentation = mat["Info"][0][0][4]
    except NotImplementedError:
        with h5py.File(mat_path, 'r') as f:
            spacings_data = f.get("Info/spacings")
            raster_data = f.get("Info/data")
            seg_data = f.get("Info/Vseg")

            sizes = raster_data.shape
            
            spacings = np.zeros((3, 1), dtype=np.float32)
            raster = np.zeros(sizes, dtype=np.uint8)
            segmentation = np.zeros(sizes, dtype=np.uint8)

            spacings_data.read_direct(spacings)
            raster_data.read_direct(raster)
            seg_data.read_direct(segmentation)

            spacings = spacings[:, 0]
    except:
        print("!")
        continue

    np.multiply(segmentation, 255, out=segmentation, casting="unsafe")
    raster = np.transpose(raster, (1, 0, 2))
    segmentation = np.transpose(segmentation, (1, 0, 2))

    header = OrderedDict({
        'type': 'unsigned char',
        'dimension': 3,
        'sizes': sizes,
        'spacings': spacings,
        'kinds': ['space', 'space', 'space'],
        'labels': ['x', 'y', 'z'],
        'encoding': 'raw',
        'units': ['cm', 'cm', 'cm'],
        'tbv': row['tbv'],
        'age_days': age
    })


    nrrd.write(new_path_raster_cut, raster, header)
    nrrd.write(new_path_seg, segmentation, header)


Total: 371


FileNotFoundError: [Errno 2] No such file or directory: 'dataset/01/01_X_20180201.nrrd'

In [None]:
import shutil

existing_pids = labels['id'].values
for dp, dn, filenames in os.walk("untagged/VOL3D"):
    untagged_files = [(dp, f) for f in filenames if os.path.splitext(f)[1] == '.nrrd']
    
    if untagged_files:
        pid = dp.split("/")[-1].split("_")[0]
        
        
        if int(pid) in labels['id'].values:
            print("!")
            continue

        for f in untagged_files:
            name = os.path.splitext(f[1])[0]
            if name[:2] == "ID":    day, month, year  = name.split("_")[1:4]
            else:                   year, month, day = name.split("_")[:3]

            date = f"{year}-{month}-{day}"[:10]
            new_path_raster = os.path.join(out_path_untag, f"{pid}_{date}.nrrd")
            shutil.copy(os.path.join(f[0], f[1]), new_path_raster)


!


In [7]:
for file in os.listdir("original"):
    if file.endswith(".nrrd"):
        data, header = nrrd.read(os.path.join("original", file))
        if(len(set(header['spacings'])) != 1):
            print(header['spacings'])

[0.45279747 0.45279747 0.45279747]
[0.44515421 0.44515421 0.44515421]
[0.48644782 0.48644782 0.48644782]
[0.49158432 0.49158432 0.49158432]
[0.35924515 0.35924515 0.35924515]
[0.49158432 0.49158432 0.49158432]
[0.48313253 0.48313253 0.48313253]
[0.48131131 0.48131131 0.48131131]
[0.49158432 0.49158432 0.49158432]
[0.43514218 0.43514218 0.43514218]
[0.43514218 0.43514218 0.43514218]
[0.44535578 0.44535578 0.44535578]
[0.46082261 0.46082261 0.46082261]
[0.46247746 0.46247746 0.46247746]
[0.46082261 0.46082261 0.46082261]
[0.50500916 0.50500916 0.50500916]
[0.57210778 0.57210778 0.57210778]
[0.3661194 0.3661194 0.3661194]
[0.49158432 0.49158432 0.49158432]
[0.46082261 0.46082261 0.46082261]
[0.54128876 0.54128876 0.54128876]
[0.46082261 0.46082261 0.46082261]
[0.40464668 0.40464668 0.40464668]
[0.5074167 0.5074167 0.5074167]
[0.49158432 0.49158432 0.49158432]
[0.49158432 0.49158432 0.49158432]
[0.49158432 0.49158432 0.49158432]
[0.46082261 0.46082261 0.46082261]
[0.47788698 0.47788698 0.4