In [4]:
import os

import numpy as np
import pandas as pd

In [18]:
# Input: list of subject directories belonging to this dataset split.
fstr_in = "/projects/master-theses/henschell/FastInfantSurfer/data/dataset_split_large_training_t1t2.csv"
# Output: same location and base name, with "_meta_hires.tsv" appended.
# Strip the extension with splitext instead of a fixed [:-4] slice so the name
# stays correct even if the input extension is not exactly three characters long.
fstr_out = f"{os.path.splitext(fstr_in)[0]}_meta_hires.tsv"

In [19]:
# Load the subject meta table and add the join key "SubjectFix", built as
# "sub-<participant_id>_ses-<session_id>".
meta = pd.read_csv(
    "/projects/master-theses/henschell/FastInfantSurfer/data/subject_meta_full.csv"
).assign(
    SubjectFix=lambda df: (
        "sub-" + df["participant_id"].astype(str) + "_ses-" + df["session_id"].astype(str)
    )
)

# Load the split list as a single column named "Path".
# NOTE(review): names=["Path"] treats the file as header-less; if the file does
# contain a header line it is read as a regular data row -- confirm.
# The join key is the last "/"-separated component of each path.
val = pd.read_csv(fstr_in, names=["Path"]).assign(
    SubjectFix=lambda df: df["Path"].str.rsplit("/", n=1).str[-1]
)

In [20]:
# Number of rows and columns of the split table before the meta information is merged in.
val.shape

(356, 2)

In [21]:
# Preview the first rows of the three meta columns used for the merge.
meta.loc[:, ["SubjectFix", "scan_age", "sex"]].head()

Unnamed: 0,SubjectFix,scan_age,sex
0,sub-CC00050XX01_ses-7201,43.29,female
1,sub-CC00051XX02_ses-7702,40.0,female
2,sub-CC00052XX03_ses-8300,38.71,female
3,sub-CC00053XX04_ses-8607,40.43,female
4,sub-CC00054XX05_ses-8800,42.14,male


In [22]:
# Add sex and scan age to the split data.
# Merge only the two original split columns so that re-running this cell does not
# merge already-merged columns again (which would create scan_age_x / scan_age_y).
split_subjects = val[["Path", "SubjectFix"]]

# The inner join drops every path whose SubjectFix has no entry in the meta table;
# list those paths so the loss is visible instead of silent.
unmatched = split_subjects.loc[~split_subjects["SubjectFix"].isin(meta["SubjectFix"]), "Path"]
if not unmatched.empty:
    print(f"{len(unmatched)} path(s) without meta information are dropped by the merge:")
    print(unmatched.to_string(index=False))

val = split_subjects.merge(
    meta[["SubjectFix", "scan_age", "sex"]], on="SubjectFix", how="inner"
).drop_duplicates()


In [23]:
# Shape after the merge: compare the row count with the pre-merge shape to see how
# many entries the inner join and de-duplication removed.
val.shape

(355, 4)

In [24]:
# Order the rows by sex first and by scan age within each sex (both ascending),
# then renumber the index from 0.
val_sorted = (
    val
    .sort_values(by=["sex", "scan_age"])
    .reset_index(drop=True)
)


In [25]:
# Alternate the resolutions 0.5 and 0.8 down the sorted rows, so neighbouring rows
# in the sex/age ordering receive different resolutions.
# (A third value, 1.0, is currently disabled; add it to the tuple to re-enable it.)
from itertools import cycle

resolution = cycle((0.5, 0.8))
# range() comes first in zip so exactly one value is drawn from the cycle per row.
val_sorted["Resolution"] = [value for _, value in zip(range(len(val_sorted)), resolution)]

In [26]:
# Display the sorted table, including the assigned "Resolution" column.
val_sorted

Unnamed: 0,Path,SubjectFix,scan_age,sex,Resolution
0,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC00718XX17_ses-210400,26.71,female,0.5
1,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC00936XX21_ses-11431,27.43,female,0.8
2,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC00867XX18_ses-37111,28.29,female,0.5
3,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC00796XX22_ses-245100,29.29,female,0.8
4,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC00530XX11_ses-153600,29.29,female,0.5
...,...,...,...,...,...
350,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC00130XX07_ses-44001,43.43,male,0.5
351,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC00890XX17_ses-13330,43.71,male,0.8
352,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC01027XX13_ses-82630,43.71,male,0.5
353,/autofs/vast/lzgroup/Projects/FastInfantSurfer...,sub-CC01206XX10_ses-143530,43.86,male,0.8


In [27]:
# Save the table (with the Resolution column) as a tab-separated file, without the row index.
val_sorted.to_csv(path_or_buf=fstr_out, sep="\t", index=False)

In [29]:
# Show the path of the output file.
fstr_out

'/projects/master-theses/henschell/FastInfantSurfer/data/dataset_split_large_training_t1t2_meta_hires.tsv'

In [17]:
# Check that the written file can be loaded again.
# NOTE(review): this cell depends on `fstr_out` and on the file written by an
# earlier cell; run the notebook top to bottom so the displayed paths are current.
# Choose the delimiter from the full extension: tab for ".tsv", comma otherwise.
# (Testing only the third-to-last character would also match e.g. ".txt".)
separator = "\t" if fstr_out.lower().endswith(".tsv") else ","
s_file = pd.read_csv(fstr_out, sep=separator)
# Subject directories as a plain Python list.
subject_dirs = s_file["Path"].to_list()
subject_dirs

['/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00694XX19_ses-201800',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00666XX15_ses-198200',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00526XX15_ses-150500',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00305XX08_ses-98101',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC01153AN07_ses-102030',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00628XX18_ses-181800',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC01129XX16_ses-101430',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00946XX23_ses-15230',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC01218XX14_ses-147430',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00986AN22_ses-26730',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00648XX22_ses-191100',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00517XX14_ses-145000',
 '/groups/ag-reuter/projects/datasets/dHCP/Data/sub-CC00688XX21_ses-199500',
 '