# Adding subcortical features to cortical atlases

We have a set of subcortical atlases in the MNI152NLin6Asym and MNI152NLin2009cAsym template spaces. We should make one composite atlas in each space that can then be added to the Schaefer atlases.

In [1]:
import os
import nibabel as nb
# Label tables (TSV) for each non-cortical atlas. Paths are relative to the
# directory the notebook is run from.
tsvs = {
    "cerebellum": "cerebellum/atl-MDTB10.tsv",
    "subcortical": "CIT168/atl-CIT168.tsv",
    "thalamic": "thalamic_atlas/atl-hcpthalamic.tsv"
}

# The reference brain masks are read from a local TemplateFlow directory.
# Fail early with a readable message instead of the TypeError that
# `None + "/tpl-..."` would raise when the variable is unset.
tfhome = os.getenv("TEMPLATEFLOW_HOME")
if not tfhome:
    raise RuntimeError(
        "TEMPLATEFLOW_HOME is not set; it must point at the directory "
        "containing the tpl-MNI152NLin6Asym and tpl-MNI152NLin2009cAsym folders")

# MNI152NLin6Asym: reference mask plus one label image per atlas
nlin6_ref = tfhome + "/tpl-MNI152NLin6Asym/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz"
nlin6_ref_img = nb.load(nlin6_ref)
nlin6 = {
    "cerebellum": "cerebellum/tpl-MNI152NLin6Asym_atlas-MDTB10_res-01_dseg.nii.gz",
    "subcortical": "CIT168/tpl-MNI152NLin6Asym_atlas-CIT168_res-01_desc-LRSplit_dseg.nii.gz",
    "thalamic": "thalamic_atlas/tpl-MNI152NLin6Asym_atlas-hcpthalamic_res-01_dseg.nii.gz"
}

# MNI152NLin2009cAsym: reference mask plus one label image per atlas
nlin09c_ref = tfhome + "/tpl-MNI152NLin2009cAsym/tpl-MNI152NLin2009cAsym_res-01_desc-brain_mask.nii.gz"
nlin09c_ref_img = nb.load(nlin09c_ref)
nlin09c = {
    "cerebellum": "cerebellum/tpl-MNI152NLin2009cAsym_atlas-MDTB10_res-01_dseg.nii.gz",
    "subcortical": "CIT168/tpl-MNI152NLin2009cAsym_atlas-CIT168_res-01_desc-LRSplit_dseg.nii.gz",
    "thalamic": "thalamic_atlas/tpl-MNI152NLin2009cAsym_atlas-hcpthalamic_res-01_dseg.nii.gz"
}

In [2]:
from glob import glob
import nibabel as nb
import numpy as np

def check_grids(atlas_files):
    """Check that every image in ``atlas_files`` shares one voxel grid.

    The last file in the sequence is used as the reference; the affine and
    the shape of every other file are compared against it.

    Parameters
    ----------
    atlas_files : sequence of str
        Paths of the images to compare. The sequence is not modified.

    Raises
    ------
    IndexError
        If ``atlas_files`` is empty.
    Exception
        If an image's affine or shape differs from the reference image.
    """
    # Work on a copy so the caller's list is left untouched (the previous
    # implementation pop()'d the reference file out of the caller's list).
    atlas_files = list(atlas_files)

    print("checking files:")
    print("\n  " + "\n  ".join(atlas_files))

    print("\ncomparing grids:")
    ref_img = nb.load(atlas_files[-1])

    for try_file in atlas_files[:-1]:
        try_img = nb.load(try_file)
        if not np.allclose(try_img.affine, ref_img.affine):
            # Report the file name so the offending image can be identified
            raise Exception('incompatible affines:', try_file, try_img.affine, ref_img.affine)
        if not try_img.shape == ref_img.shape:
            raise Exception('incompatible shapes:', try_file, try_img.shape, ref_img.shape)
        print(try_img.shape, "==", ref_img.shape)
    print("All grids match orientation and shape!!")
    
# Compare each template's reference mask with the atlas images in that space
for reference_mask, space_atlases in ((nlin6_ref, nlin6), (nlin09c_ref, nlin09c)):
    check_grids([reference_mask, *space_atlases.values()])

checking files:

  /Users/mcieslak/projects/templateflow/tpl-MNI152NLin6Asym/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz
  cerebellum/tpl-MNI152NLin6Asym_atlas-MDTB10_res-01_dseg.nii.gz
  CIT168/tpl-MNI152NLin6Asym_atlas-CIT168_res-01_desc-LRSplit_dseg.nii.gz
  thalamic_atlas/tpl-MNI152NLin6Asym_atlas-hcpthalamic_res-01_dseg.nii.gz

comparing grids:
(182, 218, 182) == (182, 218, 182)
(182, 218, 182) == (182, 218, 182)
(182, 218, 182) == (182, 218, 182)
All grids match orientation and shape!!
checking files:

  /Users/mcieslak/projects/templateflow/tpl-MNI152NLin2009cAsym/tpl-MNI152NLin2009cAsym_res-01_desc-brain_mask.nii.gz
  cerebellum/tpl-MNI152NLin2009cAsym_atlas-MDTB10_res-01_dseg.nii.gz
  CIT168/tpl-MNI152NLin2009cAsym_atlas-CIT168_res-01_desc-LRSplit_dseg.nii.gz
  thalamic_atlas/tpl-MNI152NLin2009cAsym_atlas-hcpthalamic_res-01_dseg.nii.gz

comparing grids:
(193, 229, 193) == (193, 229, 193)
(193, 229, 193) == (193, 229, 193)
(193, 229, 193) == (193, 229, 193)
All grids match orientation and shape!!

Let's check that the labels listed in each TSV are actually present in the corresponding image:

In [3]:
import numpy as np
import pandas as pd


def roi_data(nii_file):
    """Load the image at ``nii_file`` and return its voxel array as ``np.uint32``."""
    image = nb.load(nii_file)
    voxels = image.get_fdata()
    return voxels.astype(np.uint32)


def get_region_data(tsv_file):
    """Read a tab-separated label table.

    Returns a ``(ids, names)`` tuple of lists taken from the table's
    ``index`` and ``name`` columns, in file order.
    """
    table = pd.read_csv(tsv_file, sep="\t")
    ids, names = (table[column].tolist() for column in ("index", "name"))
    return ids, names


def tsv_to_config(tsv_file):
    """Build an atlas config dict (``node_ids``, ``node_names``) from a label TSV."""
    node_ids, node_names = get_region_data(tsv_file)
    return {"node_ids": node_ids, "node_names": node_names}


def verify_atlas(atlas_config, atlas_data):
    """Check that an atlas config and a label array describe the same regions.

    Parameters
    ----------
    atlas_config : dict
        Must provide ``node_ids`` and ``node_names`` (parallel sequences).
    atlas_data : numpy.ndarray
        Label array; values ``<= 0`` are ignored (treated as background).

    Raises
    ------
    Exception
        If a configured node id has no voxels in ``atlas_data``, or if
        ``atlas_data`` contains a positive label that is not listed in
        ``atlas_config['node_ids']``.
    """
    data_regions = np.unique(atlas_data)
    data_regions = set(data_regions[data_regions > 0].tolist())

    # Every labelled region must have at least one voxel in the image
    for node_id, node_name in zip(atlas_config['node_ids'], atlas_config['node_names']):
        if node_id not in data_regions:
            raise Exception("%d: %s not in atlas data" % (node_id, node_name))

    # ...and every positive value in the image must have a label. The
    # difference is taken from the data side; the previous code subtracted
    # the data from the config, which the loop above already guarantees is
    # empty, so unlabeled regions were never reported.
    unlabeled_regions = data_regions.difference(atlas_config['node_ids'])
    if unlabeled_regions:
        raise Exception("%s present in data but not in labels" % str(sorted(unlabeled_regions)))

# Cross-check each label table against its image, first in NLin6 and then
# in 2009cAsym.
for atlas in ['cerebellum', 'thalamic', 'subcortical']:
    for space_files in (nlin6, nlin09c):
        verify_atlas(tsv_to_config(tsvs[atlas]), roi_data(space_files[atlas]))

The TSVs and the niftis all check out.

## Combining the atlases

We need to combine the cerebellum, thalamus and subcortical regions into a single atlas. Below are some functions that help us add one atlas into another. We'll merge these three and then add the result to all the other atlases that don't have any subcortical regions.

In [4]:
from copy import deepcopy
from scipy.stats import mode

def match_partitions(data1, data2):
    """Match the regions in data1 to the regions in data2.

    For every positive label in ``data1``, find the value that occurs most
    often in ``data2`` within that label's voxels.

    Parameters
    ----------
    data1, data2 : numpy.ndarray
        Label arrays of the same shape.

    Returns
    -------
    dict
        Maps each positive ``data1`` label to the most frequent ``data2``
        value under that label. Ties resolve to the smallest value.
    """
    data1_ids = np.unique(data1)
    data1_ids = data1_ids[data1_ids > 0].tolist()

    mapping = {}
    for data1_id in data1_ids:
        region_mask = data1 == data1_id
        # np.unique returns sorted values, and argmax picks the first
        # maximum, so ties go to the smallest value. This replaces
        # `value, = scipy.stats.mode(...).mode`, which fails on SciPy
        # releases where `mode` returns a scalar for 1-D input.
        overlap_values, overlap_counts = np.unique(data2[region_mask], return_counts=True)
        mapping[data1_id] = overlap_values[np.argmax(overlap_counts)]

    return mapping

def remap_values(original_data, mapping):
    """Return a relabeled copy of ``original_data``.

    Each key of ``mapping`` found in ``original_data`` is replaced by its
    mapped value. Voxels whose value is not a key are left at 0. The result
    has the same shape and dtype as the input, which is not modified.
    """
    relabeled = np.zeros_like(original_data)
    for source_label in mapping:
        relabeled[original_data == source_label] = mapping[source_label]
    return relabeled
    

To test this code, let's use the CIT168 atlas as the base and make sure these functions do what we think they do.

In [5]:
# Simulate re-numbering the CIT168 atlas: pair every original label with
# np.argsort(labels) + 1, relabel the image, then recover the pairing from
# the two images.
cit_labels, cit_names = get_region_data(tsvs['subcortical'])
new_atlas_mapping = dict(zip(cit_labels, np.argsort(cit_labels) + 1))

print("atlas mapping:")
print(*sorted(new_atlas_mapping.items()), sep='  ')

hemi_cit_img = nb.load(nlin6['subcortical'])
hemi_cit_data = hemi_cit_img.get_fdata().astype(np.uint32)
remapped_atlas = remap_values(hemi_cit_data, new_atlas_mapping)

print("matched labels:")
print(*sorted(match_partitions(hemi_cit_data, remapped_atlas).items()), sep='  ')

atlas mapping:
(1, 1)  (2, 2)  (3, 3)  (4, 4)  (5, 5)  (6, 6)  (7, 7)  (8, 8)  (9, 9)  (12, 10)  (13, 11)  (14, 12)  (15, 13)  (16, 14)  (101, 15)  (102, 16)  (103, 17)  (104, 18)  (105, 19)  (106, 20)  (107, 21)  (108, 22)  (109, 23)  (112, 24)  (113, 25)  (114, 26)  (115, 27)  (116, 28)
matched labels:
(1, 1)  (2, 2)  (3, 3)  (4, 4)  (5, 5)  (6, 6)  (7, 7)  (8, 8)  (9, 9)  (12, 10)  (13, 11)  (14, 12)  (15, 13)  (16, 14)  (101, 15)  (102, 16)  (103, 17)  (104, 18)  (105, 19)  (106, 20)  (107, 21)  (108, 22)  (109, 23)  (112, 24)  (113, 25)  (114, 26)  (115, 27)  (116, 28)


It works!! Now we can start adding these atlases to one another.



In [6]:
def add_atlas_to_another(atlas1_config, atlas1_data, 
                         atlas2_config, atlas2_data):
    """
    Add atlas2 into atlas1. Ensure the labels are updated
    so there is no overlap 

    Parameters
    ----------
    atlas1_config, atlas2_config : dict
        Atlas configs with ``node_ids`` and ``node_names`` lists.
    atlas1_data, atlas2_data : numpy.ndarray
        Label arrays on the same grid. Neither input array is modified.

    Returns
    -------
    merged_atlas_config : dict
        ``node_names`` and ``node_ids`` of atlas1 followed by those of
        atlas2, with atlas2's ids renumbered to start above the largest
        value in ``atlas1_data``. The new ids are plain Python ints.
    merged_image_data : numpy.ndarray
        ``atlas1_data`` plus the renumbered atlas2 voxels. Voxels already
        labelled in atlas1 keep their atlas1 label.

    Raises
    ------
    Exception
        From ``verify_atlas`` if either input is inconsistent, if masking
        atlas2 with atlas1 removes an atlas2 region entirely, or if the
        merged result is inconsistent.
    """

    # Verify inputs
    verify_atlas(atlas1_config, atlas1_data)
    verify_atlas(atlas2_config, atlas2_data)
    atlas2_data = atlas2_data.copy()

    # What is the largest number in atlas1? This will be the minimum
    # value in the new ids for atlas2 after it's been added to atlas1
    merged_atlas_min = int(np.max(atlas1_data)) + 1

    # .tolist() turns the NumPy integers into plain ints, so the merged
    # config holds one integer type and can be written out with json.
    # NOTE(review): argsort gives each id's rank only when node_ids is
    # already sorted; for unsorted ids the new numbering is still unique
    # but not order-preserving -- confirm that is acceptable.
    new_ids = (np.argsort(atlas2_config['node_ids']) + merged_atlas_min).tolist()
    atlas2_id_mapping = dict(zip(atlas2_config['node_ids'], new_ids))
    atlas2_merged_ids = [atlas2_id_mapping[node_id] for node_id in
                         atlas2_config['node_ids']]

    # Make sure that any voxels that are labeled in the base image 
    # are NOT overwritten by the new atlas data
    atlas2_data[atlas1_data > 0] = 0

    # Ensure that we have not clobbered any regions by doing so
    verify_atlas(atlas2_config, atlas2_data)

    # Change their node ids so they don't conflict with the base
    remapped_atlas2 = remap_values(atlas2_data, atlas2_id_mapping)

    # The two arrays no longer share labelled voxels, so adding them
    # simply overlays atlas2 on atlas1
    merged_image_data = atlas1_data + remapped_atlas2
    merged_image_labels = atlas1_config['node_names'] + \
                          atlas2_config['node_names']
    merged_image_ids = atlas1_config['node_ids'] + atlas2_merged_ids
    merged_atlas_config = {"node_names": merged_image_labels,
                           "node_ids": merged_image_ids}
    verify_atlas(merged_atlas_config, merged_image_data)
    return merged_atlas_config, merged_image_data


def expand_df(df):
    """Expand a label table's ``name`` column into atlas and region columns.

    ``name`` is split at its first underscore: the part before it becomes
    ``atlas_name`` and the remainder becomes ``label``. Empty (NaN)
    ``network_id`` and ``network_label`` columns are added and ``name`` is
    dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a string ``name`` column. It is not modified, so the
        function can safely be called again on the same frame.

    Returns
    -------
    pandas.DataFrame
        New frame with ``label``, ``network_id``, ``network_label`` and
        ``atlas_name`` appended after the remaining original columns.
    """
    # reindex guarantees column 1 exists even when no name contains "_"
    # (str.split(expand=True) would then only produce column 0)
    spl = df['name'].str.split("_", n=1, expand=True).reindex(columns=[0, 1])
    return df.drop(columns="name").assign(
        label=spl[1],
        network_id=np.nan,
        network_label=np.nan,
        atlas_name=spl[0],
    )
    
    
    

### Creating the full subcortical atlas

Here we merge the three subcortical atlases

In [7]:
# CIT168 (hemisphere-split) is the base atlas
cit_hemi_config = tsv_to_config(tsvs['subcortical'])
cit_hemi_data_nlin6 = roi_data(nlin6['subcortical'])
cit_hemi_data_09c = roi_data(nlin09c['subcortical'])

# Thalamic and cerebellar names are prefixed so that the text before the
# first underscore identifies the source atlas (see expand_df)
thalamus_config = tsv_to_config(tsvs['thalamic'])
thalamus_config['node_names'] = ["ThalamusHCP_" + name for name in thalamus_config['node_names']]
thalamus_data_nlin6 = roi_data(nlin6['thalamic'])
thalamus_data_09c = roi_data(nlin09c['thalamic'])

cb_config = tsv_to_config(tsvs['cerebellum'])
cb_config['node_names'] = ["Cerebellum_Cerebellar_"+name for name in cb_config['node_names']]
cb_data_nlin6 = roi_data(nlin6['cerebellum'])
cb_data_09c = roi_data(nlin09c['cerebellum'])

# Do NLin6
thalamus_and_cit_config_nlin6, thalamus_and_cit_data_nlin6 = add_atlas_to_another(
    cit_hemi_config, cit_hemi_data_nlin6,
    thalamus_config, thalamus_data_nlin6,
)

full_subcortical_config_nlin6, full_subcortical_data_nlin6 = add_atlas_to_another(
    thalamus_and_cit_config_nlin6, thalamus_and_cit_data_nlin6,
    cb_config, cb_data_nlin6
)

# The reference header comes from a brain mask. When a header is passed,
# nibabel keeps that header's on-disk dtype, so set it explicitly to the
# label array's dtype on a copy (the reference image stays untouched).
subcortical_header_nlin6 = nlin6_ref_img.header.copy()
subcortical_header_nlin6.set_data_dtype(full_subcortical_data_nlin6.dtype)
subcortical_nlin6 = nb.Nifti1Image(full_subcortical_data_nlin6, 
                                   nlin6_ref_img.affine, 
                                   header=subcortical_header_nlin6)
subcortical_nlin6.to_filename("tpl-MNI152NLin6Asym_atlas-SubcorticalMerged_res-01_dseg.nii.gz")

# Matching label table for the merged image
nlin6_df = pd.DataFrame(
    {"index": full_subcortical_config_nlin6['node_ids'],
     "name": full_subcortical_config_nlin6['node_names']})
expand_df(nlin6_df).to_csv(
    "tpl-MNI152NLin6Asym_atlas-SubcorticalMerged_res-01_dseg.tsv", sep="\t", index=False)

In [8]:
# Do 09c
thalamus_and_cit_config_09c, thalamus_and_cit_data_09c = add_atlas_to_another(
    cit_hemi_config, cit_hemi_data_09c,
    thalamus_config, thalamus_data_09c,
)

full_subcortical_config_09c, full_subcortical_data_09c = add_atlas_to_another(
    thalamus_and_cit_config_09c, thalamus_and_cit_data_09c,
    cb_config, cb_data_09c
)

# The reference header comes from a brain mask. When a header is passed,
# nibabel keeps that header's on-disk dtype, so set it explicitly to the
# label array's dtype on a copy (the reference image stays untouched).
subcortical_header_09c = nlin09c_ref_img.header.copy()
subcortical_header_09c.set_data_dtype(full_subcortical_data_09c.dtype)
subcortical_09c = nb.Nifti1Image(full_subcortical_data_09c, 
                                 nlin09c_ref_img.affine, 
                                 header=subcortical_header_09c)
subcortical_09c.to_filename("tpl-MNI152NLin2009cAsym_atlas-SubcorticalMerged_res-01_dseg.nii.gz")

# Matching label table for the merged image
nlin09c_df = pd.DataFrame(
    {"index": full_subcortical_config_09c['node_ids'],
     "name": full_subcortical_config_09c['node_names']})
expand_df(nlin09c_df).to_csv(
    "tpl-MNI152NLin2009cAsym_atlas-SubcorticalMerged_res-01_dseg.tsv", sep="\t", index=False)

## Making the json files for the atlases

We also want to have descriptive sidecars for the images and tsvs.

In [9]:
import json

# Sidecar metadata for the MDTB cerebellar parcellation
cb_sidecar = dict(
    Authors=[
        "King, M.",
        "Hernandez-Castillo, C.R.",
        "Poldrack, R.R.",
        "Ivry, R.",
        "Diedrichsen, J.",
    ],
    License="Creative Commons license CC BY-ND (Attribution - No derivatives)",
    BIDSVersion="1.1.0",
    Curators=["Diedrichsen J"],
    Name="Multi-domain task battery (MDTB) cerebellar parcellation",
    LongDesc=(
        "King et al. (2019) provided an extensive characterization of the functional organization "
        "of the cerebellum of 24 healthy, young participants. The contast are for for 47 task conditions, "
        "accounted for the activity caused by left hand, right hand, and eye movements. All contrast maps "
        "are relative to the mean activitiy across all tasks. The parcellation into 10 regions is defined "
        "from the task-evoked activity across all tasks."
    ),
    ReferencesAndLinks=[
        "https://github.com/DiedrichsenLab/cerebellar_atlases",
        "King, M., Hernandez-Castillo, C.R., Poldrack, R.R., Ivry, R., and Diedrichsen, J. (2019). Functional Boundaries in the Human Cerebellum revealed by a Multi-Domain Task Battery. Nat. Neurosci.",
    ],
)

# Sidecar metadata for the HCP thalamic parcellation
thalamic_sidecar = dict(
    Authors=[
        "E. Najdenovska",
        "Y. Aléman-Gómez",
        "G. Battistella",
        "M. Descoteaux",
        "P. Hagmann",
        "S. Jacquemont",
        "P. Maeder",
        "J.P. Thiran",
        "E. Fornari",
        "M. Bach Cuadra",
    ],
    License=(
        "Creative Commons Attribution Share Alike 4.0 International "
        "https://creativecommons.org/licenses/by-sa/4.0/legalcode"
    ),
    BIDSVersion="1.1.0",
    Name="HCP Thalamic Parcellation",
    LongDesc=(
        "Regions are based on a probabilistic atlas of anatomical subparts of the thalamus built "
        "upon a relatively large dataset where the individual thalamic parcellation was done by "
        "employing a recently proposed automatic diffusion-based clustering method. NOTE: "
    ),
    ReferencesAndLinks=[
        "doi: 10.1038/sdata.2018.270",
        "https://zenodo.org/record/1405484/files/Thalamus_Nuclei-HCP-MaxProb.nii.gz",
        "https://zenodo.org/record/1405484/files/Thalamic_Nuclei-ColorLUT.txt",
        "Najdenovska, E., Alemán-Gómez, Y., Battistella, G. et al. In-vivo probabilistic atlas of human thalamic nuclei based on diffusion-weighted magnetic resonance imaging. Sci Data 5, 180270 (2018). https://doi.org/10.1038/sdata.2018.270",
    ],
)

# Sidecar metadata for the CIT168 subcortical atlas
subcortical_sidecar = {
    "Authors": [
        "W.M. Pauli",
        "A.N. Nili",
        "J.M. Tyszka"
    ],
    "BIDSVersion": "1.1.0",
    "Curators": [
        "W.M. Pauli",
        "A.N. Nili",
        "J.M. Tyszka",
        "M. Okamoto"
    ],
    "License": "MIT: https://github.com/jmtyszka/CIT168-SubCorticalAtlas/blob/master/LICENSE.md",
    "Name": "CIT168",
    # Abbreviation -> full region name. Keys must be unique: the
    # hypothalamus was previously keyed "HN" as well, so the later
    # "HN": "Habenular Nuclei" entry silently replaced it. CIT168
    # abbreviates the hypothalamus as "HTH".
    "RegionNames": {
        "Pu": "Putamen",
        "Ca": "Caudate Nucleus",
        "NAC": "Nucleus Acumbens",
        "EXA": "Extended Amygdala",
        "GPi": "Globus Pallidus, Internal",
        "GPe": "Globus Pallidus, External",
        "VeP": "Ventral Pallidum",
        "SNc": "Substantia Nigra, Pars Compacta",
        "SNr": "Substantia Nigra, Pars Reticulata",
        "STH": "Subthalamic Nucleus",
        "HTH": "Hypothalamus",
        "PBP": "Parabrachial Pigmented Nucleus",
        "VTA": "Ventral Tegmental Area",
        "RN": "Red Nucleus",
        "HN": "Habenular Nuclei",
        "MN": "Mammilary Nucleus"},
    "LongDesc": "Parcellations based on high spatial resolution, three-dimensional templates, using high-accuracy "
                "diffeomorphic registration of T1- and T2- weighted structural images from 168 typical adults "
                "between 22 and 35 years old. NOTE: Atlases originally assigned the same label to structures in "
                "each hemisphere. Here we have split the regions into hemispheres. Additionally, the SNC, PBP and VTA "
                "regions have been merged into a single structure.",
    "ReferencesAndLinks": [
        "https://neurovault.org/collections/3145/",
        "https://osf.io/jkzwp/wiki/home/",
        "https://github.com/jmtyszka/CIT168-SubCorticalAtlas",
        # The DOI at the end of the citation was truncated to the bare
        # publisher prefix; completed here.
        "Pauli, W., Nili, A. & Tyszka, J. A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei. Sci Data 5, 180063 (2018). https://doi.org/10.1038/sdata.2018.63"
    ]
}

# Sidecar metadata for the merged atlas; the source atlases' sidecars are
# embedded under "SourceAtlases".
merged_sidecar = dict(
    Authors=[
        "M. Cieslak",
        "T. Salo",
        "E. Feczko",
        "T.D. Satterthwaite",
    ],
    License="Creative Commons license CC BY-ND (Attribution - No derivatives)",
    BIDSVersion="1.1.0",
    Curators=[
        "M. Cieslak",
        "T. Salo",
        "E. Feczko",
        "T.D. Satterthwaite",
    ],
    Name="Non-Cortical Atlases in Template Space NCATS",
    LongDesc="A set of non-cortical atlases that have been merged together in template space.",
    ReferencesAndLinks=["https://github.com/PennLINC/AtlasPack"],
    # These match up with the `atlas_name` column
    SourceAtlases={
        "CIT168Subcortical": subcortical_sidecar,
        "Cerebellum": cb_sidecar,
        "ThalamusHCP": thalamic_sidecar,
    },
)

# Serialize the merged sidecar next to the merged atlas files
with open("atl-SubcorticalMerged_dseg.json", "w") as atl_json:
    atl_json.write(json.dumps(merged_sidecar, indent=4))
    

## Add the subcortical atlas to cortical parcellations

Now we can add these to the cortical atlases that are missing subcortical regions.

In [None]:
import os
os.makedirs("verified_atlases", exist_ok=True)
def save_data(data_matrix, fname, reference_img=None):
    """Write ``data_matrix`` as a NIfTI image under ``verified_atlases/``.

    Parameters
    ----------
    data_matrix : numpy.ndarray
        Label array to save.
    fname : str
        File name (without directory) of the image to create.
    reference_img : nibabel image, optional
        Image that supplies the affine and the header template. Defaults to
        ``nlin09c_ref_img``, because every atlas written by this notebook
        through ``save_data`` is in MNI152NLin2009cAsym space. The function
        previously read an undefined global named ``ref_img``.
    """
    if reference_img is None:
        reference_img = nlin09c_ref_img
    # When a header is passed, nibabel keeps that header's on-disk dtype.
    # The reference is a brain mask, so set the dtype of a header copy to
    # the label array's dtype; the merged atlases have more labels than an
    # 8-bit type can hold.
    header = reference_img.header.copy()
    header.set_data_dtype(data_matrix.dtype)
    img = nb.Nifti1Image(data_matrix, 
                         reference_img.affine, 
                         header=header)
    img.to_filename("verified_atlases/" + fname)

# The new atlas_config.json
final_config = {}

# NOTE(review): `original_atlas_config` is not defined in any cell visible
# in this notebook; it must be loaded (presumably from the existing
# atlas_config.json) before this cell can run -- confirm its source.

# The Schaefer images below are in MNI152NLin2009cAsym space, so the 2009c
# version of the merged subcortical atlas is added to each of them. (This
# cell previously referenced `full_subcortical_config` and
# `full_subcortical_data`, which no cell defines.)
for n_parcels in (100, 200, 400):
    cortical_config = original_atlas_config['schaefer%dx7' % n_parcels]
    cortical_file = ('data/2009cAsym/tpl-MNI152NLin2009cAsym_res-01_atlas-Schaefer2018_'
                     'desc-%dParcels7NetworksLPS_dseg.nii.gz' % n_parcels)
    cortical_data = nb.load(cortical_file).get_fdata().astype(np.uint32)
    verify_atlas(cortical_config, cortical_data)

    extended_config, extended_data = add_atlas_to_another(
        cortical_config, cortical_data,
        full_subcortical_config_09c, full_subcortical_data_09c)

    extended_file = ('tpl-MNI152NLin2009cAsym_res-01_atlas-Schaefer2018Ext_'
                     'desc-%dParcels7NetworksLPS_dseg.nii.gz' % n_parcels)
    extended_config['file'] = extended_file
    save_data(extended_data, extended_file)
    final_config['schaefer%d' % n_parcels] = extended_config

In [None]:

# Re-load every extended atlas from disk and check it against its config.
# save_data() writes into "verified_atlases/", while the config's 'file'
# entry holds only the bare file name, so the directory is added here.
for config in final_config:
    atlas_path = os.path.join("verified_atlases", final_config[config]['file'])
    data = nb.load(atlas_path).get_fdata().astype(np.uint32)
    verify_atlas(final_config[config], data)
    print("Verified", config)

In [None]:
# NOTE(review): leftover interactive-debugging magic; it is not part of the
# atlas-building workflow and should probably be removed before sharing.
%qtconsole