# make_eroded_voxel_count_df
In this notebook, I create a CSV file like this one: /jukebox/LightSheetTransfer/atlas/Princeton_mouse_atlas_id_table_public.csv 
But where the voxel_in_structure is calculated on the eroded atlas.

In [9]:
import os, glob
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
from brain_atlas_toolkit import graph_tools
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

import tifffile

In [2]:
# Load in the eroded 16-bit PMA annotation atlas
atlas_file = '/jukebox/LightSheetTransfer/atlas/annotation_sagittal_atlas_20um_16bit_hierarch_labels_60um_edge_80um_vent_erosion.tif'
atlas_vol = np.array(tifffile.imread(atlas_file)).astype('uint16')

In [3]:
# Get all of the ids for the regions in the atlas volume 
atlas_segments = np.unique(atlas_vol)
atlas_segments = np.array([x for x in atlas_segments if x!=0]) # take out the 0 segment since it is not a brain region

In [4]:
# Segment properties file for this atlas hierachy -> This contains the mapping between id and region name
segment_props_file = '/jukebox/wang/ahoag/lightsheet_helper_scripts/projects/combine_cfos_batches/data/pma_segment_properties_info'

In [5]:
# Load in ontology json file to python dictionary
ontology_json_file = "/jukebox/LightSheetTransfer/atlas/PMA_ontology.json"
with open(ontology_json_file,'r') as infile:
    ontology_dict = json.load(infile)

In [39]:
# Make a dictionary mapping node name to id in the atlas
with open(segment_props_file,'r') as infile:
    segment_props_dict = json.load(infile)
ids = segment_props_dict['inline']['ids']
segment_names = segment_props_dict['inline']['properties'][0]['values']

# Create a dictionary mapping the ids to the names, and strip off the acroynms for the time being
segment_id_dict = {segment_names[ii].split(':')[1].strip():int(ids[ii]) for ii in range(len(segment_names))}
segment_name_dict = {int(ids[ii]):segment_names[ii].split(':')[1].strip() for ii in range(len(ids))}

In [56]:
def count_voxels(IDlist):
    """ Given an IDlist in the eroded annotation atlas
    count the number of voxels occupied by each 
    ID
    """
    print("Received ID list: ",IDlist)
    this_count_dict = {}
    for ID in IDlist:
        if ID not in atlas_vol:
            this_count_dict[ID] = 0
        atlas_mask = atlas_vol==ID
        count = np.count_nonzero(atlas_mask)
        this_count_dict[ID] = count
    return this_count_dict

In [57]:
master_count_dict_par = {}
chunk_size = 10 # Each core runs get_count() on this many different regions
chunked_segment_lists = [list(map(int,ids[i:i+chunk_size])) for i in range(0,len(ids),chunk_size)]
with ProcessPoolExecutor(max_workers=8) as executor:
    for count_dict_i in executor.map(count_voxels,chunked_segment_lists):
        try:
            for key in count_dict_i:
                master_count_dict_par[key] = count_dict_i[key]
        except Exception as exc:
            print(f'generated an exception: {exc}')

Received ID list: Received ID list: Received ID list: Received ID list: Received ID list: Received ID list: Received ID list: Received ID list:         [51, 52, 53, 54, 55, 56, 57, 58, 59, 60][71, 72, 73, 74, 75, 76, 77, 78, 79, 80][41, 42, 43, 44, 45, 46, 47, 48, 49, 50][21, 22, 23, 24, 25, 26, 27, 28, 29, 30][11, 12, 13, 14, 15, 16, 17, 18, 19, 20][31, 32, 33, 34, 35, 36, 37, 38, 39, 40][1, 2, 3, 4, 5, 6, 7, 8, 9, 10][61, 62, 63, 64, 65, 66, 67, 68, 69, 70]







Received ID list:  [81, 82, 83, 84, 85, 86, 87, 88, 89, 90]
Received ID list:  [91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
Received ID list:  [101, 102, 103, 104, 105, 106, 107, 108, 109, 110]
Received ID list:  [111, 112, 113, 114, 115, 116, 117, 118, 119, 120]
Received ID list:  [121, 122, 123, 124, 125, 126, 127, 128, 129, 130]
Received ID list:  [131, 132, 133, 134, 135, 136, 137, 138, 139, 140]
Received ID list:  [141, 142, 143, 144, 145, 146, 147, 148, 149, 150]
Received ID list:  [151, 152, 153, 154, 155, 156, 157, 158

Received ID list:  [1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170]
Received ID list:  [1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180]
Received ID list:  [1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190]
Received ID list:  [1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200]
Received ID list:  [1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210]
Received ID list:  [1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220]
Received ID list:  [1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230]
Received ID list:  [1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240]
Received ID list:  [1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250]
Received ID list:  [1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260]
Received ID list:  [1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270]
Received ID list:  [1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280]
Received ID list:  [1281, 1282, 1283, 12

In [58]:
master_count_dict_par

{1: 37,
 2: 0,
 3: 0,
 4: 648434,
 5: 0,
 6: 3413,
 7: 3583,
 8: 1612,
 9: 1256,
 10: 864,
 11: 488283,
 12: 0,
 13: 0,
 14: 0,
 15: 70348,
 16: 4076,
 17: 12002,
 18: 0,
 19: 182573,
 20: 5,
 21: 106,
 22: 0,
 23: 191489,
 24: 106062,
 25: 29573,
 26: 125825,
 27: 0,
 28: 56509,
 29: 34125,
 30: 3804,
 31: 390083,
 32: 2972,
 33: 7198,
 34: 7161,
 35: 4395,
 36: 2342,
 37: 0,
 38: 113414,
 39: 0,
 40: 14788,
 41: 18391,
 42: 1844,
 43: 11355,
 44: 0,
 45: 0,
 46: 0,
 47: 9529,
 48: 0,
 49: 0,
 50: 5640,
 51: 131210,
 52: 138962,
 53: 195323,
 54: 8408,
 55: 0,
 56: 29116,
 57: 114943,
 58: 48771,
 59: 396,
 60: 9057,
 61: 4668,
 62: 158,
 63: 0,
 64: 0,
 65: 7711,
 66: 33904,
 67: 0,
 68: 0,
 69: 0,
 70: 0,
 71: 0,
 72: 0,
 73: 0,
 74: 0,
 75: 5116,
 76: 8488,
 77: 16634,
 78: 16229,
 79: 88506,
 80: 0,
 81: 415148,
 82: 0,
 83: 0,
 84: 39518,
 85: 647,
 86: 0,
 87: 2138,
 88: 2024,
 89: 278641,
 90: 0,
 91: 0,
 92: 4438,
 93: 3330,
 94: 6397,
 95: 0,
 96: 45890,
 97: 5059,
 98: 43395

## Now sum up voxels from children and put into parent counts

In [62]:
# Create the dictionary that maps region name to the voxel count in that region (before summing child counts)
count_dict_names = {}
for ID in ids:
    segment_name = segment_name_dict[int(ID)]
    count_dict_names[segment_name] = master_count_dict_par[int(ID)]

In [64]:
ontology_graph = graph_tools.Graph(ontology_dict)

In [65]:
# Use names to get voxels counts in the progeny regions and add them to the shell to get total Thalamus voxel count
bs_progeny = ontology_graph.get_progeny('Brain stem')
bs_count = count_dict_names['Brain stem'] # Initialized to the shell count
# Now loop over progeny ids and add to the count
print(f"Brain stem shell has {bs_count} voxels")
for prog in bs_progeny:
    # Note that not all subregions are in the atlas volume, so skip the ones that aren't
    if prog not in count_dict_names.keys():
        continue
    bs_count +=count_dict_names[prog]
print(f"Final count in entire Brain stem is {bs_count} voxels")

Brain stem shell has 0 voxels
Final count in entire Brain stem is 10608656 voxels


In [66]:
# Do this for all regions
corrected_count_dict = {}
for region in count_dict_names.keys():
    # initialize with the shell count
    counts_region = count_dict_names[region]
    progeny = ontology_graph.get_progeny(region)
    if progeny != []:
        for prog in progeny:
            # Note that not all subregions are in the atlas volume, so skip the ones that aren't
            try:
                counts_region += count_dict_names[prog]
            except KeyError:
                continue
    corrected_count_dict[region] = counts_region

In [67]:
corrected_count_dict

{'root': 42318490,
 'Basic cell groups and regions': 39132694,
 'Brain stem': 10608656,
 'Midbrain': 3410654,
 'Midbrain, motor related': 1898980,
 'Oculomotor nucleus': 3413,
 'Medial terminal nucleus of the accessory optic tract': 3583,
 'Lateral terminal nucleus of the accessory optic tract': 1612,
 'Dorsal terminal nucleus of the accessory optic tract': 1256,
 'Trochlear nucleus': 864,
 'Midbrain reticular nucleus': 488283,
 'Midbrain reticular nucleus, magnocellular part': 0,
 'Midbrain reticular nucleus, magnocellular part, general': 0,
 'Midbrain reticular nucleus, parvicellular part': 0,
 'Red nucleus': 70348,
 'Anterior tegmental nucleus': 4076,
 'Midbrain reticular nucleus, retrorubral area': 12002,
 'Superior colliculus, motor related': 509808,
 'Superior colliculus, motor related, intermediate gray layer': 182684,
 'Superior colliculus, motor related, intermediate gray layer, sublayer a': 5,
 'Superior colliculus, motor related, intermediate gray layer, sublayer b': 106,
 '

In [104]:
# Now construct the dataframe, which will be a list of dictionaries
# Columns will be name, id, parent_structure_id, parent_name,voxels_in_eroded_structure,mm_cubed_in_eroded_structure
dlist = []
for node_name in graph.graph: 
    node = graph.graph[node_name]
    parent = node.parent
    try:
        ID = str(segment_id_dict[node_name])
    except:
        ID = 'None'
    try:
        parent_name = parent.name
        parent_ID = str(segment_id_dict[parent_name])
    except:
        parent_ID = 'None'
    try:
        voxels_in_eroded_structure = corrected_count_dict[node_name]
        mm_cubed_in_eroded_structure = 0.02*0.02*0.02*voxels_in_eroded_structure
    except:
        voxels_in_eroded_structure = 0
        mm_cubed_in_eroded_structure = 0 
    node_dict = {
        'name':node.name,
        'id':ID,
        'parent_structure_id':parent_ID,
        'parent_name':parent,
        'voxels_in_eroded_structure':str(voxels_in_eroded_structure),
        'mm_cubed_in_eroded_structure':mm_cubed_in_eroded_structure,
                }
    dlist.append(node_dict)
    

In [105]:
dlist

[{'name': 'root',
  'id': '1',
  'parent_structure_id': 'None',
  'parent_name': None,
  'voxels_in_eroded_structure': '42318490',
  'mm_cubed_in_eroded_structure': 338.54792000000003},
 {'name': 'Basic cell groups and regions',
  'id': '2',
  'parent_structure_id': '1',
  'parent_name': root,
  'voxels_in_eroded_structure': '39132694',
  'mm_cubed_in_eroded_structure': 313.06155200000006},
 {'name': 'Brain stem',
  'id': '3',
  'parent_structure_id': '2',
  'parent_name': Basic cell groups and regions,
  'voxels_in_eroded_structure': '10608656',
  'mm_cubed_in_eroded_structure': 84.86924800000001},
 {'name': 'Midbrain',
  'id': '4',
  'parent_structure_id': '3',
  'parent_name': Brain stem,
  'voxels_in_eroded_structure': '3410654',
  'mm_cubed_in_eroded_structure': 27.285232000000004},
 {'name': 'Midbrain, motor related',
  'id': '5',
  'parent_structure_id': '4',
  'parent_name': Midbrain,
  'voxels_in_eroded_structure': '1898980',
  'mm_cubed_in_eroded_structure': 15.19184000000000

In [106]:
df = pd.DataFrame(dlist)

In [107]:
df.head(20)

Unnamed: 0,name,id,parent_structure_id,parent_name,voxels_in_eroded_structure,mm_cubed_in_eroded_structure
0,root,1,,,42318490,338.54792
1,Basic cell groups and regions,2,1.0,root,39132694,313.061552
2,Brain stem,3,2.0,Basic cell groups and regions,10608656,84.869248
3,Midbrain,4,3.0,Brain stem,3410654,27.285232
4,"Midbrain, motor related",5,4.0,Midbrain,1898980,15.19184
5,Oculomotor nucleus,6,5.0,"Midbrain, motor related",3413,0.027304
6,Medial terminal nucleus of the accessory optic...,7,5.0,"Midbrain, motor related",3583,0.028664
7,Lateral terminal nucleus of the accessory opti...,8,5.0,"Midbrain, motor related",1612,0.012896
8,Dorsal terminal nucleus of the accessory optic...,9,5.0,"Midbrain, motor related",1256,0.010048
9,Trochlear nucleus,10,5.0,"Midbrain, motor related",864,0.006912


In [110]:
df.to_csv('Princeton_mouse_atlas_voxel_counts_summed_children_20um_16bit_hierarch_labels_60um_edge_80um_vent_erosion.csv',index=False)