# Make_cfos_count_df
By: Austin Hoag

Date: November 25, 2020

The purpose of this notebook is to remake the "NxK" dataframe for Jess' c-Fos experiments, where N is the number of animals and K is the number of brain regions in the Princeton Mouse Brain atlas annotation volume. 

In [1]:
import os, glob
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
import tifffile
import seaborn as sns
from concurrent.futures import ProcessPoolExecutor
import pickle
%matplotlib inline

In [8]:
# Location of the 16 bit eroded annotation atlas on bucket
eroded_atlas_file = '/jukebox/LightSheetTransfer/atlas/annotation_sagittal_atlas_20um_16bit_hierarch_labels_60um_edge_80um_vent_erosion.tif'
# Read the TIFF and hold it in memory as a fresh uint16 numpy array
eroded_atlas_vol = np.array(tifffile.imread(eroded_atlas_file), dtype='uint16')

In [9]:
# Get all of the ids for the regions in the atlas volume
atlas_segments = np.unique(eroded_atlas_vol)
# Take out the 0 segment since it is not a brain region.
# A boolean mask keeps this vectorized and preserves the atlas dtype
# even if no nonzero labels remain.
atlas_segments = atlas_segments[atlas_segments != 0]

In [10]:
# Root directory; each batch lives in <basepath>/<batch>/processed/<animal>/
basepath = '/jukebox/wang/Jess/lightsheet_output'
# Names of the batch directories. Further down, one count_dict_list_<batch>.p
# pickle file is loaded for each entry of this list.
batches = ['201810_adultacutePC_ymaze_cfos',
           '201904_ymaze_cfos',
           '201908_tpham_ymaze_cfos',
           '202002_cfos',
           '202010_cfos']

In [84]:
def make_count_dict(brain_path):
    """ 
    ---PURPOSE---
    Given a path on bucket to a specific brain,
    calculate the counts in
    each Princeton Mouse Brain Atlas (PMA) region
    and save them in a dictionary.
    It uses the eroded atlas which rejects cells
    within 60 microns of the edges of the brain
    and within 80 microns of the ventricles
    or within ventricles.
    Cells whose voxel lies outside the atlas volume are discarded,
    and several cells landing in the same voxel are counted once.
    Reads the module-level `eroded_atlas_vol` and `atlas_segments`.
    ---INPUT---
    brain_path      The path on bucket for a specific brain, e.g.
                    /jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an1
                    A trailing slash is allowed.
    ---OUTPUT---
    count_dict      The dictionary that contains the brain, batch and counts for each PMA region.
                    If the cells file exists but cannot be loaded, only the
                    'batch' and 'brain' keys are present.
    """
    print(brain_path)
    # Normalize first so the path components are the same with or without a
    # trailing slash: <...>/<batch>/processed/<brain>
    brain_dir = os.path.normpath(brain_path)
    brain = os.path.basename(brain_dir)
    batch = os.path.basename(os.path.dirname(os.path.dirname(brain_dir)))
    count_dict = {'batch':batch,'brain':brain} # initialize the dictionary

    # The output folder has two spellings; try both
    cells_transformed_file = os.path.join(brain_path,'clearmap_cluster_output/cells_transformed_to_Atlas.npy')
    if not os.path.exists(cells_transformed_file):
        cells_transformed_file = os.path.join(brain_path,'ClearMapClusterOutput/cells_transformed_to_Atlas.npy')
    assert os.path.exists(cells_transformed_file), \
        f'no cells_transformed_to_Atlas.npy found under {brain_path}'
    try:
        converted_points = np.load(cells_transformed_file)
    except Exception as exc:
        # Unreadable file: return the dictionary without counts so the caller can detect and skip it
        print(f'could not load {cells_transformed_file}: {exc}')
        return count_dict

    # Integer voxel coordinates (x,y,z); astype truncates toward zero like int()
    points = np.asarray(converted_points)
    if points.size == 0:
        xyz = np.empty((0, 3), dtype=np.int64)
    else:
        xyz = points[:, :3].astype(np.int64)
    x, y, z = xyz[:, 0], xyz[:, 1], xyz[:, 2]

    # Some cells will fall outside the volume - just how clearmap works.
    # Drop them explicitly: negative indices would otherwise wrap around to the
    # opposite side of the volume instead of raising an IndexError.
    depth, height, width = eroded_atlas_vol.shape
    in_bounds = ((x >= 0) & (x < width) &
                 (y >= 0) & (y < height) &
                 (z >= 0) & (z < depth))

    # read the cells into a volume the same size as the atlas (no dilation)
    cell_map = np.zeros(eroded_atlas_vol.shape, dtype=bool)
    cell_map[z[in_bounds], y[in_bounds], x[in_bounds]] = True

    # Look up the atlas label under every occupied voxel once and tally the labels,
    # rather than building a full-volume mask for every segment
    labels_at_cells = eroded_atlas_vol[cell_map]
    found_ids, found_counts = np.unique(labels_at_cells, return_counts=True)
    counts_by_id = dict(zip(found_ids.tolist(), found_counts.tolist()))
    for atlas_segment in atlas_segments:
        count_dict[atlas_segment] = counts_by_id.get(int(atlas_segment), 0)
    return count_dict

In [73]:
%%time
# For a single batch loop through all of the animals and get the count_dict using parallel processing
# to speed this up.
# This saves a pickle file containing the list of count_dicts for this batch so that they can be loaded later
count_dict_list = [] # list of dicts which contain the batch, brain, and the number of counts in each PMA region

batch = '201810_adultacutePC_ymaze_cfos'
batch_dir = os.path.join(basepath,batch,'processed')
animal_dirs = glob.glob(batch_dir+'/*/')
brain_paths = animal_dirs
with ProcessPoolExecutor(max_workers=32) as executor:
    # Submit one task per brain and collect the results in submission order.
    # A worker's exception is re-raised by future.result(), so the try/except must
    # wrap that call; one failing brain then no longer aborts the whole batch.
    futures = [executor.submit(make_count_dict,brain_path) for brain_path in brain_paths]
    for brain_path,future in zip(brain_paths,futures):
        try:
            count_dict_list.append(future.result())
        except Exception as exc:
            print(f'generated an exception: {exc}')
            print(f'  while processing {brain_path}')
savename = f'./count_dict_list_{batch}.p'
with open(savename,'wb') as pkl:
    pickle.dump(count_dict_list,pkl)
print(f"wrote {savename}")

/jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_crus1_5//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_lob6_17//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_lob6_15/

/jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_crus1_10//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_crus1_3//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_crus1_2//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_lob6_18//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_lob6_21//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_crus1_1//jukebox/wang/Jess/lightsheet_output/201810_adultacutePC_ymaze_cfos/processed/dadult_pc_lob6_19/
/jukebox/wang/Jess/lightsheet_output

I ran this cell once for each batch in `batches`; each run generated that batch's count pickle file (`count_dict_list_<batch>.p`).

In [65]:
%%time
count_dict_list = [] # list of dicts which contain the batch, brain, and the number of counts in each PMA region

batch = '201908_tpham_ymaze_cfos'
batch_dir = os.path.join(basepath,batch,'processed')
animal_dirs = glob.glob(batch_dir+'/*/')
brain_paths = animal_dirs
print(len(brain_paths))
with ProcessPoolExecutor(max_workers=32) as executor:
    # Submit one task per brain and collect the results in submission order.
    # A worker's exception is re-raised by future.result(), so the try/except must
    # wrap that call; one failing brain then no longer aborts the whole batch.
    futures = [executor.submit(make_count_dict,brain_path) for brain_path in brain_paths]
    for brain_path,future in zip(brain_paths,futures):
        try:
            count_dict_list.append(future.result())
        except Exception as exc:
            print(f'generated an exception: {exc}')
            print(f'  while processing {brain_path}')
savename = f'./count_dict_list_{batch}.p'
with open(savename,'wb') as pkl:
    pickle.dump(count_dict_list,pkl)
print(f"wrote {savename}")

24
/jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an16//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an25//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an07//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an04/
/jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an03//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an17//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an22/

/jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an08//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an14//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an23//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an09//jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an02/

/jukebox/wang/Jess/lightsheet_output/201908_tpham_ymaze_cfos/processed/an18//juk

In [66]:
%%time
count_dict_list = [] # list of dicts which contain the batch, brain, and the number of counts in each PMA region

batch = '201904_ymaze_cfos'
batch_dir = os.path.join(basepath,batch,'processed')
animal_dirs = glob.glob(batch_dir+'/*/')
brain_paths = animal_dirs
print(len(brain_paths))
with ProcessPoolExecutor(max_workers=32) as executor:
    # Submit one task per brain and collect the results in submission order.
    # A worker's exception is re-raised by future.result(), so the try/except must
    # wrap that call; one failing brain then no longer aborts the whole batch.
    futures = [executor.submit(make_count_dict,brain_path) for brain_path in brain_paths]
    for brain_path,future in zip(brain_paths,futures):
        try:
            count_dict_list.append(future.result())
        except Exception as exc:
            print(f'generated an exception: {exc}')
            print(f'  while processing {brain_path}')
savename = f'./count_dict_list_{batch}.p'
with open(savename,'wb') as pkl:
    pickle.dump(count_dict_list,pkl)
print(f"wrote {savename}")

33
/jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an25//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an16//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an6//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an3//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an28//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an8//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an29//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an17//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an32/
/jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an14/

/jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an9/


/jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an26//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an31//jukebox/wang/Jess/lightsheet_output/201904_ymaze_cfos/processed/an23//jukebox/wang/J

In [67]:
# List the working directory to check which count_dict_list_<batch>.p files have been written
ls

count_dict_list_201904_ymaze_cfos.p        make_16bit_PMA_eroded_atlas.ipynb
count_dict_list_201908_tpham_ymaze_cfos.p  make_cfos_count_df.ipynb
count_dict_list_202002_cfos.p              [0m[38;5;27mold_counts[0m/


In [22]:
# NOTE(review): `segment_props_dict` is not defined in any visible cell of this notebook,
# so this cell fails on a fresh kernel. Presumably it was loaded with `json` from the
# atlas segment-properties file - TODO restore the cell that loads it.
# `ids` and `segment_names` are parallel lists: the i-th label ("ABBREVIATION: full name")
# belongs to the i-th segment id.
ids = segment_props_dict['inline']['ids']
segment_names = segment_props_dict['inline']['properties'][0]['values']
segment_names

['None: root',
 'grey: Basic cell groups and regions',
 'BS: Brain stem',
 'MB: Midbrain',
 'MBmot: Midbrain, motor related',
 'III: Oculomotor nucleus',
 'MT: Medial terminal nucleus of the accessory optic tract',
 'LT: Lateral terminal nucleus of the accessory optic tract',
 'DT: Dorsal terminal nucleus of the accessory optic tract',
 'IV: Trochlear nucleus',
 'MRN: Midbrain reticular nucleus',
 'MRNm: Midbrain reticular nucleus, magnocellular part',
 'MRNmg: Midbrain reticular nucleus, magnocellular part, general',
 'MRNp: Midbrain reticular nucleus, parvicellular part',
 'RN: Red nucleus',
 'AT: Anterior tegmental nucleus',
 'RR: Midbrain reticular nucleus, retrorubral area',
 'SCm: Superior colliculus, motor related',
 'SCig: Superior colliculus, motor related, intermediate gray layer',
 'SCig-a: Superior colliculus, motor related, intermediate gray layer, sublayer a',
 'SCig-b: Superior colliculus, motor related, intermediate gray layer, sublayer b',
 'SCig-c: Superior colliculus

In [79]:
# Map each segment id (as a Python int) to the full region name.
# Labels look like "ABBREVIATION: full name"; split on the first ':' only so that
# a region name which itself contains a colon is kept whole.
segment_name_dict = {int(segment_id): segment_label.split(':', 1)[1].strip()
                     for segment_id, segment_label in zip(ids, segment_names)}

In [80]:
# Display the id -> region name mapping as a sanity check
segment_name_dict

{1: 'root',
 2: 'Basic cell groups and regions',
 3: 'Brain stem',
 4: 'Midbrain',
 5: 'Midbrain, motor related',
 6: 'Oculomotor nucleus',
 7: 'Medial terminal nucleus of the accessory optic tract',
 8: 'Lateral terminal nucleus of the accessory optic tract',
 9: 'Dorsal terminal nucleus of the accessory optic tract',
 10: 'Trochlear nucleus',
 11: 'Midbrain reticular nucleus',
 12: 'Midbrain reticular nucleus, magnocellular part',
 13: 'Midbrain reticular nucleus, magnocellular part, general',
 14: 'Midbrain reticular nucleus, parvicellular part',
 15: 'Red nucleus',
 16: 'Anterior tegmental nucleus',
 17: 'Midbrain reticular nucleus, retrorubral area',
 18: 'Superior colliculus, motor related',
 19: 'Superior colliculus, motor related, intermediate gray layer',
 20: 'Superior colliculus, motor related, intermediate gray layer, sublayer a',
 21: 'Superior colliculus, motor related, intermediate gray layer, sublayer b',
 22: 'Superior colliculus, motor related, intermediate gray layer

In [81]:
%%time 
# Re-key every per-brain count dict by PMA region name instead of segment id,
# reading the per-batch pickle files written earlier in this notebook

count_dict_list_names = []

for batch_name in batches:
    with open(f'./count_dict_list_{batch_name}.p','rb') as pkl:
        count_dict_list = pickle.load(pkl)
    for count_dict in count_dict_list:
        batch, brain = count_dict['batch'], count_dict['brain']
        print(batch,brain)
        # only 'batch' and 'brain' present: no counts were computed for this brain
        if len(count_dict) == 2:
            print("cells transformed file was corrupted. Skipping")
            continue
        count_dict_names = {'batch':batch,'brain':brain}
        count_dict_names.update(
            (segment_name_dict[segment_id], count_dict[segment_id])
            for segment_id in atlas_segments
        )
        count_dict_list_names.append(count_dict_names)

201810_adultacutePC_ymaze_cfos dadult_pc_lob6_15
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_5
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_2
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_19
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_21
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_17
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_18
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_3
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_8
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_1
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_10
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_6
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_16
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_14
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_20
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_7
201810_adultacutePC_ymaze_cfos dadult_pc_lob6_13
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_4
201810_adultacutePC_ymaze_cfos dadult_pc_crus1_9
201904_ymaze_cfos an25
201904_ymaze_cfos an16
201904_ymaze_cfos an6


In [82]:
# One row per brain dict in count_dict_list_names: 'batch', 'brain' and one count column per region name
df = pd.DataFrame(count_dict_list_names)
df

Unnamed: 0,batch,brain,root,Midbrain,Oculomotor nucleus,Medial terminal nucleus of the accessory optic tract,Lateral terminal nucleus of the accessory optic tract,Dorsal terminal nucleus of the accessory optic tract,Trochlear nucleus,Midbrain reticular nucleus,...,supraoptic commissures,fasciculus retroflexus,habenular commissure,stria medullaris,nigrostriatal tract,rubrospinal tract,ventral tegmental decussation,crossed tectospinal pathway,direct tectospinal pathway,doral tegmental decussation
0,201810_adultacutePC_ymaze_cfos,dadult_pc_lob6_15,1,2372,22,16,14,23,0,637,...,12,1,0,1,14,48,4,39,0,6
1,201810_adultacutePC_ymaze_cfos,dadult_pc_crus1_5,0,3387,11,10,5,14,3,733,...,6,12,3,31,18,47,5,41,0,1
2,201810_adultacutePC_ymaze_cfos,dadult_pc_crus1_2,0,1155,1,3,7,16,0,166,...,5,5,0,30,13,7,0,11,0,0
3,201810_adultacutePC_ymaze_cfos,dadult_pc_lob6_19,0,2524,0,13,20,7,0,830,...,13,5,0,4,8,11,13,0,0,1
4,201810_adultacutePC_ymaze_cfos,dadult_pc_lob6_21,0,1111,5,1,14,11,0,152,...,3,3,0,6,4,28,0,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,202010_cfos,an008,0,4200,9,27,13,14,0,887,...,67,29,0,28,30,332,22,9,0,2
167,202010_cfos,an006,0,8680,10,64,57,12,0,1352,...,121,34,0,39,36,406,27,39,0,2
168,202010_cfos,an005,2,5800,0,20,16,17,0,512,...,29,4,0,3,15,172,0,35,0,0
169,202010_cfos,an016,2,12951,19,84,62,18,1,3421,...,43,66,12,118,108,317,30,61,0,2


In [83]:
# save the dataframe to a CSV file
# NOTE: with the default index=True, to_csv also writes the row index as the first, unnamed column
df.to_csv('../data/animals_eroded_counts_20201125.csv')