In [1]:
# ------------------------------------------------------------------------------ #
# @Author:        L. Rudelt
# @Email:         lucas.rudelt@ds.mpg.de
# @Created:       2023-11-16 16:49:35
# @Last Modified: 
# ------------------------------------------------------------------------------ #



import logging
# Configure the root logger: timestamped, pipe-separated records, and only
# WARNING or above from loggers that do not set their own level.
logging.basicConfig(
    format="%(asctime)s | %(levelname)-8s | %(name)-s | %(funcName)-s | %(message)s",
    level=logging.WARNING,
)
# The notebook's own logger is more verbose than the root logger, so the
# log.info(...) calls in this notebook are emitted.
log = logging.getLogger("notebook")
log.setLevel("DEBUG")

import re
import glob
import h5py
import sys
import os
import numpy as np
import xarray as xr
import pandas as pd


# Make the parent directory importable so that the local `ana` package resolves.
# also needs to be added for each dask-worker
extra_path = os.path.abspath('../')
sys.path.append(extra_path)
log.info(f"project directory: {extra_path}")

# Project-local helpers; these imports rely on the sys.path entry added above.
from ana import utility as utl
from ana import plot_helper as ph

# Raise the verbosity of the helper modules' loggers as well.
ph.log.setLevel("DEBUG")
utl.log.setLevel("DEBUG")

# Absolute path of the data folder; both names are aliases for the same string
# and both are used further down.
data_dir = data_directory = os.path.abspath("../dat/")

2025-01-27 20:39:24,138 | INFO     | notebook | <module> | project directory: /home/carlos/Git/CarlosErnestoMA/mouse_visual_timescales/experiment_analysis


In [2]:
# Load ccf coordinates
from pathlib import Path
from allensdk.core.reference_space_cache import ReferenceSpaceCache
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache

import re
# Units whose structure name carries no layer are labeled "NoLayer".
# TODO: "NoLayer" may not be the best choice; check whether there is a better label that is easy to filter out in later processing.
# Layer names are all strings and should only be one of: "1", "2/3", "4", "5", "6".
# Note: layers 6a and 6b are both labeled "6". Could be worth revising.
# Useful for checking layer names: https://atlas.brain-map.org/atlas?atlas=602630314
def get_layer_name(structure_name):
    """Extract the layer label from a full structure name.

    The label is whatever follows the last-matching ``"layer "`` up to the end
    of the name, e.g. ``"Anterior area, layer 2/3"`` -> ``"2/3"``. Sublayers
    ``6a`` and ``6b`` are merged into ``"6"``.

    Parameters
    ----------
    structure_name : str
        Structure name such as ``"Anterior area, layer 6a"``.

    Returns
    -------
    str
        The layer label, or ``"NoLayer"`` when the name does not end in
        ``"layer <something>"`` (this includes names that merely end with the
        word "layer", e.g. ``"Dentate gyrus, molecular layer"``).

    Raises
    ------
    TypeError
        If ``structure_name`` is not a string (raised by ``re.search``).
    """
    match = re.search(r'layer\s+(.+)$', structure_name)
    if match is None:
        return "NoLayer"
    # Collapse the 6a / 6b sublayers into a single "6" label.
    return re.sub(r"6[ab]", "6", match.group(1))


def get_layer_name_old(acronym):
    """Legacy layer parser working on structure acronyms (kept for comparison).

    Takes the first run of digits in ``acronym`` as the layer number, e.g.
    ``"AUDd2/3"`` -> ``2`` and ``"AUDd6a"`` -> ``6``. A leading number of 3 and
    acronyms without any digit both map to ``0``. Note that any digit counts,
    so ``"CA1"`` yields ``1`` even though it is not a cortical layer.

    Parameters
    ----------
    acronym : str
        Structure acronym.

    Returns
    -------
    int
        The parsed layer number, or ``0`` as described above.
    """
    digit_runs = re.findall(r'\d+', acronym)
    if not digit_runs:
        # No number in the acronym at all.
        return 0
    first_number = int(digit_runs[0])
    return 0 if first_number == 3 else first_number
        

# Folder holding the reference-atlas files, located below the data folder.
atlas_dir = os.path.join(data_dir, 'neuroscience-raw/Allen/allen_brain_atlas')
# Alternative location of the data folder (disabled):
#data_directory = '/project.nst/neuroscience-raw/Allen/visual_coding_neuropixels'

# Manifest of the ecephys project cache, kept directly in the data folder.
manifest_path = os.path.join(data_directory, "manifest.json")

cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)

# Key and resolution handed to the reference-space cache.
# NOTE(review): resolution is presumably the voxel size in micrometres, which
# would match the division by 10 used when indexing the annotation volume - confirm.
reference_space_key = os.path.join('annotation', 'ccf_2017')
resolution = 10

# The atlas cache uses its own manifest inside atlas_dir.
rspc = ReferenceSpaceCache(resolution, reference_space_key, manifest=Path(atlas_dir) / 'manifest.json')
# ID 1 is the adult mouse structure graph
tree = rspc.get_structure_tree(structure_graph_id=1) 

# Get unit information (one row per unit, using the cache's default filters).
# TODO: adapt default filters to include all the units that we included in the analysis
units = cache.get_units()

# Get the annotation volume; it is indexed below with three integer voxel
# indices to obtain a structure id. `meta` is not used in this notebook section.
annotation, meta = rspc.get_annotation_volume()




In [3]:
# Compute layer information for each unit.
# For every unit the CCF position is converted to a voxel of the annotation
# volume, the voxel's structure id is looked up, and the layer is parsed from
# the structure name (new scheme) and from the acronym (old scheme, kept for
# comparison). Units that cannot be resolved are skipped and their location is
# collected in `unit_location_exceptions`.
layers = []
layers_old = []
units_with_layer = []
structure_name_list = []
structure_acronyms = []
old_structure_name_list = []
unit_location_exceptions = []

# The id -> name mapping does not change inside the loop; build it only once.
structure_names_by_id = tree.get_name_map()

ccf_columns = (
    "anterior_posterior_ccf_coordinate",
    "dorsal_ventral_ccf_coordinate",
    "left_right_ccf_coordinate",
)

for unit in units.index:
    unit_data = units.loc[unit]
    ccf_position = np.array([unit_data[column] for column in ccf_columns], dtype=float)
    if not np.isfinite(ccf_position).all():
        # Missing coordinates cannot be turned into a voxel index.
        unit_location_exceptions.append(ccf_position.tolist())
        continue
    # Voxel indices into the annotation volume, with negative indices clamped
    # to 0. np.maximum is the element-wise maximum; np.amax(value, 0) would
    # interpret the 0 as an axis and leave negative indices untouched, which
    # numpy then silently wraps around to the far end of the volume.
    x, y, z = np.maximum((ccf_position / resolution).astype('int'), 0)
    try:
        structure_id = annotation[x, y, z]
        structure_name = structure_names_by_id[structure_id]
        # Acronym is only needed for the old layer names (comparison).
        structure_acronym = tree.get_structures_by_id([structure_id])[0]["acronym"]
        layer = get_layer_name(structure_name)
        layer_old = get_layer_name_old(structure_acronym)
    except Exception:
        # Best effort: position outside the volume or without a known
        # structure. `Exception` (not a bare except) keeps the loop interruptible.
        unit_location_exceptions.append([x, y, z])
        continue
    # Append only after every lookup succeeded so that all result lists stay
    # aligned with `units_with_layer`.
    units_with_layer.append(unit)
    layers.append(layer)
    layers_old.append(layer_old)
    structure_name_list.append(structure_name)
    structure_acronyms.append(structure_acronym)
print("Number of locations which raised exceptions: ", len(unit_location_exceptions))

# From here on only the distinct structure names are needed.
structure_name_list = np.unique(structure_name_list)

Number of locations which raised exceptions:  6170


In [4]:
# Check how many units are found for each layer
# (value_counts lists the labels from most to least frequent).
layer_series = pd.Series(layers)
layer_counts = layer_series.value_counts()
print(layer_counts)


NoLayer    18587
5           6939
4           3404
2/3         2609
6           2284
1             17
dtype: int64


In [5]:
# Check how many units are found for each layer (for old version)
# In the old scheme 0 stands for "no digit in the acronym" as well as for a
# leading 3, and any digit counts as a layer (e.g. CA1 -> 1), so these counts
# are not directly comparable with the name-based ones.
layer_series_old = pd.Series(layers_old)
layer_counts_old = layer_series_old.value_counts()
print(layer_counts_old)

0    14814
5     6939
1     3742
4     3404
2     2657
6     2284
dtype: int64


In [6]:
# Build a table with one row per unit that could be assigned a structure:
# index "unit_id" (from units_with_layer), single column "layer" (from layers).
df = pd.DataFrame(
    {"layer": layers},
    index=pd.Index(units_with_layer, name="unit_id"),
)
# Save to disk as csv file inside the data folder.
df.to_csv(os.path.join(data_dir, "layers.csv"))

In [8]:
# (old data)
# Same table layout as for the current layer labels (index "unit_id", column
# "layer"), but filled with the legacy integer codes from layers_old. It is
# kept in its own variable so that `df`, which holds the current layer labels,
# is not overwritten when the notebook is run top to bottom.
df_old = pd.DataFrame(
    {"layer": layers_old},
    index=pd.Index(units_with_layer, name="unit_id"),
)
# Save to disk as csv file inside the data folder.
df_old.to_csv(os.path.join(data_dir, "layers_old.csv"))

In [7]:
# Sanity check: print every distinct structure name encountered together with
# the layer label that get_layer_name derives from it.
for name in structure_name_list:
    print(name, " = ", get_layer_name(name))

Anterior area, layer 1  =  1
Anterior area, layer 2/3  =  2/3
Anterior area, layer 4  =  4
Anterior area, layer 5  =  5
Anterior area, layer 6a  =  6
Anterior area, layer 6b  =  6
Anterior pretectal nucleus  =  NoLayer
Anterolateral visual area, layer 1  =  1
Anterolateral visual area, layer 2/3  =  2/3
Anterolateral visual area, layer 4  =  4
Anterolateral visual area, layer 5  =  5
Anterolateral visual area, layer 6a  =  6
Anterolateral visual area, layer 6b  =  6
Anteromedial visual area, layer 1  =  1
Anteromedial visual area, layer 2/3  =  2/3
Anteromedial visual area, layer 4  =  4
Anteromedial visual area, layer 5  =  5
Anteromedial visual area, layer 6a  =  6
Anteromedial visual area, layer 6b  =  6
Dentate gyrus, granule cell layer  =  NoLayer
Dentate gyrus, molecular layer  =  NoLayer
Dentate gyrus, polymorph layer  =  NoLayer
Dorsal auditory area, layer 2/3  =  2/3
Dorsal auditory area, layer 4  =  4
Dorsal auditory area, layer 5  =  5
Dorsal auditory area, layer 6a  =  6
Do

In [7]:
# Same sanity check for the legacy parser: every distinct acronym with the
# integer code from get_layer_name_old. Digits that are not layers are picked
# up too (e.g. CA1 -> 1, CA2 -> 2).
for name in np.unique(structure_acronyms):
    print(name, " = ", get_layer_name_old(name))

APN  =  0
AUDd2/3  =  2
AUDd4  =  4
AUDd5  =  5
AUDd6a  =  6
AUDd6b  =  6
AUDp2/3  =  2
AUDp4  =  4
AUDp5  =  5
AUDp6a  =  6
AUDp6b  =  6
AUDpo1  =  1
AUDpo2/3  =  2
AUDpo4  =  4
AUDpo5  =  5
AUDpo6a  =  6
AUDpo6b  =  6
CA1  =  1
CA2  =  2
CA3  =  0
DG-mo  =  0
DG-po  =  0
DG-sg  =  0
Eth  =  0
HPF  =  0
IGL  =  0
IntG  =  0
LD  =  0
LGd-co  =  0
LGd-ip  =  0
LGd-sh  =  0
LGv  =  0
LP  =  0
LT  =  0
MB  =  0
MGd  =  0
MGm  =  0
MGv  =  0
MRN  =  0
NOT  =  0
OP  =  0
PIL  =  0
PO  =  0
POL  =  0
POST  =  0
PP  =  0
PPT  =  0
PRE  =  0
PoT  =  0
ProS  =  0
RSPagl2/3  =  2
RSPagl5  =  5
RSPagl6a  =  6
RSPd6a  =  6
RSPv6a  =  6
RT  =  0
SCig  =  0
SCiw  =  0
SCop  =  0
SCsg  =  0
SGN  =  0
SSp-bfd1  =  1
SSp-bfd2/3  =  2
SSp-bfd4  =  4
SSp-bfd5  =  5
SSp-bfd6a  =  6
SSp-bfd6b  =  6
SSp-tr2/3  =  2
SSp-tr4  =  4
SSp-tr5  =  5
SSp-tr6a  =  6
SSp-tr6b  =  6
SSs2/3  =  2
SSs4  =  4
SSs5  =  5
SSs6a  =  6
SSs6b  =  6
SUB  =  0
TEa1  =  1
TEa2/3  =  2
TEa4  =  4
TEa5  =  5
TEa6a  =  6
TEa6b  =  

In [8]:
# Display the `df` table that is currently in the kernel namespace.
df

Unnamed: 0_level_0,layer
unit_id,Unnamed: 1_level_1
950907205,NoLayer
950907209,NoLayer
950907524,NoLayer
950907216,NoLayer
950907220,NoLayer
...,...
951911942,2/3
951911948,2/3
951911955,2/3
951911961,2/3
