In [31]:
""""""
import itertools
import logging
from enum import Enum
from pathlib import Path

import neurom as nm
import numpy as np
import pandas as pd
from lxml import etree
from neurom import NeuriteType
from scipy import stats

# Module-level logger; used below for the warnings emitted while reading the morphology database.
L = logging.getLogger(__name__)
# NOTE(review): a display width of 0 presumably keeps pandas from wrapping wide frames in the
# cell outputs -- confirm against the pandas `display.width` option documentation.
pd.options.display.width = 0


class DISCRETE_FEATURE_NAMES(Enum):
    """Features that `get_discrete_features` reduces to a single number per neurite type.

    Each member's value is the feature-name string passed to `nm.get` and is also used as
    the column label of the resulting table.
    """
    LEN = 'total_length'
    SURFACE_AREA = 'total_area_per_neurite'
    VOLUMES = 'neurite_volumes'
    NUMBER_OF_SECTIONS = 'number_of_sections'
    NUMBER_OF_BIFURCATIONS = 'number_of_bifurcations'
    NUMBER_OF_TERMINATIONS = 'number_of_terminations'


class CONTINUOUS_FEATURE_NAMES(Enum):
    """Features that `get_continuous_features` keeps as arrays of values per neurite type.

    Each member's value is the feature-name string passed to `nm.get` and is also used as
    the column label of the resulting table.
    """
    SECTION_LEN = 'section_lengths'
    SECTION_RADIAL_DISTANCES = 'section_radial_distances'
    SECTION_PATH_DISTANCES = 'section_path_distances'
    PARTITION_ASYMMETRY = 'partition_asymmetry'
    SEGMENT_RADII = 'segment_radii'


# Neurite types for which features are extracted, in the row order of the feature tables.
NEURITES = (
    NeuriteType.soma,
    NeuriteType.axon,
    NeuriteType.basal_dendrite,
    NeuriteType.apical_dendrite,
    NeuriteType.undefined,
)
# Row labels of the feature tables: the enum member name of every entry in NEURITES.
NEURITE_NAMES = [neurite_type.name for neurite_type in NEURITES]

def _soma_surface_areas(neuron):
    """Return NeuroM's 'soma_surface_areas' feature for `neuron`."""
    return nm.get('soma_surface_areas', neuron)


# Overrides consulted by `get_features` before its generic `nm.get` call:
# feature name -> neurite name -> callable taking the neuron.
CUSTOM_FEATURE_LOAD = {
    DISCRETE_FEATURE_NAMES.SURFACE_AREA.value: {
        NeuriteType.soma.name: _soma_surface_areas,
    },
}

# File suffixes (compared verbatim against `Path.suffix`) that mark a file as a morphology
# when the build_* functions scan a directory.
MORPH_FILETYPES = ['.h5', '.swc', '.asc']


class Features:
    """Discrete and continuous feature tables of a set of morphologies.

    Attributes:
        discrete: the per-morphology discrete tables concatenated into one DataFrame.
        continuous: the per-morphology continuous tables concatenated into one DataFrame.

    Both tables are keyed by the entries of ``index`` and their row-index levels are named
    after the values of `Features.INDEX`.
    """

    class INDEX(Enum):
        """Names of the row-index levels of the feature tables."""
        MTYPE = 'mtype'
        FILENAME = 'filename'
        NEURITE = 'neurite'

    _INDEX_NAMES = [level.value for level in INDEX]

    def __init__(self, index, discrete, continuous):
        """Concatenate per-morphology tables.

        Args:
            index: keys for the concatenation, one per table (the build_* functions pass
                ``(mtype, filename)`` tuples).
            discrete: list of discrete feature DataFrames, aligned with ``index``.
            continuous: list of continuous feature DataFrames, aligned with ``index``.
        """
        def stack(frames):
            return pd.concat(frames, keys=index, names=self._INDEX_NAMES)

        self.discrete = stack(discrete)
        self.continuous = stack(continuous)


def get_discrete_features(neuron) -> pd.DataFrame:
    """Build the table of discrete features of a neuron.

    Args:
        neuron: morphology object accepted by `get_features`.

    Returns:
        DataFrame with one row per name in NEURITE_NAMES plus a final
        ``NeuriteType.all.name`` row, and one column per DISCRETE_FEATURE_NAMES value.
        Every cell is the ``np.sum`` of the raw feature value; the final row is the
        column-wise sum of the other rows.
    """
    feature_names = [name.value for name in DISCRETE_FEATURE_NAMES]
    df = get_features(neuron, feature_names)
    # `DataFrame.applymap` is deprecated since pandas 2.1 (renamed to `DataFrame.map`, which
    # older versions lack). Mapping every column with `Series.map` is the same element-wise
    # operation and works on all pandas versions.
    df = df.apply(lambda column: column.map(np.sum))
    df.loc[NeuriteType.all.name] = df.sum()
    return df


def get_continuous_features(neuron) -> pd.DataFrame:
    """Build the table of continuous features of a neuron.

    Args:
        neuron: morphology object accepted by `get_features`.

    Returns:
        DataFrame with one row per name in NEURITE_NAMES plus a final
        ``NeuriteType.all.name`` row, and one column per CONTINUOUS_FEATURE_NAMES value.
        The final row holds, per column, the concatenation of the values of the other rows.
    """
    names = [member.value for member in CONTINUOUS_FEATURE_NAMES]
    per_neurite = get_features(neuron, names)
    # Each column is reduced to a plain list (via `.tolist()`) rather than an array so that
    # pandas treats the aggregate as a single value; it is turned back into an array below.
    pooled = per_neurite.aggregate(lambda column: np.concatenate(column).tolist())
    per_neurite.loc[NeuriteType.all.name] = pooled.apply(np.array)
    return per_neurite


def get_features(neuron, feature_names) -> pd.DataFrame:
    """Collect raw feature values of a neuron for every neurite type in NEURITES.

    Args:
        neuron: morphology object passed on to `nm.get` / the custom loaders.
        feature_names: feature-name strings; they become the columns of the result.

    Returns:
        DataFrame indexed by NEURITE_NAMES with one column per feature name. A cell comes
        from the matching CUSTOM_FEATURE_LOAD loader when one exists and returns something
        other than ``None``; otherwise from ``nm.get`` restricted to that neurite type.
    """
    table = pd.DataFrame(index=NEURITE_NAMES, columns=feature_names)
    for neurite in NEURITES:
        for feature_name in feature_names:
            loader = CUSTOM_FEATURE_LOAD.get(feature_name, {}).get(neurite.name)
            value = loader(neuron) if loader is not None else None
            if value is None:
                # No override (or the override produced nothing): use the generic getter.
                value = nm.get(feature_name, neuron, neurite_type=neurite)
            table.loc[neurite.name, feature_name] = value
    return table


def get_mtype_dict(db_file: Path) -> dict:
    """Read the morphology-name -> mtype mapping from a neuronDB XML file.

    Every ``<morphology>`` element found anywhere in the document contributes its ``<name>``
    and ``<mtype>`` child texts.

    Args:
        db_file: path of the XML file.

    Returns:
        dict mapping morphology name to mtype. Entries without a name are skipped (with a
        warning). An entry with an empty mtype is kept with that empty value (with a
        warning). When a name occurs several times with different mtypes a warning is
        logged and the last occurrence wins.
    """
    root = etree.parse(str(db_file)).getroot()
    mtype_dict = {}
    for morphology in root.iterfind('.//morphology'):
        name = morphology.findtext('name')
        if not name:
            L.warning('Empty morphology name in %s', db_file)
            # A nameless entry can never be matched to a morphology file; storing it would
            # only add a None/'' key and trigger bogus "Multiple mtypes" warnings.
            continue
        mtype = morphology.findtext('mtype')
        if not mtype:
            L.warning('Empty morphology mtype in %s', db_file)
        if name in mtype_dict and mtype_dict[name] != mtype:
            L.warning('Multiple mtypes %s %s for %s', mtype, mtype_dict[name], name)
        mtype_dict[name] = mtype
    return mtype_dict


def build_valid_morphologies(morph_dirpath: Path):
    """Extract the features of every morphology in a flat reference directory.

    The directory must contain a ``neuronDB.xml`` file that gives the mtype of each
    morphology. Files whose suffix is not in MORPH_FILETYPES are ignored.

    Args:
        morph_dirpath: directory holding the morphology files and ``neuronDB.xml``.

    Returns:
        Features built from the loaded morphologies, keyed by ``(mtype, neuron name)``.

    Raises:
        ValueError: if ``morph_dirpath`` is not a directory.
        KeyError: if a loaded morphology has no entry in ``neuronDB.xml``.
    """
    if not morph_dirpath.is_dir():
        raise ValueError(
            '"{}" must be a directory with morphology files'.format(morph_dirpath))
    db_file = morph_dirpath.joinpath('neuronDB.xml')
    mtype_dict = get_mtype_dict(db_file)
    index, discrete, continuous = [], [], []
    # Sorted so that the row order of the result does not depend on the order in which the
    # file system happens to list the directory.
    for file in sorted(morph_dirpath.iterdir()):
        if file.suffix not in MORPH_FILETYPES:
            continue
        neuron = nm.load_neuron(str(file))
        if neuron.name not in mtype_dict:
            # Same exception type as the plain dict lookup, but says what is missing where.
            raise KeyError(
                'No mtype for morphology "{}" in "{}"'.format(neuron.name, db_file))
        index.append((mtype_dict[neuron.name], neuron.name))
        discrete.append(get_discrete_features(neuron))
        continuous.append(get_continuous_features(neuron))
    return Features(index, discrete, continuous)


def build_test_morphologies(morph_dirpath: Path):
    """Extract the features of morphologies laid out as ``<morph_dirpath>/<mtype>/<file>``.

    The name of each sub-directory is used as the mtype of the morphologies inside it.
    Files whose suffix is not in MORPH_FILETYPES are ignored, as are regular files placed
    directly in ``morph_dirpath``.

    Args:
        morph_dirpath: directory holding one sub-directory per mtype.

    Returns:
        Features built from the loaded morphologies, keyed by ``(mtype, neuron name)``.

    Raises:
        ValueError: if ``morph_dirpath`` is not a directory.
    """
    if not morph_dirpath.is_dir():
        raise ValueError(
            '"{}" must be a directory'.format(morph_dirpath))
    index, discrete, continuous = [], [], []
    # Both listings are sorted so that the row order of the result is reproducible.
    for mtype_dir in sorted(morph_dirpath.iterdir()):
        if not mtype_dir.is_dir():
            # A stray file next to the mtype directories would make `iterdir()` below raise
            # NotADirectoryError.
            continue
        mtype = mtype_dir.name
        for file in sorted(mtype_dir.iterdir()):
            if file.suffix in MORPH_FILETYPES:
                neuron = nm.load_neuron(str(file))
                index.append((mtype, neuron.name))
                discrete.append(get_discrete_features(neuron))
                continuous.append(get_continuous_features(neuron))
    return Features(index, discrete, continuous)


def ks_all(feature):
    """Leave-one-out two-sample Kolmogorov-Smirnov test over a series of samples.

    Every entry of ``feature`` is tested against the concatenation of all other entries.

    Args:
        feature: object with a ``to_list()`` method (e.g. a pandas Series) whose items are
            numpy arrays.

    Returns:
        list with one item per entry of ``feature``, in order: the result of
        ``scipy.stats.ks_2samp(entry, pooled_others)``, or ``None`` when the entry is empty,
        the pooled remainder is empty, or there are no other entries at all.
    """
    def ks(sample, others):
        if not others:
            # `np.concatenate` raises ValueError on an empty sequence; this happens when
            # `feature` holds a single entry.
            return None
        pooled = np.concatenate(others)
        if sample.size and pooled.size:
            return stats.ks_2samp(sample, pooled)
        return None

    samples = feature.to_list()
    return [ks(sample, samples[:i] + samples[i + 1:]) for i, sample in enumerate(samples)]




In [32]:
# Extract the features of the reference ("valid") morphologies. The directory must also
# contain the `neuronDB.xml` file from which the mtypes are read.
valid_features = build_valid_morphologies(Path('../tests/data/valid_morphologies_mini'))
# sorted(valid_features.discrete.reset_index().set_index(['mtype','filename']).index.unique())

In [33]:
# Continuous features of the reference set: one array of values per (mtype, filename, neurite).
valid_features.continuous

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,section_lengths,section_radial_distances,section_path_distances,partition_asymmetry,segment_radii
mtype,filename,neurite,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L4_TPC,rp120803_P_1_idA,soma,[],[],[],[],[]
L4_TPC,rp120803_P_1_idA,axon,"[60.899818090346855, 21.884341490684797, 105.8...","[59.01008961873389, 80.51620160903595, 55.7208...","[60.899818090346855, 82.78415958103164, 188.62...","[0.5283018867924528, 0.625, 0.8571428571428571...","[0.7350000143051147, 0.6800000071525574, 0.625..."
L4_TPC,rp120803_P_1_idA,basal_dendrite,"[31.066471442682058, 140.00000239812718, 169.9...","[30.733882091069685, 169.10156053349084, 197.1...","[31.066471442682058, 171.06647384080924, 201.0...","[0.0, 0.0, 0.0, 0.0, 0.6666666666666666, 0.5, ...","[0.4399999976158142, 0.6074999868869781, 0.774..."
L4_TPC,rp120803_P_1_idA,apical_dendrite,"[12.007465015734574, 4.299088327143525, 4.2051...","[12.003769900573529, 16.191794116806577, 19.88...","[12.007465015734574, 16.3065533428781, 20.5117...","[0.7058823529411765, 0.9285714285714286, 0.461...","[1.8799999952316284, 1.6399999856948853, 1.344..."
L4_TPC,rp120803_P_1_idA,undefined,[],[],[],[],[]
...,...,...,...,...,...,...,...
L4_UPC,rp120521_P_1_idB,axon,"[80.8114889081482, 11.643497604469399, 4.77100...","[78.66376975295114, 89.43195264754499, 93.7966...","[80.8114889081482, 92.45498651261761, 97.22598...","[0.54, 0.9210526315789473, 0.9722222222222222,...","[0.625, 0.625, 0.625, 0.5900000035762787, 0.55..."
L4_UPC,rp120521_P_1_idB,basal_dendrite,"[12.72816151832084, 25.81686848886153, 146.688...","[12.690994054149796, 35.22455556873654, 171.74...","[12.72816151832084, 38.54503000718237, 185.233...","[0.4, 0.0, 0.6666666666666666, 0.5, 0.0, 0.0, ...","[0.9200000166893005, 0.9200000166893005, 0.957..."
L4_UPC,rp120521_P_1_idB,apical_dendrite,"[13.142382945106654, 21.016643745949583, 34.44...","[13.142330329506953, 31.943713386265596, 62.82...","[13.142382945106654, 34.15902669105624, 68.600...","[0.7, 0.875, 0.5714285714285714, 0.8, 0.75, 0....","[2.5799999237060547, 2.322499990463257, 1.8800..."
L4_UPC,rp120521_P_1_idB,undefined,[],[],[],[],[]


In [34]:
# Within each morphology, KS-test every neurite row against the pooled values of the other
# rows of that morphology (see `ks_all`).
# NOTE(review): the pooled remainder includes the `all` row, which already contains the values
# of the row being tested -- confirm this is intended.
valid_features.continuous.groupby([Features.INDEX.MTYPE.value, Features.INDEX.FILENAME.value]).transform(ks_all)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,section_lengths,section_radial_distances,section_path_distances,partition_asymmetry,segment_radii
mtype,filename,neurite,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L4_TPC,rp120803_P_1_idA,soma,,,,,
L4_TPC,rp120803_P_1_idA,axon,"(0.12162335168352324, 0.21483093457332125)","(0.3310715657406222, 1.6728589768977287e-07)","(0.3745145734647719, 1.611332289819245e-09)","(0.08892209975714553, 0.9164644850194626)","(0.45078493830456123, 0.0)"
L4_TPC,rp120803_P_1_idA,basal_dendrite,"(0.17049180327868851, 0.5559735145471552)","(0.5114754098360655, 2.727554528214604e-05)","(0.6426229508196721, 2.2017737522084246e-08)","(0.272108843537415, 0.6231133203454644)","(0.7255272990416896, 0.0)"
L4_TPC,rp120803_P_1_idA,apical_dendrite,"(0.23151693667157586, 0.05845021220126201)","(0.46067746686303385, 1.468600827059241e-06)","(0.5019145802650957, 9.225365305010058e-08)","(0.12194074710176041, 0.954394017648689)","(0.7447090034674787, 0.0)"
L4_TPC,rp120803_P_1_idA,undefined,,,,,
...,...,...,...,...,...,...,...
L4_UPC,rp120521_P_1_idB,axon,"(0.12150103899278816, 0.2200276888639262)","(0.32000977875565334, 5.564629804721832e-07)","(0.375422727457931, 1.7137568031344586e-09)","(0.09614035087719298, 0.8663093987674001)","(0.4442834883946526, 0.0)"
L4_UPC,rp120521_P_1_idB,basal_dendrite,"(0.1564625850340136, 0.21938241293264715)","(0.4663945578231293, 5.37624822438687e-09)","(0.5480272108843537, 1.811772953885793e-12)","(0.28681177976952626, 0.06953638594031442)","(0.625, 0.0)"
L4_UPC,rp120521_P_1_idB,apical_dendrite,"(0.14123544154503906, 0.773054882147471)","(0.21553884711779447, 0.27514929112483644)","(0.3281733746130031, 0.02143086951655515)","(0.36623376623376624, 0.1228654391720313)","(0.6265274810450852, 0.0)"
L4_UPC,rp120521_P_1_idB,undefined,,,,,


In [34]:
# Mean of every discrete feature per neurite type over the L23_BTC reference morphologies.
valid_features.discrete.loc['L23_BTC'].groupby(['neurite']).mean()

Unnamed: 0_level_0,total_length,total_area_per_neurite,neurite_volumes,number_of_sections,number_of_bifurcations,number_of_terminations
neurite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
soma,0.0,1100.617763,0.0,0.0,0.0,0.0
axon,21909.623738,15806.10844,1070.007553,372.571429,183.428571,185.357143
basal_dendrite,4525.554882,7747.470474,1515.062181,64.642857,29.928571,34.357143
apical_dendrite,0.0,0.0,0.0,0.0,0.0,0.0
undefined,0.0,0.0,0.0,0.0,0.0,0.0
all,26435.17862,24654.196676,2585.069734,437.214286,213.357143,219.714286


In [35]:
# Standard deviation of every discrete feature per neurite type over the L23_BTC reference
# morphologies.
valid_features.discrete.loc['L23_BTC'].groupby(['neurite']).std()

Unnamed: 0_level_0,total_length,total_area_per_neurite,neurite_volumes,number_of_sections,number_of_bifurcations,number_of_terminations
neurite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
soma,0.0,346.17362,0.0,0.0,0.0,0.0
axon,13337.462418,8635.542901,628.845275,253.82905,125.597473,125.572907
basal_dendrite,2037.909317,2924.951734,802.855859,22.186646,10.737886,11.392353
apical_dendrite,0.0,0.0,0.0,0.0,0.0,0.0
undefined,0.0,0.0,0.0,0.0,0.0,0.0
all,13579.521591,9265.807113,1066.68139,260.64452,128.172724,129.611507


In [31]:
# Extract the features of the morphologies under test; each sub-directory name of the given
# directory is used as the mtype of the files inside it.
test_features = build_test_morphologies(Path('../tests/data/test_morphologies'))

In [41]:
# Summary statistics of `number_of_sections` per neurite type over the L5_MC reference
# morphologies.
valid_features.discrete.loc['L5_MC'].groupby(['neurite'])['number_of_sections'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
neurite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
soma,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
axon,38.0,396.0,244.726102,75.0,247.0,322.0,476.5,1251.0
basal_dendrite,38.0,84.131579,47.084267,20.0,51.75,77.5,91.75,225.0
apical_dendrite,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
undefined,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
all,38.0,480.131579,259.615013,114.0,325.5,399.5,558.75,1415.0


In [37]:
# Discrete features of the morphologies under test.
test_features.discrete

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_length,total_area_per_neurite,neurite_volumes,number_of_sections,number_of_bifurcations,number_of_terminations
mtype,filename,neurite,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
L5_MC,L5_MC_ID200,soma,0.0,832.291196,0.0,0.0,0.0,0.0
L5_MC,L5_MC_ID200,axon,10740.997311,13497.459701,1349.739171,213.0,106.0,107.0
L5_MC,L5_MC_ID200,basal_dendrite,16185.499228,45763.41971,10296.769162,233.0,112.0,121.0
L5_MC,L5_MC_ID200,apical_dendrite,0.0,0.0,0.0,0.0,0.0,0.0
L5_MC,L5_MC_ID200,undefined,0.0,0.0,0.0,0.0,0.0,0.0
L5_MC,L5_MC_ID200,all,26926.496539,60093.170606,11646.508333,446.0,218.0,228.0
L5_MC,L5_MC_ID201,soma,0.0,865.224591,0.0,0.0,0.0,0.0
L5_MC,L5_MC_ID201,axon,8727.996319,10967.848012,1096.777998,171.0,85.0,86.0
L5_MC,L5_MC_ID201,basal_dendrite,10585.998108,29931.203706,6734.520655,172.0,82.0,90.0
L5_MC,L5_MC_ID201,apical_dendrite,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
# Reference statistics for the L5_MC mtype: per-neurite mean and standard deviation of every
# discrete feature over the reference morphologies.
mean = valid_features.discrete.loc['L5_MC'].groupby(['neurite']).mean()
std = valid_features.discrete.loc['L5_MC'].groupby(['neurite']).std()

In [43]:
# Show the L5_MC reference means.
mean

Unnamed: 0_level_0,total_length,total_area_per_neurite,neurite_volumes,number_of_sections,number_of_bifurcations,number_of_terminations
neurite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
soma,0.0,1215.662423,0.0,0.0,0.0,0.0
axon,21497.020458,12582.615757,718.369387,396.0,196.131579,198.421053
basal_dendrite,4866.428987,10021.738036,2642.943829,84.131579,39.894737,43.842105
apical_dendrite,0.0,0.0,0.0,0.0,0.0,0.0
undefined,0.0,0.0,0.0,0.0,0.0,0.0
all,26363.449446,23820.016216,3361.313217,480.131579,236.026316,242.263158


In [47]:
# z-score of every discrete feature of the L5_MC test morphologies relative to the L5_MC
# reference mean/std. Cells with a zero value and a zero reference std come out as NaN
# (0 / 0), as the soma rows in the output show.
z_score = (test_features.discrete.loc['L5_MC'] - mean) / std
z_score

Unnamed: 0_level_0,Unnamed: 1_level_0,total_length,total_area_per_neurite,neurite_volumes,number_of_sections,number_of_bifurcations,number_of_terminations
filename,neurite,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L5_MC_ID200,soma,,-0.893265,,,,
L5_MC_ID200,axon,-0.955574,0.135105,1.262074,-0.747775,-0.74044,-0.746448
L5_MC_ID200,basal_dendrite,4.84555,8.131367,5.002393,3.161744,3.109805,3.254001
L5_MC_ID200,apical_dendrite,,,,,,
L5_MC_ID200,undefined,,,,,,
L5_MC_ID200,all,0.046394,4.097061,5.0554,-0.13147,-0.139824,-0.109609
L5_MC_ID201,soma,,-0.816529,,,,
L5_MC_ID201,axon,-1.134411,-0.238471,0.756419,-0.919395,-0.912957,-0.917911
L5_MC_ID201,basal_dendrite,2.448475,4.529478,2.674176,1.866195,1.815945,1.94663
L5_MC_ID201,apical_dendrite,,,,,,


In [56]:
# Flag features lying more than 2 reference standard deviations from the reference mean.
# NaN z-scores compare as False and are therefore never flagged.
z_score.abs() > 2

Unnamed: 0_level_0,Unnamed: 1_level_0,total_length,total_area_per_neurite,neurite_volumes,number_of_sections,number_of_bifurcations,number_of_terminations
filename,neurite,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L5_MC_ID200,soma,False,False,False,False,False,False
L5_MC_ID200,axon,False,False,False,False,False,False
L5_MC_ID200,basal_dendrite,True,True,True,True,True,True
L5_MC_ID200,apical_dendrite,False,False,False,False,False,False
L5_MC_ID200,undefined,False,False,False,False,False,False
L5_MC_ID200,all,False,True,True,False,False,False
L5_MC_ID201,soma,False,False,False,False,False,False
L5_MC_ID201,axon,False,False,False,False,False,False
L5_MC_ID201,basal_dendrite,True,True,True,False,False,False
L5_MC_ID201,apical_dendrite,False,False,False,False,False,False


In [61]:
# One boolean per (filename, neurite): True when at least one discrete feature is flagged.
discrete_report = (z_score.abs() > 2).any(axis=1)
discrete_report

filename     neurite        
L5_MC_ID200  soma               False
             axon               False
             basal_dendrite      True
             apical_dendrite    False
             undefined          False
             all                 True
L5_MC_ID201  soma               False
             axon               False
             basal_dendrite      True
             apical_dendrite    False
             undefined          False
             all                 True
dtype: bool

In [67]:
# Per-file verdict: True only when none of the file's neurite rows was flagged.
(~discrete_report).groupby(['filename']).all()

filename
L5_MC_ID200    False
L5_MC_ID201    False
dtype: bool