# Assembling data
Trying to tease apart several potentially related variables:
* Mitochondrial coverage
* Mitochondrial index (mito pl / total pl)
* Linear synapse density
* Soma volume
* Nucleus volume
* Mitochondrial density within the somatic cytosol
* Soma surface synapse density
* Depth

This notebook is intended to collect this data in a single dataframe for later analysis and interpretation.

### Downloading data
Uncomment and run the cell below to download the data required by the rest of this notebook.

WARNING: This notebook has high data requirements (mostly the meshes) and produces no visualizations. The other notebooks will work without running this one, because its results are included in the intermediate data tarball.

In [1]:
#! bash downloadData.sh
#! bash downloadSkeletons.sh
#! bash downloadMeshes.sh

In [2]:
from collections import Counter

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from meshparty import trimesh_io
from lib import skel, compartment, u, coverage, plot, mesh

# Reading data

In [3]:
# Paths of the input files read by this notebook, relative to the working
# directory.
# NOTE(review): the "v185" suffix presumably identifies a dataset version -
# confirm.
MITODF_FILENAME = "data/pni_mito_cellswskel_v185.csv"
CLEANIDS_FILENAME = "data/clean_compartment_ids_v185.csv"
DISTDF_FILENAME = "data/mito_to_skel_v185.h5"
COMPLETE_FILENAME = "data/complete_soma_ids_v185.csv"
SOMAVOL_FILENAME = "data/neuron_somavol_v185.csv"
POSTSYNDF_FILENAME = "data/neuron_received_synapses_v185.csv"
NUCDF_FILENAME = "data/pni_nucleus_segments_v185.csv"
NUC_LOOKUP_FILENAME = "data/cleanandcomplete_nucleus_lookup.csv"
TRANSFORM_FILENAME = "data/aibs_depth_rotation.npy"

In [4]:
# Load every input up front; `u` and `compartment` are project helpers
# imported from `lib`.
# Per-mitochondrion table; later cells use its `cellid` and `mito_vx` columns
# and match its index against `distdf.mitoids`.
mitodf = pd.read_csv(MITODF_FILENAME, index_col=0)
cleanids = u.read_ids(CLEANIDS_FILENAME)
# Table with `cellid`, `nodeids` and `mitoids` columns (as used further down).
distdf = compartment.read_dist_df(DISTDF_FILENAME)
completeids = u.read_ids(COMPLETE_FILENAME)
# Soma volumes; the `vols` column is turned into a lookup in a later cell.
somavol = pd.read_csv(SOMAVOL_FILENAME, index_col=0)
# Synapse table; filtered below by `post_pt_root_id`.
postsyndf = pd.read_csv(POSTSYNDF_FILENAME, index_col=0)
# Nucleus segments; the `size` column is scaled to a volume below.
nucdf = pd.read_csv(NUCDF_FILENAME, index_col=0)
# Lookup indexed by cell id; its values are used to index `nucdf` below.
nuc_lookup = u.read_lookup(NUC_LOOKUP_FILENAME, sep=',')
# Matrix applied to soma coordinates in the "Depth" section.
pinky_transform = np.load(TRANSFORM_FILENAME)

In [5]:
# Cell ids present in both id lists; every later cell iterates over these.
# NOTE(review): element order comes from set iteration, so positional lookups
# such as `cleanandcomplete[32]` depend on that order.
cleanandcomplete = list(set(completeids).intersection(cleanids))

In [6]:
# Build a {cell id: soma volume} lookup from the `vols` column.
# The table is re-read from disk instead of reusing the `somavol` DataFrame
# loaded earlier: rebinding `somavol` from a DataFrame to a dict made this cell
# fail on `.index` when executed a second time. Reading the file here keeps the
# cell idempotent while still leaving `somavol` bound to the dict that later
# cells index by cell id.
_somavol_table = pd.read_csv(SOMAVOL_FILENAME, index_col=0)
somavol = dict(zip(_somavol_table.index, _somavol_table.vols))

# Dendrite statistics
* Mitochondrial coverage
* Mitochondrial index
* Linear synapse density

In [7]:
def synapses_per_node(cellid):
    """Count the synapses received by `cellid` per nearest skeleton vertex.

    Rows of `postsyndf` whose `post_pt_root_id` equals `cellid` are selected,
    their coordinates are extracted and scaled, and each one is assigned to
    the closest vertex of the cell's skeleton.

    Returns a `Counter` keyed by the index of the nearest skeleton vertex.
    """
    received = postsyndf[postsyndf.post_pt_root_id == cellid]
    # NOTE(review): the factors look like a voxel -> nm conversion - confirm.
    centroids = u.extract_coords_dlcsv(received) * [3.58, 3.58, 40]

    neuron = skel.read_neuron_skel(cellid)
    vertex_tree = u.KDTree(neuron.vertices)

    # query() returns (distances, indices); only the indices are counted
    nearest_vertices = vertex_tree.query(centroids)[1]
    return Counter(nearest_vertices)


def compute_all_cov_and_densities(cellids):
    """Collect the per-compartment coverage table of every cell in `cellids`.

    For each cell, the skeleton nodes listed for it in `distdf` and its
    per-node synapse counts are passed to
    `coverage.bulk_mitochondrial_coverage_by_comp`; the resulting table is
    tagged with a `cellid` column.

    Returns the concatenation of all per-cell tables with a fresh index.
    Raises ValueError (from `pd.concat`) when `cellids` is empty.
    """
    subdfs = list()
    for (i, cellid) in enumerate(cellids):
        print(f"#{i+1} of {len(cellids)}", end="\r")
        covered = distdf.nodeids[distdf.cellid == cellid]
        synapse_nodes = synapses_per_node(cellid)
        subdf = coverage.bulk_mitochondrial_coverage_by_comp(
            cellid, covered, synapse_nodes=synapse_nodes)
        # Assign positionally with a plain array. Assigning a Series aligns on
        # the index, so a non-default index on `subdf` would produce NaN and
        # cast the 64-bit ids to float, losing precision.
        subdf["cellid"] = np.full(len(subdf), cellid, dtype=np.uint64)
        subdfs.append(subdf)

    return pd.concat(subdfs, ignore_index=True)

In [8]:
covdf = compute_all_cov_and_densities(cleanandcomplete)

#65 of 65

In [9]:
# NOTE(review): ad-hoc lookup of a single cell id by position; the value is
# only displayed and is not assigned to anything.
cleanandcomplete[32]

648518346349538365

In [10]:
# Keep only compartment labels 2 and 3; later cells split the rows on these
# labels into `basal_covdf` (2) and `apical_covdf` (3).
covdf = covdf[covdf.compartment.isin([2, 3])]

In [11]:
covdf.head()

Unnamed: 0,coverage,compartment,pathlength,synapsecount,synapse_density,cellid
2,0.79722,2,1135.767461,1987,1.749478,648518346349539076
3,0.781562,3,436.910114,870,1.991256,648518346349539076
6,0.784184,2,1670.96625,3122,1.86838,648518346349538053
7,0.762532,3,565.514823,1018,1.80013,648518346349538053
10,0.750715,2,991.071646,1643,1.657801,648518346349539591


### Mitochondrial Coverage Factor

In [12]:
mitocovfactor = coverage.compute_bulk_mitocovfactor(distdf, cleanandcomplete)

#65 of 65

In [13]:
# Keep only node labels 2 and 3, the same labels retained in `covdf`, which
# this table is merged with on (cellid, compartment/nodelbl1).
mitocovfactor = mitocovfactor[mitocovfactor.nodelbl1.isin([2, 3])]

In [14]:
mitocovfactor.head()

Unnamed: 0,nodelbl1,mitocovfactor,cellid
2,2,0.913667,648518346349539076
3,3,0.984025,648518346349539076
6,2,0.851435,648518346349538053
7,3,0.841754,648518346349538053
10,2,0.879747,648518346349539591


In [15]:
# Attach the coverage factor to each (cell, compartment) row; the right-hand
# key column duplicates `compartment`, so it is dropped after the join.
covdf = (
    pd.merge(
        covdf,
        mitocovfactor,
        left_on=["cellid", "compartment"],
        right_on=["cellid", "nodelbl1"],
    )
    .drop(columns=["nodelbl1"])
)

In [16]:
covdf.head()

Unnamed: 0,coverage,compartment,pathlength,synapsecount,synapse_density,cellid,mitocovfactor
0,0.79722,2,1135.767461,1987,1.749478,648518346349539076,0.913667
1,0.781562,3,436.910114,870,1.991256,648518346349539076,0.984025
2,0.784184,2,1670.96625,3122,1.86838,648518346349538053,0.851435
3,0.762532,3,565.514823,1018,1.80013,648518346349538053,0.841754
4,0.750715,2,991.071646,1643,1.657801,648518346349539591,0.879747


### % path length near soma (within 20$\mathrm{\mu}$m)

In [17]:
allnearpls = coverage.path_length_near_somas(cleanandcomplete, distthresh=20)

#65 of 65

In [18]:
# Give the path-length column a name that records the 20um threshold
plwithinthresh = allnearpls.rename(columns={"pathlength": "%pl within 20um"})

In [19]:
# Inner join on (cell, compartment); the duplicated right-hand key column is
# dropped afterwards.
covdf = (
    pd.merge(
        covdf,
        plwithinthresh,
        left_on=["cellid", "compartment"],
        right_on=["cellid", "nodelbl1"],
    )
    .drop(columns="nodelbl1")
)

In [20]:
covdf.head()

Unnamed: 0,coverage,compartment,pathlength,synapsecount,synapse_density,cellid,mitocovfactor,%pl within 20um
0,0.79722,2,1135.767461,1987,1.749478,648518346349539076,0.913667,0.532621
1,0.781562,3,436.910114,870,1.991256,648518346349539076,0.984025,0.214843
2,0.784184,2,1670.96625,3122,1.86838,648518346349538053,0.851435,0.374298
3,0.762532,3,565.514823,1018,1.80013,648518346349538053,0.841754,0.207247
4,0.750715,2,991.071646,1643,1.657801,648518346349539591,0.879747,0.628978


# Soma statistics
* Soma Volume
* Nucleus Volume
* Mitochondrial density within the somatic cytosol
* Somatic synapse density

### Soma Volume

In [21]:
# One soma volume per row, looked up by the row's cell id
covdf["somavol"] = [somavol[cellid] for cellid in covdf["cellid"]]

### Nucleus volume

Nuclei were segmented at a coarse resolution (the voxel volume is specified here in $\mu$m$^3$).

In [22]:
# Volume of a single nucleus-segmentation voxel.
# NOTE(review): presumably 57.28 x 57.28 x 40 nm, with each edge divided by
# 1000 to give um, so the product is in um^3 - confirm.
nuclei_voxelres = (57.28/1000.
                   * 57.28/1000.
                   * 40/1000.)

# Scale each segment's `size` by the voxel volume to get a nucleus volume.
nucdf["vol"] = nucdf["size"] * nuclei_voxelres

In [23]:
covdf.head()

Unnamed: 0,coverage,compartment,pathlength,synapsecount,synapse_density,cellid,mitocovfactor,%pl within 20um,somavol
0,0.79722,2,1135.767461,1987,1.749478,648518346349539076,0.913667,0.532621,1397.574125
1,0.781562,3,436.910114,870,1.991256,648518346349539076,0.984025,0.214843,1397.574125
2,0.784184,2,1670.96625,3122,1.86838,648518346349538053,0.851435,0.374298,1168.690198
3,0.762532,3,565.514823,1018,1.80013,648518346349538053,0.841754,0.207247,1168.690198
4,0.750715,2,991.071646,1643,1.657801,648518346349539591,0.879747,0.628978,1406.871081


In [24]:
# Map each row's cell id to its nucleus id, then to that nucleus' volume
nucleus_volumes = nucdf["vol"]
covdf["nucvol"] = [nucleus_volumes.loc[nuc_lookup[cellid]]
                   for cellid in covdf["cellid"]]

### Mitochondrial density within the somatic cytosol

In [25]:
def all_somatic(mitodf, distthresh=15_000):
    """Drop, cell by cell, the mitochondria attached to far-away nodes.

    For every cell id in `mitodf.cellid`, skeleton vertices further than
    `distthresh` from the skeleton root are found, and rows of `mitodf` whose
    index appears in `distdf.mitoids` for any of those vertices are removed.

    Returns the concatenation of the remaining rows of every cell.
    """
    cellids = np.unique(mitodf.cellid)
    n_cells = len(cellids)
    kept = []
    for position, cellid in enumerate(cellids, start=1):
        print(f"#{position} of {n_cells}", end="\r")
        neuron = skel.read_neuron_skel(cellid)
        cell_mitos = mitodf[mitodf.cellid == cellid]
        cell_dists = distdf[distdf.cellid == cellid]

        # distance of every skeleton vertex to the root vertex
        root_coord = neuron.vertices[neuron.root]
        dist_to_root = eucdist(neuron.vertices, root_coord)

        far_nodes = np.flatnonzero(dist_to_root > distthresh)
        far_mitos = cell_dists.mitoids[cell_dists.nodeids.isin(far_nodes)]

        kept.append(cell_mitos[~cell_mitos.index.isin(far_mitos)])

    return pd.concat(kept)


def eucdist(arr, coord, res=[1, 1, 1]):
    """Euclidean distance from each row of `arr` to `coord`.

    `arr` is multiplied element-wise by `res` before `coord` is subtracted;
    `coord` itself is not rescaled.
    """
    scaled = arr * res
    offsets = scaled - coord
    return np.linalg.norm(offsets, axis=1)

In [26]:
somaticdf = all_somatic(mitodf[mitodf.cellid.isin(cleanandcomplete)])

#65 of 65

In [27]:
somaticsums = somaticdf.groupby("cellid")["mito_vx"].sum()

In [28]:
somaticcounts = somaticdf.groupby("cellid")["mito_vx"].count()

In [29]:
# Volume of one mitochondria-segmentation voxel.
# NOTE(review): presumably 7.16 x 7.16 x 40 nm converted to um^3 - confirm.
mito_voxelres = (7.16/1000. * 7.16/1000. * 40/1000.)
# Per-cell voxel sums, looked up in covdf row order and scaled by the voxel
# volume; `.loc` raises KeyError for a cell id missing from `somaticsums`.
covdf["somamitovol"] = somaticsums.loc[covdf.cellid].values * mito_voxelres

In [30]:
# Somatic mitochondrial volume divided by the soma volume that remains after
# subtracting the nucleus volume.
covdf["somamitodensity"] = (covdf["somamitovol"] / 
                               (covdf["somavol"] - covdf["nucvol"]))

In [31]:
covdf["somaticmitocount"] = somaticcounts.loc[covdf.cellid].values

In [32]:
covdf.head()

Unnamed: 0,coverage,compartment,pathlength,synapsecount,synapse_density,cellid,mitocovfactor,%pl within 20um,somavol,nucvol,somamitovol,somamitodensity,somaticmitocount
0,0.79722,2,1135.767461,1987,1.749478,648518346349539076,0.913667,0.532621,1397.574125,552.057116,73.618911,0.08707,209
1,0.781562,3,436.910114,870,1.991256,648518346349539076,0.984025,0.214843,1397.574125,552.057116,73.618911,0.08707,209
2,0.784184,2,1670.96625,3122,1.86838,648518346349538053,0.851435,0.374298,1168.690198,489.896633,72.334162,0.106563,266
3,0.762532,3,565.514823,1018,1.80013,648518346349538053,0.841754,0.207247,1168.690198,489.896633,72.334162,0.106563,266
4,0.750715,2,991.071646,1643,1.657801,648518346349539591,0.879747,0.628978,1406.871081,519.001845,94.052091,0.10593,305


### Somatic synapse density

#### Soma surface area

In [33]:
def measure_surface_areas(cellids, distthresh=15_000):
    """Measure the soma surface area of every cell in `cellids`.

    `distthresh` is forwarded to `measure_soma_surface_area`. It was
    previously accepted but ignored, so a non-default value had no effect.

    Returns a dict mapping each cell id to its masked mesh area.
    """
    surface_areas = dict()
    for (i, cellid) in enumerate(cellids):
        print(f"#{i+1} of {len(cellids)}", end="\r")
        surface_areas[cellid] = measure_soma_surface_area(
            cellid, distthresh=distthresh)

    return surface_areas


def measure_soma_surface_area(i, distthresh=15_000):
    """Area of the part of cell `i`'s mesh lying near its skeleton root.

    The mesh is restricted by `mask_mesh` to vertices closer than `distthresh`
    to the skeleton's root vertex, and the `area` attribute of the resulting
    mesh is returned.
    """
    cellskel = skel.read_neuron_skel(i)
    somapt = cellskel.vertices[cellskel.root]
    cellmesh = mesh.read_neuron_mesh(i)
    masked = mask_mesh(cellmesh, somapt, distthresh=distthresh)

    return masked.area

    
def mask_mesh(mesh, pt, distthresh=15_000):
    """Return the sub-mesh of `mesh` whose vertices lie near `pt`.

    Vertices strictly closer than `distthresh` to `pt` are kept. A face is
    kept only when all of its vertices are kept, and its vertex indices are
    renumbered to refer to the reduced vertex array.

    Note that the `mesh` parameter shadows the `mesh` module inside this
    function.

    Raises ValueError when no vertex lies within `distthresh` of `pt`
    (previously an opaque "max() arg is an empty sequence" ValueError).
    """
    keep_vertex = np.linalg.norm(mesh.vertices - pt, axis=1) < distthresh
    inds = np.flatnonzero(keep_vertex)
    if inds.size == 0:
        raise ValueError(
            f"no mesh vertices lie within distthresh={distthresh} of {pt}")

    new_verts = mesh.vertices[inds]

    # old vertex index -> new vertex index; -1 marks removed vertices and is
    # never read, because kept faces reference kept vertices only
    ind_map = np.full(len(keep_vertex), -1, dtype=inds.dtype)
    ind_map[inds] = np.arange(len(inds))

    # a boolean-mask lookup replaces np.isin (no sorting/searching needed)
    face_inds = keep_vertex[mesh.faces].all(axis=1)
    new_faces = ind_map[mesh.faces[face_inds]]

    return trimesh_io.Mesh(new_verts, new_faces)


def distance_to_pt(other_pts, pt):
    """Euclidean distance from each row of `other_pts` to the point `pt`."""
    offsets = other_pts - pt
    return np.linalg.norm(offsets, axis=1)

In [34]:
surfareas = measure_surface_areas(cleanandcomplete, distthresh=15_000)

#65 of 65

Scaling to $\mathrm{\mu}$m$^2$

In [35]:
# Divide every area by 1e6.
# NOTE(review): this yields um^2 only if the mesh coordinates are in nm (so
# areas are nm^2) - confirm.
surfareas_um2 = {k: v / 1e6 for (k, v) in surfareas.items()}

#### Soma synapse count

In [36]:
def somatic_synapses(cellids, distthresh=15_000):
    """Select, for each cell, the received synapses close to the skeleton root.

    A synapse row of `postsyndf` is kept when its scaled coordinate is
    strictly closer than `distthresh` to the root vertex of the skeleton of
    the cell named in `post_pt_root_id`.

    Returns the concatenation of the kept rows of every cell.
    """
    near_soma = []
    for idx, cellid in enumerate(cellids):
        print(f"#{idx+1} of {len(cellids)}", end="\r")
        neuron = skel.read_neuron_skel(cellid)
        root_coord = neuron.vertices[neuron.root]

        received = postsyndf[postsyndf.post_pt_root_id == cellid]
        syn_coords = u.extract_coords_dlcsv(received)
        # the synapse coordinates are rescaled, the root coordinate is not
        dist_to_root = eucdist(syn_coords, root_coord, [3.58, 3.58, 40])
        near_soma.append(received[dist_to_root < distthresh])

    return pd.concat(near_soma)

In [37]:
somaticsyn_df = somatic_synapses(cleanandcomplete)

#65 of 65

In [38]:
# Number of somatic synapse rows per postsynaptic cell
syncounts = dict(somaticsyn_df.groupby("post_pt_root_id")["id"].count())
# Synapses per unit soma surface area. A cell without any somatic synapse has
# no group above, so its count defaults to 0 instead of raising KeyError.
syndensity = {i: syncounts.get(i, 0) / surfareas_um2[i]
              for i in cleanandcomplete}

In [39]:
# One somatic synapse density per row, looked up by the row's cell id
covdf["somasyndensity"] = [syndensity[cellid] for cellid in covdf["cellid"]]

In [40]:
# (minimum, maximum, mean) somatic synapse count over the cells in `syncounts`
min(syncounts.values()), max(syncounts.values()), sum(syncounts.values()) / len(syncounts)

(84, 172, 116.81538461538462)

# Depth

In [41]:
def get_cell_coords(cellids):
    """Map each cell id to the coordinate of its skeleton's root vertex."""
    def root_coord(cellid):
        neuron = skel.read_neuron_skel(cellid)
        return neuron.vertices[neuron.root]

    return {cellid: root_coord(cellid) for cellid in cellids}

In [42]:
coords = get_cell_coords(cleanandcomplete)

In [43]:
coords[cleanandcomplete[0]].shape

(3,)

In [44]:
# Sanity check: apply the transform to one root coordinate (as a column
# vector) with the `@` operator and show the result as a row.
(pinky_transform @ coords[cleanandcomplete[0]][:, np.newaxis]).T

array([[183975.21932167, 254778.91494975,    870.11369602]])

In [45]:
# Same product computed with np.dot, for comparison with the `@` result.
np.dot(pinky_transform, coords[cleanandcomplete[0]][:, np.newaxis])

array([[183975.21932167],
       [254778.91494975],
       [   870.11369602]])

In [46]:
def transform_coord(coord):
    """Rescale `coord` and apply `pinky_transform` to it.

    Returns the transformed coordinate flattened to one dimension.
    """
    # The first two components are rescaled by 3.58/4 to a more accurate
    # voxel resolution, which is also the resolution at which the
    # transformation was derived.
    rescaled = coord * [3.58/4, 3.58/4, 1]
    column = rescaled[:, np.newaxis]
    return (pinky_transform @ column).ravel()

def transform_all_coords(cellids):
    """Map each cell id to its transformed coordinate.

    The untransformed coordinates are read from the module-level `coords`
    dict.
    """
    transformed_coords = dict()
    for cellid in cellids:
        transformed_coords[cellid] = transform_coord(coords[cellid])
    return transformed_coords

In [47]:
transformed = transform_all_coords(cleanandcomplete)

In [48]:
# Depth is taken as component 1 of each cell's transformed coordinate.
covdf["depth"] = np.array([transformed[cellid][1] for cellid in covdf.cellid])

# Formatting

### Renaming cols

In [49]:
# Prefix the dendrite-level measurements with "dend"
dendrite_names = {
    "coverage": "dendmitocoverage",
    "synapse_density": "dendsyndensity",
}
covdf = covdf.rename(columns=dendrite_names)

### Combining basal and apical rows

In [50]:
# Split the rows by compartment label: 2 -> basal, 3 -> apical
basal_covdf = covdf[covdf["compartment"] == 2]
apical_covdf = covdf[covdf["compartment"] == 3]

In [51]:
# Per-compartment columns get a "basal"/"apical" prefix so both sets can live
# side by side in one row per cell.
basal_names = {
    "dendmitocoverage": "basalmitocoverage",
    "dendsyndensity": "basalsyndensity",
    "synapsecount": "basalsyncount",
    "mitocovfactor": "basalmitocovfactor",
    "%pl within 20um": "basal %pl within 20um",
    "pathlength": "basalpathlength",
}
apical_names = {
    "dendmitocoverage": "apicalmitocoverage",
    "dendsyndensity": "apicalsyndensity",
    "synapsecount": "apicalsyncount",
    "mitocovfactor": "apicalmitocovfactor",
    "%pl within 20um": "apical %pl within 20um",
    "pathlength": "apicalpathlength",
}
# Cell-level columns are kept on the basal table only, so they are removed
# from the apical table.
apical_dropped = ["somavol", "nucvol", "somamitovol", "somamitodensity",
                  "somaticmitocount", "somasyndensity", "depth", "compartment"]

basal_covdf = basal_covdf.rename(columns=basal_names).drop(
    columns="compartment")
apical_covdf = apical_covdf.rename(columns=apical_names).drop(
    columns=apical_dropped)

In [52]:
# One row per cell: every basal row is kept and the apical columns are
# attached where a matching cell id exists (left join).
covdf = basal_covdf.merge(apical_covdf, how="left", on="cellid")

In [53]:
covdf.head()

Unnamed: 0,basalmitocoverage,basalpathlength,basalsyncount,basalsyndensity,cellid,basalmitocovfactor,basal %pl within 20um,somavol,nucvol,somamitovol,somamitodensity,somaticmitocount,somasyndensity,depth,apicalmitocoverage,apicalpathlength,apicalsyncount,apicalsyndensity,apicalmitocovfactor,apical %pl within 20um
0,0.79722,1135.767461,1987,1.749478,648518346349539076,0.913667,0.532621,1397.574125,552.057116,73.618911,0.08707,209,0.095585,230328.257398,0.781562,436.910114,870.0,1.991256,0.984025,0.214843
1,0.784184,1670.96625,3122,1.86838,648518346349538053,0.851435,0.374298,1168.690198,489.896633,72.334162,0.106563,266,0.105546,206077.104367,0.762532,565.514823,1018.0,1.80013,0.841754,0.207247
2,0.750715,991.071646,1643,1.657801,648518346349539591,0.879747,0.628978,1406.871081,519.001845,94.052091,0.10593,305,0.110872,244008.897477,0.823897,832.012657,1428.0,1.71632,0.907548,0.197432
3,0.827072,1839.622475,3897,2.118369,648518346349538440,0.923517,0.309913,1475.944871,568.471295,84.315761,0.092913,388,0.126731,205619.742385,0.794688,640.409467,1379.0,2.15331,0.964497,0.332521
4,0.713487,1886.992268,3161,1.675153,648518346349539593,0.777246,0.450823,1053.064184,461.694352,63.940285,0.108122,239,0.094933,218137.586497,0.791422,446.927376,757.0,1.693787,0.926741,0.251376


# Saving results

In [54]:
# Write the assembled covariate table, with the row index as the first column.
# A plain string literal is used because the path has no placeholders.
covdf.to_csv("data/mito_covariates.csv")