# Descriptive statistics for the dataset
### The information extracted here is presented in the _Methods and Materials_ chapter, as well as under the first subsection of the _Results_ (_PYR and PV interneurons are tagged in freely-moving mice_) and in _Figure 1_.

In [1]:
from notebooks_constants import SRC_PATH
import sys
sys.path.insert(0, SRC_PATH)

import numpy as np
import os
import pandas as pd
import scipy.io as io

from paths import DATA_MAT_PATH, SAVE_PATH

In [2]:
# Load the dataset's MATLAB file into a dict keyed by variable name.
# simplify_cells=True asks scipy for a simplified nested structure (it implies
# squeeze_me=True and struct_as_record=False), so the fields read from it in
# the following cells come back as plain squeezed arrays.
mat = io.loadmat(DATA_MAT_PATH, simplify_cells=True)

In [3]:
# Pull the fields used throughout the notebook out of the loaded .mat dict in
# one unpacking step. The rest of the notebook indexes all six with the same
# unit index, i.e. they are treated as aligned per-unit arrays:
#   fn       - recording/session name of each unit
#   region   - used below as a 0/1 flag (1 counted as CA1, 0 as nCX)
#   shankclu - per-unit identifier; only its first entry is read in this
#              notebook (presumably the shank number - TODO confirm)
#   act, exc, inh - tagging flags (optically activated / excitatory /
#              inhibitory); presumably 0/1 valued - TODO confirm
fn, region, shankclu, act, exc, inh = (
    mat[field]
    for field in ('filename', 'region', 'shankclu', 'act', 'exc', 'inh')
)

In [4]:
# --- Raw counts over every recorded unit ---------------------------------
# `region` is summed to count CA1 units and compared to 0 to count nCX units,
# i.e. it is used as a 0/1 flag (1 = CA1, 0 = nCX).
print(f'original number of units is {len(fn)}')
print(f'of them, {region.sum()} from CA1 and {(region==0).sum()} from the nCX')

print(f'In total {(exc + inh >= 1).sum()} were tagged')
print(f'In total {exc.sum()} were tagged as excitatory')
print(f'In total {inh.sum()} were tagged as inhibitory')
print(f'In total {act.sum()} were tagged as optically activated')

# Indices of the units carrying each flag. np.flatnonzero replaces the
# element-by-element Python loop; the result is kept as a list of integer
# indices because later cells iterate over and index with these names.
# NOTE(review): assumes the flag arrays are 1-D and aligned with `fn`.
inh_inds = list(np.flatnonzero(inh))
print(f'of the inhibitory units {act[inh_inds].sum()} were optically activated')

opt_inds = list(np.flatnonzero(act))
print(f'of the excitatory units {exc[opt_inds].sum()} were optically activated')
print(f'of the inhibitory units {inh[opt_inds].sum()} were optically activated')

# --- Final PYR / PV selection --------------------------------------------
# Keep a unit only if it
#   1. carries at least one tag (act, exc or inh), and
#   2. is not simultaneously excitatory and optically activated, and
#   3. is not simultaneously excitatory and inhibitory.
# Conditions 2 and 3 discard ambiguously ("double") tagged units.
has_any_tag = (act + exc + inh) > 0
not_exc_and_act = (act + exc) < 2
not_exc_and_inh = (inh + exc) < 2
inds = list(np.flatnonzero(has_any_tag & not_exc_and_act & not_exc_and_inh))

# Within the selection, PYR = excitatory-tagged units; PV = units that are
# inhibitory-tagged and/or optically activated.
print(f'In total {len(inds)} were labeled as PYR or PV')
print(f'Total PYR is {exc[inds].sum()}')
print(f'Total PV is {((inh[inds] + act[inds]) != 0).sum()}')

original number of units is 980
of them, 781 from CA1 and 199 from the nCX
In total 445 were tagged
In total 424 were tagged as excitatory
In total 21 were tagged as inhibitory
In total 98 were tagged as optically activated
of the inhibitory units 13 were optically activated
of the excitatory units 4 were optically activated
of the inhibitory units 13 were optically activated
In total 522 were labeled as PYR or PV
Total PYR is 420
Total PV is 102


In [5]:
# Sessions in which the units with shankclu[...][0] == 1 were recorded with
# seven rather than eight channels (presumably shank number 1 - TODO confirm).
sessions_7ch = ('m649r1_16', 'm649r1_17', 'm649r1_19', 'm649r1_21', 'm649r1_22')

# Selected units that fall on those seven-channel recordings.
inds7 = [unit for unit in inds
         if fn[unit] in sessions_7ch and shankclu[unit][0] == 1]
print(f'{len(inds7)} were recorded using seven instead of eight channels')

# PV = inhibitory-tagged and/or optically activated; PYR = excitatory-tagged.
is_pv7 = (inh[inds7] + act[inds7]) != 0
print(f'of them, {exc[inds7].sum()} PYR and {is_pv7.sum()} PV')

10 were recorded using seven instead of eight channels
of them, 9 PYR and 1 PV


In [6]:
# Drop the units that were not recorded with eight channels: those with
# shankclu[...][0] == 1 in the sessions listed here. A unit is kept when it
# is from another session or has a different first shankclu entry.
sessions_7ch = ('m649r1_16', 'm649r1_17', 'm649r1_19', 'm649r1_21', 'm649r1_22')
inds = [unit for unit in inds
        if fn[unit] not in sessions_7ch or shankclu[unit][0] != 1]

# Recount the final selection (PYR = excitatory-tagged; PV = inhibitory-tagged
# and/or optically activated).
is_pv = (inh[inds] + act[inds]) != 0
print(f'In total {len(inds)} were labeled as PYR or PV')
print(f'Total PYR is {exc[inds].sum()}')
print(f'Total PV is {is_pv.sum()}')

In total 512 were labeled as PYR or PV
Total PYR is 411
Total PV is 101


In [7]:
# Region breakdown of all optically activated units (region: 1 = CA1, 0 = nCX).
opt_region = region[opt_inds]
n_opt_ca1 = opt_region.sum()
n_opt_ncx = (opt_region == 0).sum()
print(f'of the optically activated, {n_opt_ca1} from CA1 and {n_opt_ncx} from the nCX')

of the optically activated, 71 from CA1 and 27 from the nCX


In [8]:
# Units of the final selection that carry the excitatory tag (the PYR group).
pyr_inds = list(filter(lambda unit: exc[unit], inds))

# Region breakdown of the PYR group (region: 1 = CA1, 0 = nCX).
pyr_region = region[pyr_inds]
print(f'of the pyramidal cells, {pyr_region.sum()} from CA1 and {(pyr_region == 0).sum()} from the nCX')

of the pyramidal cells, 377 from CA1 and 34 from the nCX


In [9]:
# Count the units of the final selection whose region flag is 0 (nCX).
final_region = region[inds]
n_ncx_final = (final_region == 0).sum()
print(f'Number of cells from nCX in final dataset is {n_ncx_final}')

Number of cells from nCX in final dataset is 63


In [10]:
# PV group: units of the final selection that are optically activated and/or
# inhibitory-tagged.
pv_inds = [unit for unit in inds if 0 < act[unit] + inh[unit]]

# Break the PV group down by which flag(s) each unit carries; "cross" units
# carry both flags (the product of the two flags is non-zero only then).
pv_act = act[pv_inds]
pv_inh = inh[pv_inds]
print(f'Number of activated cells in PV group is {pv_act.sum()}')
print(f'Number of inhibitory cells in PV group is {pv_inh.sum()}')
print(f'Number of cross cells in PV group is {(pv_inh * pv_act).sum()}')

Number of activated cells in PV group is 93
Number of inhibitory cells in PV group is 21
Number of cross cells in PV group is 13


### The following extracts the number of spikes for each cell type and the number of samples in the dataset based on a chunk size of 25, as described in the _Chunking method_ subsection of the _Methods and Materials_ chapter.

In [11]:
# Read every CSV in the '0/' sub-directory of SAVE_PATH and stack them into a
# single DataFrame, in sorted file-name order. The next cell counts one unit
# per row of this frame (presumably '0' means "no chunking" - TODO confirm).
#
# pd.concat over a list replaces the former DataFrame.append loop: append was
# removed in pandas 2.0 and re-copied the accumulated frame on every
# iteration. Row indices are kept as read from each file, as append did.
# os.path.join avoids the doubled separator the old string concatenation
# produced. An empty directory now fails here with pandas' "No objects to
# concatenate" error instead of leaving df as None.
chunk_dir = SAVE_PATH + '0/'
files = os.listdir(chunk_dir)
df = pd.concat([pd.read_csv(os.path.join(chunk_dir, file)) for file in sorted(files)])

In [12]:
# Per-row class label and spike count; label 1 is reported as PYR and label 0
# as PV. Each row is counted as one unit.
labels = df['label'].to_numpy()
num_spikes = df['num_spikes'].to_numpy()

is_pyr = labels == 1
is_pv = labels == 0
print(f"Number of PYR spikes is {int(np.sum(num_spikes[is_pyr]))} ({np.sum(is_pyr)} units)")
print(f"Number of PV spikes is {int(np.sum(num_spikes[is_pv]))} ({np.sum(is_pv)} units)")

Number of PYR spikes is 5651196 (411 units)
Number of PV spikes is 11612978 (101 units)


In [13]:
# Read every CSV in the '25/' sub-directory of SAVE_PATH and stack them into a
# single DataFrame, in sorted file-name order. The next cell counts one sample
# per row of this frame (presumably '25' is the chunk size - TODO confirm).
#
# pd.concat over a list replaces the former DataFrame.append loop: append was
# removed in pandas 2.0 and re-copied the accumulated frame on every
# iteration. Row indices are kept as read from each file, as append did.
# os.path.join avoids the doubled separator the old string concatenation
# produced. An empty directory now fails here with pandas' "No objects to
# concatenate" error instead of leaving df as None.
chunk_dir = SAVE_PATH + '25/'
files = os.listdir(chunk_dir)
df = pd.concat([pd.read_csv(os.path.join(chunk_dir, file)) for file in sorted(files)])

In [14]:
# Each row of the chunked frame is one sample; label 1 is reported as PYR and
# label 0 as PV.
labels = df['label'].to_numpy()
n_pyr_samples = np.sum(labels == 1)
n_pv_samples = np.sum(labels == 0)
print(f"Number of PYR samples is {n_pyr_samples}")
print(f"Number of PV samples is {n_pv_samples}")

Number of PYR samples is 225850
Number of PV samples is 464473
