In [1]:
from time import time
from configparser import ConfigParser
import pathlib as pl

from IPython.display import display, Image

import joblib as jl
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from src.root_path import config_path
from src.visualization.interactive import *

[nems.configs.defaults INFO] Saving log messages to /tmp/nems/NEMS 2022-03-04 180832.log


In [2]:
# Read the project settings. The context manager closes the file handle once it has
# been parsed, and a missing settings file still raises instead of being ignored.
config = ConfigParser()
with open(config_path / 'settings.ini') as settings_file:
    config.read_file(settings_file)

# Parameters passed to the plotting helpers through their ``meta`` argument.
meta = {'reliability': 0.1,  # r value
        'smoothing_window': 0,  # ms
        'raster_fs': 30,
        'montecarlo': 1000,
        'zscore': True,
        'stim_type': 'permutations'}

# Cached metric dataframes, keyed by the time-series metric name used later as 'diff_metric'.
analysis_cache = pl.Path(config['paths']['analysis_cache'])
file_dict = dict(
    dprime=analysis_cache / '220224_ctx_mod_metric_DF_cluster_mass',
    mean_difference=analysis_cache / '220301_ctx_mod_metric_DF_mean_diff_cluster_mass',
    t_statistic=analysis_cache / '220303_ctx_mod_metric_DF_tstat_cluster_mass',
)

### same example cell as in figure 1 ###
prb_idx = 3 - 1  # selected probe. the -1 is to account for 0 not being used
ctx_pair = [0, 1]  # pair of contexts to compare and exemplify d'
cellid = 'ARM021b-36-8'


def format_dataframe(DF):
    """
    Filter and tidy a raw context-modulation metric dataframe for plotting.

    Keeps the rows whose ``analysis`` is 'SC' and whose ``siteid`` is not 'TNC010a',
    normalises the probe and context_pair formatting, renames two metrics, replaces
    missing values with 0 and adds the derived columns ``id``, ``trans_pair`` and
    ``stim_count``. The input dataframe is not modified.

    :param DF: dataframe with the columns analysis, siteid, cellid, source,
        mult_comp_corr, region, context_pair ('<int>_<int>' strings), probe, metric
        and value. ``cluster_threshold`` is optional and defaults to 0 when absent.
    :return: new dataframe with the columns source, mult_comp_corr, cluster_threshold,
        region, site, context_pair, probe, metric, value, id, trans_pair, stim_count.
    :raises KeyError: if a required column is missing from DF.
    """
    keep_analysis = DF.analysis.isin(['SC'])
    keep_sites = ~DF.siteid.isin(['TNC010a'])
    mask = keep_analysis & keep_sites

    good_cols = ['source', 'mult_comp_corr', 'cluster_threshold', 'region', 'siteid', 'cellid', 'context_pair',
                 'probe', 'metric', 'value']

    # cluster_threshold is the only optional column; every other missing column raises KeyError
    available = [col for col in good_cols if col != 'cluster_threshold' or col in DF.columns]

    # explicit copy: the edits below must neither touch the caller's dataframe
    # nor operate on an ambiguous view of it
    filtered = DF.loc[mask, available].copy()
    if 'cluster_threshold' not in filtered.columns:
        filtered['cluster_threshold'] = 0
        filtered = filtered[good_cols]

    filtered['probe'] = [int(p) for p in filtered['probe']]

    # zero-pad both context numbers so the pairs sort and display consistently, e.g. '0_1' -> '00_01'
    split_pairs = [cp.split('_') for cp in filtered['context_pair']]
    filtered['context_pair'] = [f"{int(parts[0]):02d}_{int(parts[1]):02d}" for parts in split_pairs]

    # rename metrics and analysis for ease of plotting
    filtered['metric'] = filtered['metric'].replace({'significant_abs_mass_center': 'duration',
                                                     'significant_abs_sum': "amplitude"})

    # rows without a cellid fall back to the site name as their identifier
    filtered['id'] = filtered['cellid'].fillna(value=filtered['siteid'])
    filtered = filtered.drop(columns=['cellid']).rename(columns={'siteid': 'site'})

    filtered['value'] = filtered['value'].fillna(value=0)

    # permutation related preprocessing.
    # creates a new column relating probe with context pairs
    # reshape keeps ctx two-dimensional even when no rows survive the filter
    ctx = np.asarray([row.split('_') for row in filtered.context_pair], dtype=int).reshape(-1, 2)
    prb = np.asarray(filtered.probe, dtype=int)

    silence = ctx == 0
    same = ctx == prb[:, None]
    different = np.logical_and(~silence, ~same)

    # assignment order matters: 'same' overrides 'silence' where both masks are true
    name_arr = np.full(ctx.shape, None, dtype=object)
    name_arr[silence] = 'silence'
    name_arr[same] = 'same'
    name_arr[different] = 'diff'

    # Join with plain Python instead of np.apply_along_axis: the latter sizes its output
    # from the first row's result and silently truncates longer labels in later rows.
    # swaps classification names to not have repetitions i.e. diff_same == same_diff
    canonical = {'same_silence': 'silence_same',
                 'diff_silence': 'silence_diff',
                 'diff_same': 'same_diff'}
    labels = ['_'.join(pair) for pair in name_arr]
    filtered['trans_pair'] = [canonical.get(label, label) for label in labels]

    # column specifying number of different sounds used
    nstim = filtered.groupby(['id']).agg(stim_count=('probe', 'nunique'))
    filtered = pd.merge(filtered, nstim, on='id')

    return filtered

# Load each cached dataframe, tidy it, and tag its rows with the metric it came from.
metric_frames = []
for diff_metric, file in file_dict.items():
    metric_frames.append(format_dataframe(jl.load(file)).assign(diff_metric=diff_metric))

# Stack all metrics into a single long-format table with a fresh integer index.
longDF = pd.concat(metric_frames, axis=0, ignore_index=True)

longDF.head()

Unnamed: 0,source,mult_comp_corr,cluster_threshold,region,site,context_pair,probe,metric,value,id,trans_pair,stim_count,diff_metric
0,real,bf_cp,1.0,A1,ley072b,00_01,1,duration,0.0,ley072b-03-1,silence_same,4,dprime
1,real,bf_cp,1.0,A1,ley072b,00_02,1,duration,0.0,ley072b-03-1,silence_diff,4,dprime
2,real,bf_cp,1.0,A1,ley072b,00_03,1,duration,0.0,ley072b-03-1,silence_diff,4,dprime
3,real,bf_cp,1.0,A1,ley072b,00_04,1,duration,0.0,ley072b-03-1,silence_diff,4,dprime
4,real,bf_cp,1.0,A1,ley072b,01_02,1,duration,0.0,ley072b-03-1,same_diff,4,dprime


In [3]:
# list the column names of the combined long-format table
longDF.columns

Index(['source', 'mult_comp_corr', 'cluster_threshold', 'region', 'site',
       'context_pair', 'probe', 'metric', 'value', 'id', 'trans_pair',
       'stim_count', 'diff_metric'],
      dtype='object')

# Differences between multiple comparisons
In the table we see the percentage of context-pair * probe * neuron combinations that are significantly modulated,
considering different Bonferroni corrections for multiple comparisons: context and probe (bf_cp), context, probe and time (bf_cpt),
neuron, context, probe and time (bf_ncpt), just time (bf_t), the old criterion of 3 consecutive time bins, and different
cluster thresholds for the cluster mass analysis.

In [None]:
# Long -> wide: one row per combination of the index columns, one column per metric name.
# 'first' keeps the first value found for each combination.
pivoted = pd.pivot_table(
    longDF,
    values='value',
    index=['source', 'diff_metric', 'mult_comp_corr', 'cluster_threshold',
           'region', 'stim_count',
           'context_pair', 'probe',
           'id', 'site'],
    columns=['metric'],
    aggfunc='first',
)

In [None]:

# Percentage of rows whose 'duration' is greater than zero, for each combination of
# source, time-series metric, multiple-comparison correction and cluster threshold.
grouped = pivoted.groupby(['source', 'diff_metric', 'mult_comp_corr', 'cluster_threshold'])
counts = grouped.agg(
    signif_percent=('duration', lambda dur: (dur > 0).sum() / dur.size * 100)
)
print(counts)


## different distributions in context modulation space
Scatter of the amplitude and duration of contextual modulations, calculated for different time series metrics
(dprime, mean difference and t-statistic) and with different Bonferroni corrections.


In [None]:
idx = pd.IndexSlice
mult_comps = ['none', 'bf_cp', 'bf_ncp']
clust_threshs = [0.05, 2]

# The leading index levels of `pivoted` are source, diff_metric, mult_comp_corr, cluster_threshold:
# keep real data, every metric, and only the corrections / thresholds listed above.
toplot = pivoted.loc[idx['real', :, mult_comps, clust_threshs], :].reset_index()

# One facet per (time-series metric, correction) combination, points coloured by site.
scat = px.scatter(
    toplot,
    x='duration',
    y='amplitude',
    facet_col='mult_comp_corr',
    facet_row='diff_metric',
    hover_data=['id', 'context_pair', 'probe'],
    color='site',
)

# Hide the legend and let every facet use its own y-axis range.
_ = scat.update_layout(showlegend=False).update_yaxes(matches=None)

#### big figure output
This figure pushes the notebook size over 100MB, which GitHub does not accept.
Instead of displaying the interactive figure, a much smaller static PNG is shown for long-term storage.

In [None]:
# scat.show()  # interactive version; produces a very large notebook file
# Render the figure to static PNG bytes and display those instead, to save space.
img_bytes = scat.to_image(format="png")
Image(data=img_bytes)

# example neuron from the previous plot.
This neuron appears in the lower-threshold (permissive) cluster analysis but not in the high-threshold one.

In [None]:
# Candidate examples: leave exactly one assignment uncommented.
# cellid, contexts, probes = 'ARM021b-36-8', (0, 1), 3  # paper example
# cellid, contexts, probes = 'ARM017a-31-1', (0,3), 3 # odd tail in bf_cp-2.0
cellid, contexts, probes = 'TNC008a-05-1', (0,8), 7 # odd tail in bf_cp-2.0

# Time-series quantification figure for the selected example. Despite the variable name,
# the metric requested here is the t-statistic, with the permissive 0.05 cluster threshold.
dprm = plot_time_ser_quant(
    cellid, contexts, probes,
    source='real',
    multiple_comparisons_axis=[1,2],
    consecutive=0,
    cluster_threshold=0.05,
    fn_name='t_statistic',
    meta=meta,
)

# PSTH figure for the same cell, context pair and probe.
psth = plot_psth_pair(cellid, contexts, probes)

In [None]:
# Side-by-side panels: PSTH on the left, time-series quantification on the right.
subplots = make_subplots(rows=1, cols=2)
subplots.update_layout(showlegend=False)

# column 1: PSTH traces, then a dotted vertical line at x=0
psth_traces = psth['data']
_ = subplots.add_traces(psth_traces, rows=[1] * len(psth_traces), cols=[1] * len(psth_traces))
_ = subplots.add_vline(x=0, line_width=2, line_color='black', line_dash='dot', opacity=1, row=1, col=1)

# column 2: traces copied from the quantification figure
quant_traces = dprm['data']
_ = subplots.add_traces(quant_traces, rows=[1] * len(quant_traces), cols=[2] * len(quant_traces))

# subplots.show()
# static PNG rendering instead of the interactive figure
subplots_bytes = subplots.to_image(format="png")
Image(data=subplots_bytes)

### find example clearly divergent between thresholds

In [None]:
idx = pd.IndexSlice
clust_threshs = [1.0, 2.0]

# real data, dprime metric, bf_cp correction, at the two cluster thresholds being compared
filt = pivoted.loc[idx['real', 'dprime', 'bf_cp', clust_threshs], :].reset_index()

# one row per (context_pair, probe, id), one amplitude column per cluster threshold
piv2 = filt.pivot_table(
    index=['context_pair', 'probe', 'id'],
    columns=['cluster_threshold'],
    values='amplitude',
    aggfunc='first',
)

# amplitude at threshold 1.0 minus amplitude at threshold 2.0; largest differences first
piv2['diff'] = piv2[1.0] - piv2[2.0]
piv2 = piv2.sort_values('diff', ascending=False)
piv2.head(5)

In [None]:
cellid, contexts, probes = 'TNC006a-07-1', (2,10), 2 # huge difference between thresholds

# same parameter values as the ``meta`` defined in the setup cell
meta = dict(
    reliability=0.1,  # r value
    smoothing_window=0,  # ms
    raster_fs=30,
    montecarlo=1000,
    zscore=True,
    stim_type='permutations',
)

psth = plot_psth_pair(cellid, contexts, probes)

# dprime quantification at the low (1.0) cluster threshold ...
dprm_th_lo = plot_time_ser_quant(
    cellid, contexts, probes,
    source='real', multiple_comparisons_axis=[1,2], consecutive=0,
    cluster_threshold=1.0, fn_name='dprime', meta=meta,
)

# ... and at the high (2.0) cluster threshold
dprm_th_hi = plot_time_ser_quant(
    cellid, contexts, probes,
    source='real', multiple_comparisons_axis=[1,2], consecutive=0,
    cluster_threshold=2.0, fn_name='dprime', meta=meta,
)

# Three panels in one row: PSTH | low threshold | high threshold
subplots = make_subplots(rows=1, cols=3)
subplots.update_layout(showlegend=False)
for col, panel in enumerate((psth, dprm_th_lo, dprm_th_hi), start=1):
    traces = panel['data']
    _ = subplots.add_traces(traces, rows=[1] * len(traces), cols=[col] * len(traces))
    if col == 1:
        # dotted line at x=0 on the PSTH panel, added right after its traces
        _ = subplots.add_vline(x=0, line_width=2, line_color='black', line_dash='dot', opacity=1, row=1, col=1)

subplots.show()
# subplots_bytes = subplots.to_image(format="png")
# Image(subplots_bytes)

# check only good examples of the t-statistic-based metric distribution

In [None]:
idx = pd.IndexSlice

# real data, t-statistic metric, bf_cp correction, cluster threshold 0.05
toplot = pivoted.loc[idx['real', 't_statistic', 'bf_cp', 0.05], :].reset_index()

# duration vs amplitude, coloured by region; the legend stays visible here
scat = px.scatter(
    toplot,
    x='duration',
    y='amplitude',
    hover_data=['id', 'context_pair', 'probe'],
    color='region',
)

# _ = scat.update_layout(showlegend=False)
_ = scat.update_yaxes(matches=None)
scat.show()

In [None]:
cellid, contexts, probes = 'TNC006a-07-1', (2,10), 2
# cellid, contexts, probes = 'CRD011c-50-4', (0,4), 4

# same parameter values as the ``meta`` defined in the setup cell
meta = dict(
    reliability=0.1,  # r value
    smoothing_window=0,  # ms
    raster_fs=30,
    montecarlo=1000,
    zscore=True,
    stim_type='permutations',
)

psth = plot_psth_pair(cellid, contexts, probes)

# NOTE: despite its name, this holds the t-statistic quantification at cluster threshold 0.05,
# requested with multiple_comparisons_axis=None
dprm_th_hi = plot_time_ser_quant(
    cellid, contexts, probes,
    source='real', multiple_comparisons_axis=None, consecutive=0,
    cluster_threshold=0.05, fn_name='t_statistic', meta=meta,
)

# Two panels in one row: PSTH | t-statistic quantification
subplots = make_subplots(rows=1, cols=2)
subplots.update_layout(showlegend=False)

psth_traces = psth['data']
_ = subplots.add_traces(psth_traces, rows=[1] * len(psth_traces), cols=[1] * len(psth_traces))
_ = subplots.add_vline(x=0, line_width=2, line_color='black', line_dash='dot', opacity=1, row=1, col=1)

quant_traces = dprm_th_hi['data']
_ = subplots.add_traces(quant_traces, rows=[1] * len(quant_traces), cols=[2] * len(quant_traces))

subplots.show()
# subplots_bytes = subplots.to_image(format="png")
# Image(subplots_bytes)