In [1]:
from ssri_interactions.interactions.preprocessors import InteractionsPreprocessor
from ssri_interactions.interactions.loaders import SpontaneousActivityLoader
from ssri_interactions.interactions.pairwise import PairwiseCorr
from ssri_interactions.io import load_neurons, load_derived_generic
import seaborn as sns
import pandas as pd
import numpy as np
from ssri_interactions.interactions.graph_clustering import SpectralCluster, ClusterRunner, ClusterEvaluation, ClusterResults
import warnings
from ssri_interactions.config import Config

# Folder that receives the ensemble tables; created on first use.
dd = Config.derived_data_dir.joinpath("ensembles")
dd.mkdir(exist_ok=True)

  return warn(


In [2]:
def reorder_df_affinity(df, ensemble_labels):
    """Permute rows and columns of a square frame by sorted ensemble label.

    Args:
        df: DataFrame indexed positionally on both axes (e.g. an affinity
            matrix with one row and one column per unit).
        ensemble_labels: One label per row/column of ``df``.

    Returns:
        ``df`` with the same positional permutation (``np.argsort`` of the
        labels) applied first to the rows and then to the columns.
    """
    order = np.argsort(ensemble_labels)
    rows_sorted = df.iloc[order]
    return rows_sorted.iloc[:, order]

def get_true(df, bin_width=1, shuffle=False):
    """Select the rows of ``df`` matching one bin width and shuffle flag.

    Args:
        df: DataFrame with ``bin_width`` and ``shuffle`` columns.
        bin_width: Value the ``bin_width`` column must equal (default 1).
        shuffle: Value the ``shuffle`` column must equal (default False).

    Returns:
        The matching subset of ``df``.
    """
    # ``@name`` pulls the value from this function's arguments.
    condition = "bin_width == @bin_width and shuffle == @shuffle"
    return df.query(condition)

def get_labels_from_df_ensembles(df_ensembles, session):
    """Return one session's ensemble labels, ordered by ``neuron_id``.

    Args:
        df_ensembles: DataFrame with ``session_name``, ``neuron_id`` and
            ``ensemble`` columns.
        session: Session name to select.

    Returns:
        Array of ``ensemble`` values for the session, sorted by ascending
        ``neuron_id``.
    """
    session_rows = df_ensembles.query("session_name == @session")
    by_neuron = session_rows.sort_values("neuron_id")
    return by_neuron["ensemble"].values

def get_affinity(session):
    """Compute the pairwise-correlation adjacency frame for one session.

    Loads non-shuffled spontaneous activity at ``bin_width=1``, runs it through
    a default ``InteractionsPreprocessor`` and fits a rectified ``PairwiseCorr``.

    NOTE(review): the preprocessor and ``PairwiseCorr`` options here differ from
    the ones used by the clustering factories elsewhere in this notebook —
    confirm that is intended.

    Args:
        session: Session name passed to the loader.

    Returns:
        The result of ``PairwiseCorr.get_adjacency_df()`` after fitting.
    """
    load_session = SpontaneousActivityLoader(
        session_name=session, bin_width=1, shuffle=False
    )
    preprocess = InteractionsPreprocessor()
    corr = PairwiseCorr(rectify=True)

    raw_activity = load_session()
    return corr.fit(preprocess(raw_activity)).get_adjacency_df()


In [3]:
sessions = load_neurons()["session_name"].unique()

# One preprocessor and one evaluator are shared by every runner built below.
preprocessor = InteractionsPreprocessor(z=True, minmax=False)
evaluator = ClusterEvaluation()


def loader_fac(bin_width, session, shuffle):
    """Build a spontaneous-activity loader for one session / bin width."""
    return SpontaneousActivityLoader(
        bin_width=bin_width, session_name=session, shuffle=shuffle
    )


def corr_fac():
    """Build a fresh rectified pairwise-correlation affinity calculator."""
    return PairwiseCorr(
        remove_self_interactions=True,
        rectify=True,
        shuffle=False,
        remove_duplicate_combs=True,
    )


def cluster_fac():
    """Build a fresh spectral clusterer that tries 2..7 clusters."""
    return SpectralCluster(
        n_clusters=np.arange(2, 8),
        spectral_kws={"affinity": "precomputed"},
        n_init=5,
    )


def runner_fac(bin_width, session, shuffle_load):
    """Wire loader, preprocessor, affinity, clusterer and evaluator together."""
    return ClusterRunner(
        loader=loader_fac(bin_width, session, shuffle_load),
        preprocessor=preprocessor,
        affinity_calculator=corr_fac(),
        clusterer=cluster_fac(),
        evalulator=evaluator,  # keyword spelling as used by the existing call
    )

### Detect Ensembles at Multiple Timescales & Compare to Shuffle

In [4]:
ensemble_frames = []
ensemble_stats_frames = []
# Next free global ensemble ID. -1 is reserved for "not in an ensemble" and is
# never offset.
current_max = 1
bin_widths = (0.05, 0.1, 0.5, 1, 2)
# NOTE(review): nboot is not used in this cell — confirm whether the literal
# passed to runner.run() below was meant to be this value.
nboot = 50

# NOTE(review): only the non-shuffled condition is run here, although the
# section title mentions a shuffle comparison.
for shuffle in (False,):
    for bin_width in bin_widths:
        for session in sessions:

            # calculate ensembles
            runner = runner_fac(bin_width, session, shuffle)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                _, ensembles, _, ensemble_stats = runner.run(20)

            # Shift this run's labels so they are unique across all runs;
            # -1 labels are left untouched.
            ensembles["ensemble"] = np.where(
                ensembles["ensemble"] != -1, ensembles["ensemble"] + current_max, -1
            )
            ensemble_stats["ensemble"] = np.where(
                ensemble_stats["ensemble"] != -1,
                ensemble_stats["ensemble"] + current_max,
                -1,
            )

            # Advance the counter past the IDs just handed out. Only real
            # (non -1) labels are considered and the counter never decreases:
            # taking abs().max() of an all -1 column would give 1 and reset the
            # counter to 2, letting later runs reuse IDs already assigned.
            assigned_ids = ensembles.loc[ensembles["ensemble"] != -1, "ensemble"]
            if not assigned_ids.empty:
                current_max = max(current_max, assigned_ids.max() + 1)

            # add metadata and append to list of results
            ensembles = ensembles.assign(
                session_name=session, shuffle=shuffle, bin_width=bin_width
            )
            ensemble_stats = ensemble_stats.assign(
                session_name=session, shuffle=shuffle, bin_width=bin_width
            )
            ensemble_stats_frames.append(ensemble_stats)
            ensemble_frames.append(ensembles)


df_ensembles = pd.concat(ensemble_frames).reset_index(drop=True)
df_ensembles_stats = pd.concat(ensemble_stats_frames).reset_index(drop=True)

dd = Config.derived_data_dir / "ensembles"
dd.mkdir(exist_ok=True)
df_ensembles.to_csv(dd / "spont - ensembles.csv", index=False)
df_ensembles_stats.to_csv(dd / "spont - stats.csv", index=False)

### Subset Out 1 Sec, Non-Shuffled Ensembles & Calculate Descriptive Statistics

In [5]:
# Build the "true" (bin_width == 1, non-shuffled) ensemble table, per-ensemble
# stats, and an ensemble ID that is unique across sessions.

df_ensembles = pd.read_csv(dd / "spont - ensembles.csv")
df_ensembles_stats = pd.read_csv(dd / "spont - stats.csv")

partition_frames = []
stats_frames = []
evalulater = ClusterEvaluation()
true_ensembles = get_true(df_ensembles)

for session in sessions:
    labs = get_labels_from_df_ensembles(true_ensembles, session)
    # NOTE(review): labs is ordered by neuron_id; this assumes the affinity
    # frame uses the same unit order — confirm against get_adjacency_df().
    df_affinity = get_affinity(session)

    partition_eval = evalulater.evaluate_partition(df_affinity, labs)
    partition_frames.append(partition_eval.assign(session_name=session))

    community_eval = evalulater.evaluate_communities(df_affinity, labs)
    stats_frames.append(community_eval.assign(session_name=session))

stats = pd.concat(stats_frames).reset_index(drop=True)

# One integer code per (community, session) pair; community -1 keeps ID -1.
community_key = stats["community"].astype(str) + stats["session_name"]
stats["ensemble_id"] = np.where(
    stats["community"] == -1,
    -1,
    pd.factorize(community_key)[0],
)

# Attach ensemble_id to every unit row of the true-ensemble table.
id_lookup = stats.rename(columns={"community": "ensemble"})[
    ["session_name", "ensemble", "ensemble_id"]
]
true_ensembles = id_lookup.merge(true_ensembles, on=["session_name", "ensemble"])

neuron_types = load_derived_generic("neuron_types.csv")
descriptor = ClusterResults(neuron_types=neuron_types)
stats = descriptor.ensemble_results(true_ensembles, stats)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [12]:
# Preview the foot-shock responder table as stored on disk. The file already
# carries `sig` and `response` columns, so no re-derivation is done here.
load_derived_generic("slow_ts_foot_shock_unit_responders_pre_to_shock.csv")

Unnamed: 0,A,B,mean(A),mean(B),diff,se,T,p-tukey,hedges,neuron_id,diff_inv,p_adj,sig,div_inv,response,session_name,neuron_type
0,1Pre,2Shock,-0.0,-0.16,0.16,0.05,3.18,0.00,0.18,1069,-0.16,0.00,False,-0.16,non responder,hamilton_01,SIR
1,1Pre,2Shock,-0.0,1.79,-1.79,0.06,-31.53,0.00,-1.82,1070,1.79,0.00,True,1.79,activated,hamilton_01,SIR
2,1Pre,2Shock,-0.0,1.42,-1.42,0.08,-18.13,0.00,-1.05,1071,1.42,0.00,True,1.42,activated,hamilton_01,FF
3,1Pre,2Shock,0.0,0.31,-0.31,0.07,-4.36,0.00,-0.25,1072,0.31,0.00,True,0.31,activated,hamilton_01,FF
4,1Pre,2Shock,0.0,-0.14,0.14,0.06,2.52,0.03,0.15,1074,-0.14,0.04,False,-0.14,non responder,hamilton_01,SIR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459,1Pre,2Shock,-0.0,-2.60,2.60,0.06,46.65,0.00,2.69,2282,-2.60,0.00,True,-2.60,inhibited,hamilton_30,SR
460,1Pre,2Shock,-0.0,0.69,-0.69,0.07,-10.51,0.00,-0.61,2283,0.69,0.00,True,0.69,activated,hamilton_30,SR
461,1Pre,2Shock,-0.0,-0.38,0.38,0.05,7.04,0.00,0.41,2284,-0.38,0.00,True,-0.38,inhibited,hamilton_30,SIR
462,1Pre,2Shock,-0.0,0.01,-0.01,0.05,-0.23,0.97,-0.01,2285,0.01,0.98,False,0.01,non responder,hamilton_30,SR


In [15]:
# response entropy

# Each responder table is given a common `response` column before being passed
# to ensemble_response_entropy.
slow_responders_shock = load_derived_generic("slow_ts_foot_shock_unit_responders_pre_to_shock.csv")
fast_responders = load_derived_generic("fast_fs_foot_shock_unit_responders.csv").assign(response=lambda x: x.fs_fast_response)
second_window = load_derived_generic("fast_fs_foot_shock_unit_responders_second_window.csv").assign(response=lambda x: x.response_second_window)


res_slow = descriptor.ensemble_response_entropy(
    true_ensembles,
    slow_responders_shock,
    created_col="fs_slow_entropy",
)
# The fast entropy must be computed from the fast responder table (it was
# previously fed the slow table). The result gets its own name so the loaded
# `fast_responders` frame is not overwritten and the cell can be re-run.
res_fast = descriptor.ensemble_response_entropy(
    true_ensembles,
    fast_responders,
    created_col="fs_fast_entropy",
)
# NOTE(review): "second_widnow_entropy" is misspelled but is kept as-is because
# it becomes a column name in the saved stats file; rename together with any
# downstream readers.
res_bs = descriptor.ensemble_response_entropy(
    true_ensembles,
    second_window,
    created_col="second_widnow_entropy",
)
response_entropy = res_slow.merge(res_fast, how="outer").merge(res_bs, how="outer")

stats = stats.merge(response_entropy, how="outer")

In [21]:
# Persist the true-ensemble table and its stats next to the raw detection output.
dd = Config.derived_data_dir / "ensembles"
dd.mkdir(exist_ok=True)
for frame, file_name in (
    (true_ensembles, "spont - ensembles - true.csv"),
    (stats, "spont - stats - true.csv"),
):
    frame.to_csv(dd / file_name, index=False)

### Ensemble Props

In [18]:
# Reload the saved true-ensemble tables so this section can start from disk.
dd = Config.derived_data_dir / "ensembles"
dd.mkdir(exist_ok=True)

neuron_types = load_derived_generic("neuron_types.csv")
type_lookup = neuron_types[["neuron_id", "neuron_type"]]

# merge() without `on` joins on the columns the two frames share.
df_ensembles = pd.read_csv(dd / "spont - ensembles - true.csv").merge(type_lookup)
stats = pd.read_csv(dd / "spont - stats - true.csv")

In [19]:
from ssri_interactions.stats import se_prop
from scipy.stats import chi2_contingency


# 1 for units assigned to an ensemble, 0 for units labelled -1.
df_ensembles["in_ensemble"] = (df_ensembles["ensemble"] != -1).astype(int)

membership = df_ensembles["in_ensemble"]
unit_type = df_ensembles["neuron_type"]

prop = membership.mean() * 100
# NOTE(review): scaled by 200, i.e. 2 x se_prop expressed in percent —
# presumably an approximate 95% interval rather than one SE; confirm.
se = se_prop(membership) * 200

print(f"Proportion of units in an ensemble {prop:.2f}% +- {se:.2f}")

# Counts of membership by neuron type, tested for independence.
X = pd.crosstab(membership, unit_type)

stat, p, _, _ = chi2_contingency(X)

print(f"stat = {stat:.2f}.\np = {p:.2f}")

display(X)

# Same table as row-wise percentages.
Xt = pd.crosstab(membership, unit_type, normalize="index")
display(Xt.multiply(100).round(2))

Proportion of units in an ensemble 69.52% +- 3.57
stat = 0.58.
p = 0.75


neuron_type,FF,SIR,SR
in_ensemble,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,18,100,85
1,35,222,206


neuron_type,FF,SIR,SR
in_ensemble,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,8.87,49.26,41.87
1,7.56,47.95,44.49


In [13]:
from ssri_interactions.stats import se_mean

def mean_se(x, round=2):
    """Format the mean and standard error of ``x`` as ``"<mean> +- <se>"``.

    Args:
        x: Numeric Series (or similar) supporting ``.mean()``.
        round: Number of decimals applied to both the mean and the SE.

    Returns:
        A string of the form ``"<mean> +- <se>"``.
    """
    # Both halves honour `round`; the SE half was previously fixed at 2 decimals.
    return f"{x.mean().round(round)} +- {se_mean(x).round(round)}"

# Mean +- SE of size and edge-weight stats across real ensembles (ID != -1).
# The former `normalized_volume` column was dropped by the column selection
# right after it was computed, so it is no longer calculated.
(
    stats
    .query("ensemble_id != -1")
    [["size", "average_weight", "average_weight_out"]]
    .apply(mean_se)
    .to_frame("Mean +- SE")
    .T
)

Unnamed: 0,size,average_weight,average_weight_out
Mean +- SE,11.32 +- 1.1,0.3 +- 0.02,0.13 +- 0.01
