In [1]:
from ssri_interactions.interactions.preprocessors import InteractionsPreprocessor
from ssri_interactions.interactions.loaders import SpontaneousActivityLoader
from ssri_interactions.interactions.pairwise import PairwiseCorr
from ssri_interactions.io import load_neurons, load_derived_generic
import seaborn as sns
import pandas as pd
import numpy as np
from ssri_interactions.interactions.graph_clustering import SpectralCluster, ClusterRunner, ClusterEvaluation, ClusterResults
import warnings
from ssri_interactions.config import Config

# Output folder for every ensemble table written by this notebook.
dd = Config.derived_data_dir.joinpath("ensembles")
dd.mkdir(exist_ok=True)  # no error if the folder is already there

  return warn(


In [2]:
def reorder_df_affinity(df, ensemble_labels):
    """Permute rows and columns of ``df`` into ascending ensemble-label order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame addressed positionally on both axes; the same permutation is
        applied to rows and to columns.
    ensemble_labels : array-like
        Labels whose ``np.argsort`` order defines the permutation.

    Returns
    -------
    pandas.DataFrame
        The reordered frame.
    """
    order = np.argsort(ensemble_labels)
    rows_sorted = df.iloc[order]
    return rows_sorted.iloc[:, order]

def get_true(df, bin_width=1, shuffle=False):
    """Select the rows of ``df`` matching one bin width and one shuffle flag.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``bin_width`` and ``shuffle`` columns.
    bin_width : number, default 1
        Value the ``bin_width`` column must equal.
    shuffle : bool, default False
        Value the ``shuffle`` column must equal.

    Returns
    -------
    pandas.DataFrame
        Matching rows, original index preserved.
    """
    width_matches = df["bin_width"] == bin_width
    shuffle_matches = df["shuffle"] == shuffle
    return df.loc[width_matches & shuffle_matches]

def get_labels_from_df_ensembles(df_ensembles, session):
    """Return one session's ensemble labels, ordered by ``neuron_id``.

    Parameters
    ----------
    df_ensembles : pandas.DataFrame
        Must have ``session_name``, ``neuron_id`` and ``ensemble`` columns.
    session : str
        Value of ``session_name`` to keep.

    Returns
    -------
    numpy.ndarray
        Values of the ``ensemble`` column for that session, sorted by
        ``neuron_id``.
    """
    in_session = df_ensembles["session_name"] == session
    session_rows = df_ensembles.loc[in_session].sort_values("neuron_id")
    return session_rows["ensemble"].values

def get_affinity(session):
    """Build the pairwise-correlation adjacency frame for one session.

    Loads unshuffled spontaneous activity at a bin width of 1, runs it through
    a default-constructed ``InteractionsPreprocessor``, fits a rectified
    ``PairwiseCorr`` and returns its adjacency DataFrame.

    NOTE(review): the preprocessor here uses its defaults, whereas the runner
    set-up cell constructs ``InteractionsPreprocessor(z=True, minmax=False)``;
    confirm the two are meant to differ.
    """
    load_spikes = SpontaneousActivityLoader(
        bin_width=1, shuffle=False, session_name=session
    )
    preprocess = InteractionsPreprocessor()
    corr = PairwiseCorr(rectify=True)

    fitted = corr.fit(preprocess(load_spikes()))
    return fitted.get_adjacency_df()


In [3]:
# Every distinct session name found in the neurons table.
sessions = load_neurons()["session_name"].unique()

# These two instances are shared by every runner built below.
preprocessor = InteractionsPreprocessor(z=True, minmax=False)
evaluator = ClusterEvaluation()


def loader_fac(bin_width, session, shuffle):
    """Return a spontaneous-activity loader for one session and bin width."""
    return SpontaneousActivityLoader(
        bin_width=bin_width, session_name=session, shuffle=shuffle
    )


def corr_fac():
    """Return a fresh pairwise-correlation affinity calculator."""
    return PairwiseCorr(
        remove_self_interactions=True,
        rectify=True,
        shuffle=False,
        remove_duplicate_combs=True,
    )


def cluster_fac():
    """Return a fresh spectral clusterer trying 2 through 9 clusters."""
    return SpectralCluster(
        n_clusters=np.arange(2, 10),
        spectral_kws={"affinity": "precomputed"},
        n_init=15,
    )


def runner_fac(bin_width, session, shuffle_load):
    """Assemble a ClusterRunner for one (bin width, session, shuffle) combination.

    A new loader, affinity calculator and clusterer are created on each call;
    the module-level ``preprocessor`` and ``evaluator`` are reused.
    """
    return ClusterRunner(
        loader=loader_fac(bin_width, session, shuffle_load),
        preprocessor=preprocessor,
        affinity_calculator=corr_fac(),
        clusterer=cluster_fac(),
        evalulator=evaluator,  # keyword spelling kept exactly as ClusterRunner is called
    )

### Detect Ensembles at Multiple Timescales & Compare to Shuffle

In [4]:
# Detect ensembles for every (bin width, session) pair and give each detected
# ensemble an id that is unique across the whole run. The label -1 is a
# sentinel that is never offset.
ensemble_frames = []
ensemble_stats_frames = []
current_max = 1  # next free global ensemble id
bin_widths = (0.05, 0.1, 0.5, 1, 2)
# NOTE(review): nboot is not used in this cell; runner.run() below is called
# with a literal 20. Confirm which value is intended.
nboot = 50

# Only the unshuffled condition is run here.
for shuffle in (False,):
    for bin_width in bin_widths:
        for session in sessions:

            # calculate ensembles
            runner = runner_fac(bin_width, session, shuffle)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                _, ensembles, _, ensemble_stats = runner.run(20)

            # shift this run's labels by the global offset (sentinel -1 untouched)
            ensembles["ensemble"] = np.where(
                ensembles["ensemble"] != -1, ensembles["ensemble"] + current_max, -1
            )
            ensemble_stats["ensemble"] = np.where(
                ensemble_stats["ensemble"] != -1,
                ensemble_stats["ensemble"] + current_max,
                -1,
            )

            # Advance the offset from real (non -1) labels only, and never let
            # it move backwards. Taking abs().max() instead would turn an
            # all -1 result into 1 and reset the offset to 2, so later sessions
            # would reuse ids that had already been handed out; an empty result
            # would make the offset NaN.
            assigned = ensembles.loc[ensembles["ensemble"] != -1, "ensemble"]
            if not assigned.empty:
                current_max = max(current_max, int(assigned.max()) + 1)

            # add metadata and append to list of results
            ensembles = ensembles.assign(
                session_name=session, shuffle=shuffle, bin_width=bin_width
            )
            ensemble_stats = ensemble_stats.assign(
                session_name=session, shuffle=shuffle, bin_width=bin_width
            )
            ensemble_stats_frames.append(ensemble_stats)
            ensemble_frames.append(ensembles)


df_ensembles = pd.concat(ensemble_frames).reset_index(drop=True)
df_ensembles_stats = pd.concat(ensemble_stats_frames).reset_index(drop=True)

# Persist both tables so later cells can reload them without re-running detection.
dd = Config.derived_data_dir / "ensembles"
dd.mkdir(exist_ok=True)
df_ensembles.to_csv(dd / "spont - ensembles.csv", index=False)
df_ensembles_stats.to_csv(dd / "spont - stats.csv", index=False)

### Subset Out 1 Sec, Non-Shuffled Ensembles & Calculate Descriptive Statistics

In [7]:
# Reload the saved detection results so this cell does not depend on the
# (slow) detection loop having been run in the current kernel.
dd = Config.derived_data_dir / "ensembles"
df_ensembles = pd.read_csv(dd / "spont - ensembles.csv", )
df_ensembles_stats= pd.read_csv(dd / "spont - stats.csv")
evalulater = ClusterEvaluation()
# get_true defaults: bin_width == 1 and shuffle == False.
true_ensembles = df_ensembles.pipe(get_true)
sessions = true_ensembles.session_name.unique()
partition_frames = []  # collected per session; not combined in this cell
stats_frames = []

for session in sessions:
    # Labels come back sorted by neuron_id.
    # NOTE(review): this presumes get_affinity() orders neurons the same way — confirm.
    labs = get_labels_from_df_ensembles(true_ensembles, session)
    df_affinity = get_affinity(session)
    partition_frames.append(
        evalulater.evaluate_partition(df_affinity, labs)
        .assign(session_name=session)
    )
    stats_frames.append(
        evalulater.evaluate_communities(df_affinity, labs)
        .assign(session_name=session)
    )

stats = pd.concat(stats_frames).reset_index(drop=True)

# One integer id per (community, session) pair. The "|" delimiter keeps
# distinct pairs from producing the same key (without it, community "1" in
# session "2x" and community "12" in session "x" would both become "12x").
stats["ensemble_id"] = pd.factorize(
    stats["community"].astype(str) + "|" + stats["session_name"]
)[0]
# Community -1 keeps the sentinel id -1.
stats["ensemble_id"] = np.where(
    stats["community"] == -1,
    -1,
    stats["ensemble_id"],
)
# Attach ensemble_id to every neuron row via its (session, ensemble) pair.
true_ensembles = (
    stats
    .rename(columns=dict(community="ensemble"))
    [["session_name", "ensemble", "ensemble_id"]]
    .merge(true_ensembles, on=["session_name", "ensemble"])
)


neuron_types = load_derived_generic("neuron_types.csv")
descriptor = ClusterResults(
    neuron_types=neuron_types
)
stats = descriptor.ensemble_results(true_ensembles, stats)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [8]:
# response entropy
# Compute one entropy column per responder table, then merge all three onto
# the ensemble stats.

slow_responders_shock = load_derived_generic("slow_ts_foot_shock_unit_responders_pre_to_shock.csv")
# The two fast tables get a `response` column copied from their own response field.
# (presumably ensemble_response_entropy reads `response` — confirm)
fast_responders = load_derived_generic("fast_fs_foot_shock_unit_responders.csv").assign(response=lambda x: x.fs_fast_response)
second_window = load_derived_generic("fast_fs_foot_shock_unit_responders_second_window.csv").assign(response=lambda x: x.response_second_window)


res_slow = descriptor.ensemble_response_entropy(
    true_ensembles,
    slow_responders_shock,
    created_col="fs_slow_entropy",
)
# The fast entropy must come from the fast responder table. It was previously
# computed from slow_responders_shock, which duplicated the slow result under
# the fast column name; the result also overwrote the loaded fast_responders.
res_fast = descriptor.ensemble_response_entropy(
    true_ensembles,
    fast_responders,
    created_col="fs_fast_entropy",
)
# NOTE(review): "second_widnow_entropy" is misspelled, but it is the column
# name written to the saved CSV, so it is left unchanged to avoid breaking
# anything that reads that file.
res_bs = descriptor.ensemble_response_entropy(
    true_ensembles,
    second_window,
    created_col="second_widnow_entropy",
)
response_entropy = res_slow.merge(res_fast, how="outer").merge(res_bs, how="outer")

stats = stats.merge(response_entropy, how="outer")

In [9]:
# Write the 1 s, unshuffled ensemble membership table and its stats to disk.
dd = Config.derived_data_dir.joinpath("ensembles")
dd.mkdir(exist_ok=True)
true_ensembles.to_csv(dd.joinpath("spont - ensembles - true.csv"), index=False)
stats.to_csv(dd.joinpath("spont - stats - true.csv"), index=False)