In [2]:
from sklearn.model_selection import KFold
from lightgbm import LGBMClassifier

from ssri_interactions.io import load_derived_generic
from ssri_interactions.config import ExperimentInfo, Config
from ssri_interactions.decoding.loaders import FSDecodeDataLoader, FSFastDecodeDataLoader
from ssri_interactions.decoding.preprocessors import DecodePreprocessor
from ssri_interactions.decoding.runners import DecodeRunner
from ssri_interactions.decoding.decoders import Decoder
from ssri_interactions.decoding.shuffle import shuffle_X

# %load_ext autoreload
# %autoreload 2

  return warn(


# Slow TS

In [6]:
def make_slow_ts_runner(
    loader,
    thresh_empty=2,
    n_estimators=50,
    max_depth=8,
    num_leaves=30,
    n_splits=5,
    nboot=75,
    random_state=None,
):
    """Build a ``DecodeRunner`` for the slow-timescale decoding analysis.

    The defaults reproduce the configuration that was previously hard-coded,
    so ``make_slow_ts_runner(loader)`` behaves as before.

    Parameters
    ----------
    loader : callable
        Called with no arguments; must return a ``(spikes, states)`` pair.
        Also handed to the ``DecodeRunner``.
    thresh_empty : int, default 2
        Forwarded to ``DecodePreprocessor(thresh_empty=...)``.
    n_estimators, max_depth, num_leaves : int
        Forwarded to ``LGBMClassifier``.
    n_splits : int, default 5
        Number of folds for the shuffled ``KFold`` splitter.
    nboot : int, default 75
        Forwarded to ``DecodeRunner(nboot=...)``.
    random_state : int or None, default None
        Seed passed to both ``KFold`` and ``LGBMClassifier``. ``None`` keeps
        the original unseeded behaviour; pass an int for repeatable folds.
        NOTE(review): ``shuffle_X`` is not seeded here - confirm whether it
        needs its own seed for fully reproducible runs.

    Returns
    -------
    DecodeRunner
        Runner wired with the loader, preprocessor and decoder built here.
    """
    preprocessor = DecodePreprocessor(thresh_empty=thresh_empty)

    # The original ran the loader and preprocessor once up front and threw
    # the result away. The call is kept (it fails fast on a bad loader and
    # preserves any side effects), but the unused rebinding is dropped.
    spikes, states = loader()
    preprocessor(spikes, states)

    estimator = LGBMClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        num_leaves=num_leaves,
        random_state=random_state,
    )
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
    return DecodeRunner(
        loader=loader,
        preprocessor=preprocessor,
        decoder=decoder,
        nboot=nboot,
    )

#### Loaders

In [10]:
# Keep only neurons whose group is CIT or SAL.
neuron_types = (
    load_derived_generic("neuron_types.csv")
    .query("group in ('CIT', 'SAL')")
)

# Unique session names belonging to the HAMILTON experiment.
sessions = (
    neuron_types
    .query("experiment_name == 'HAMILTON'")
    .session_name
    .unique()
    .tolist()
)

# The loaders are built for the first session; the runners are later asked
# to iterate over all of `sessions`.
session = sessions[0]
loader_post_shock = FSDecodeDataLoader(session_name=session)
loader_shock_only = FSDecodeDataLoader(session_name=session, t_stop=600)


#### Shock Only

In [11]:
# Slow-timescale decoding with the loader that was built with t_stop=600.
runner_shock_only = make_slow_ts_runner(loader_shock_only)

pop, unit = runner_shock_only.run_multiple(sessions)
limit = runner_shock_only.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=15,
)
dropout = runner_shock_only.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

# Write each result table into the shared decoding output directory.
dd = Config.derived_data_dir / "decoding"
dd.mkdir(exist_ok=True)
shock_only_tables = {
    "fs_slow - shock only - pop.csv": pop,
    "fs_slow - shock only - unit.csv": unit,
    "fs_slow - shock only - limit.csv": limit,
    "fs_slow - shock only - dropout.csv": dropout,
}
for file_name, table in shock_only_tables.items():
    table.to_csv(dd / file_name, index=False)

100%|██████████| 75/75 [00:15<00:00,  4.89it/s]
100%|██████████| 75/75 [00:11<00:00,  6.35it/s]
100%|██████████| 75/75 [00:12<00:00,  5.85it/s]
100%|██████████| 75/75 [00:11<00:00,  6.54it/s]
100%|██████████| 75/75 [00:18<00:00,  4.15it/s]
100%|██████████| 75/75 [00:15<00:00,  4.81it/s]
100%|██████████| 75/75 [00:16<00:00,  4.63it/s]
100%|██████████| 75/75 [00:16<00:00,  4.46it/s]
100%|██████████| 75/75 [00:15<00:00,  4.70it/s]


#### Post-included

In [25]:
# Slow-timescale decoding with the loader that has no t_stop limit.
runner_post_shock = make_slow_ts_runner(loader_post_shock)

pop, unit = runner_post_shock.run_multiple(sessions)
limit = runner_post_shock.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=15,
)
dropout = runner_post_shock.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

# Write each result table into the shared decoding output directory.
dd = Config.derived_data_dir / "decoding"
dd.mkdir(exist_ok=True)
post_included_tables = {
    "fs_slow - post-included - pop.csv": pop,
    "fs_slow - post-included - unit.csv": unit,
    "fs_slow - post-included - limit.csv": limit,
    "fs_slow - post-included - dropout.csv": dropout,
}
for file_name, table in post_included_tables.items():
    table.to_csv(dd / file_name, index=False)

100%|██████████| 75/75 [01:12<00:00,  1.04it/s]
100%|██████████| 75/75 [01:14<00:00,  1.01it/s]
100%|██████████| 75/75 [01:08<00:00,  1.10it/s]
100%|██████████| 75/75 [01:00<00:00,  1.23it/s]
100%|██████████| 75/75 [01:18<00:00,  1.05s/it]
100%|██████████| 75/75 [01:16<00:00,  1.01s/it]
100%|██████████| 75/75 [01:12<00:00,  1.03it/s]
100%|██████████| 75/75 [01:20<00:00,  1.07s/it]
100%|██████████| 75/75 [01:11<00:00,  1.05it/s]


Cannot have number of splits n_splits=5 greater than the number of samples: n_samples=0.


#### Export

In [29]:
from sklearn.decomposition import PCA, KernelPCA
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC


# sessions = ExperimentInfo.foot_shock_sessions_10min
# session = sessions[0]

# Fast decoding setup: loader with bin_width=0.1, preprocessor with
# thresh_empty=1.
loader = FSFastDecodeDataLoader(session_name=session, bin_width=0.1)
preprocessor = DecodePreprocessor(thresh_empty=1)

# Load once and preprocess; `spikes` / `states` stay available for inspection.
spikes, states = preprocessor(*loader())

# Standardise features before the gradient-boosted classifier.
estimator = make_pipeline(
    StandardScaler(),
    LGBMClassifier(n_estimators=40, max_depth=8, num_leaves=30),
)

cv = KFold(n_splits=5, shuffle=True)
decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
runner = DecodeRunner(loader=loader, preprocessor=preprocessor, decoder=decoder, nboot=75)

In [30]:
# Run the three analyses with the fast-decoding runner over every session.
pop, unit = runner.run_multiple(sessions)
limit = runner.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=15,
)
dropout = runner.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

100%|██████████| 75/75 [00:15<00:00,  4.84it/s]
100%|██████████| 75/75 [00:15<00:00,  4.82it/s]
100%|██████████| 75/75 [00:14<00:00,  5.04it/s]
100%|██████████| 75/75 [00:09<00:00,  7.56it/s]
100%|██████████| 75/75 [00:26<00:00,  2.85it/s]
100%|██████████| 75/75 [00:19<00:00,  3.84it/s]
100%|██████████| 75/75 [00:20<00:00,  3.60it/s]
100%|██████████| 75/75 [00:20<00:00,  3.64it/s]
100%|██████████| 75/75 [00:19<00:00,  3.79it/s]


Cannot have number of splits n_splits=5 greater than the number of samples: n_samples=1.


In [None]:
# Save the fast-decoding tables; `dd` is the decoding output directory
# defined in the slow-timescale cells.
fast_tables = {
    "fs_fast - pop.csv": pop,
    "fs_fast - unit.csv": unit,
    "fs_fast - limit.csv": limit,
    "fs_fast - dropout.csv": dropout,
}
for file_name, table in fast_tables.items():
    table.to_csv(dd / file_name, index=False)

In [None]:
# SECOND WINDOW
# NOTE(review): the loader arguments here (session_name, bin_width=0.1) are
# the same as in the earlier fast-decoding setup, so nothing in this cell
# selects a different window - confirm which loader parameter is meant to
# change before trusting the "second_window" outputs.

loader = FSFastDecodeDataLoader(session_name=session, bin_width=0.1)
preprocessor = DecodePreprocessor(thresh_empty=1)

# Load once and preprocess; `spikes` / `states` stay available for inspection.
spikes, states = preprocessor(*loader())

# Standardise features before the gradient-boosted classifier.
estimator = make_pipeline(
    StandardScaler(),
    LGBMClassifier(n_estimators=40, max_depth=8, num_leaves=30),
)

cv = KFold(n_splits=5, shuffle=True)
decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
runner = DecodeRunner(loader=loader, preprocessor=preprocessor, decoder=decoder, nboot=75)

In [None]:
# Run the three analyses with the current `runner` over every session.
pop, unit = runner.run_multiple(sessions)
limit = runner.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=15,
)
dropout = runner.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

# Save the tables; `dd` is the decoding output directory defined in the
# slow-timescale cells.
second_window_tables = {
    "fs_fast_second_window - pop.csv": pop,
    "fs_fast_second_window - unit.csv": unit,
    "fs_fast_second_window - limit.csv": limit,
    "fs_fast_second_window - dropout.csv": dropout,
}
for file_name, table in second_window_tables.items():
    table.to_csv(dd / file_name, index=False)

## Offset Decode


In [None]:
from ssri_interactions.transforms.nbox_transforms import align_to_data_by
from spiketimes.df.surrogates import shuffled_isi_spiketrains_by
from ssri_interactions.decoding.offset_decoder import OffsetDecoder
from tqdm.notebook import tqdm
import numpy as np


def get_aligned(df_spikes, df_events, time_before=0.5, time_after=1.5, bin_width=0.1):
    """Align spikes to events and return per-bin spike counts per neuron.

    Parameters
    ----------
    df_spikes : pandas.DataFrame
        Spike table with ``neuron_id``, ``session_name`` and ``spiketimes``
        columns (the column names passed to ``align_to_data_by``).
    df_events : pandas.DataFrame
        Event table with ``session_name`` and ``event_s`` columns.
    time_before, time_after : float, default 0.5 and 1.5
        Window around each event, forwarded to ``align_to_data_by`` and also
        used as the limits of the bin grid so the two cannot drift apart.
    bin_width : float, default 0.1
        Step of the bin grid.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``(event, bin)``, one column per ``neuron_id``, values
        are spike counts with missing combinations filled with 0.

    Notes
    -----
    NOTE(review): ``which_bin`` is not imported in any cell visible in this
    notebook - confirm it is in scope before running.
    """
    df = align_to_data_by(
        df_spikes,
        df_events,
        time_before_event=time_before,
        time_after_event=time_after,
        df_data_cell_col="neuron_id",
        df_data_group_col="session_name",
        df_events_group_colname="session_name",
        df_events_timestamp_col="event_s",
        df_data_time_col="spiketimes",
    )
    # Same grid as the original np.arange(-0.5, 1.5, 0.1) under the defaults.
    bins = np.arange(-time_before, time_after, bin_width)
    # Rounding keeps float bin labels stable as grouping / pivot keys.
    df["bin"] = np.round(which_bin(df["aligned"].values, bins), 2)
    # .size() counts rows per group directly; unlike .apply(len) it does not
    # trigger the grouping-column deprecation warning and still yields a
    # Series when there are no groups.
    counts = (
        df.groupby(["neuron_id", "event", "bin"])
        .size()
        .to_frame("counts")
        .reset_index()
    )
    return counts.pivot(index=["event", "bin"], columns="neuron_id", values="counts").fillna(0)

# TODO: use the PSTH code without merging


def offset_decode(df_spikes, df_events, sessions, estimator, cv, scoring="f1_macro"):
    """Fit an ``OffsetDecoder`` per session on real versus ISI-shuffled spikes.

    For each session the real spikes and a surrogate produced by
    ``shuffled_isi_spiketrains_by`` are aligned to events with
    ``get_aligned`` and passed as one ``(true, fake)`` pair to
    ``OffsetDecoder.fit_models``.

    Parameters
    ----------
    df_spikes : pandas.DataFrame
        Spike table with at least ``neuron_id`` and ``session_name`` columns.
    df_events : pandas.DataFrame
        Event table passed unchanged to ``get_aligned``.
    sessions : iterable
        Session names to process.
    estimator : sklearn estimator
        Cloned for every session so fits do not share state.
    cv : cross-validation splitter
        Forwarded to ``OffsetDecoder``.
    scoring : str, default "f1_macro"
        Forwarded to ``OffsetDecoder``.

    Returns
    -------
    list
        One result per session (the output of ``fit_models`` with a
        ``session_name`` column added). The results are not concatenated.
    """
    # `clone` is not imported in any cell visible in this notebook, so the
    # original raised NameError; import it where it is used.
    from sklearn.base import clone

    neurons_sub = df_spikes[["neuron_id", "session_name"]].drop_duplicates()
    out = []
    for session in tqdm(sessions):
        # A boolean mask avoids building a query string, which broke on
        # session names containing quote characters.
        session_spikes = df_spikes[df_spikes["session_name"] == session]

        # Shuffle only this session's spikes instead of the whole data set on
        # every iteration. Presumably the shuffle acts within each
        # `neuron_id` group, so the subset gives an equivalent surrogate -
        # NOTE(review): confirm against spiketimes' implementation.
        shuffled = shuffled_isi_spiketrains_by(session_spikes, by_col="neuron_id").merge(neurons_sub)
        # Re-apply the session filter after the merge, as the original did.
        shuffled = shuffled[shuffled["session_name"] == session]

        spikes_true = get_aligned(session_spikes, df_events)
        spikes_fake = get_aligned(shuffled, df_events)

        decoder = OffsetDecoder(estimator=clone(estimator), cv=cv, scoring=scoring)
        res = decoder.fit_models([(spikes_true, spikes_fake)])
        out.append(res.assign(session_name=session))
    return out