In [None]:
from sklearn.model_selection import KFold
from lightgbm import LGBMClassifier

from ssri_interactions.io import load_derived_generic
from ssri_interactions.config import ExperimentInfo, Config
from ssri_interactions.decoding.loaders import FSDecodeDataLoader, FSFastDecodeDataLoader
from ssri_interactions.decoding.preprocessors import DecodePreprocessor
from ssri_interactions.decoding.runners import DecodeRunner
from ssri_interactions.decoding.decoders import Decoder
from ssri_interactions.decoding.shuffle import shuffle_X

# %load_ext autoreload
# %autoreload 2

# Slow TS

In [None]:
def make_slow_ts_runner(loader, random_state=None):
    """Build a ``DecodeRunner`` for the slow decoding analyses.

    Parameters
    ----------
    loader : callable
        Data loader; calling it with no arguments returns ``(spikes, states)``.
    random_state : int or None, optional
        Seed forwarded to both the LightGBM classifier and the shuffled K-fold
        splitter so a run can be reproduced. ``None`` (the default) leaves
        both unseeded, which is the previous behaviour.

    Returns
    -------
    DecodeRunner
        Runner wired with ``loader``, a ``DecodePreprocessor(thresh_empty=2)``,
        a ``Decoder`` wrapping the classifier and splitter, and ``nboot=75``.
    """
    preprocessor = DecodePreprocessor(
        thresh_empty=2,
    )
    # NOTE(review): the loaded and preprocessed data is not used below; this
    # presumably acts as an early check that the loader/preprocessor pair
    # works -- confirm before removing.
    spikes, states = loader()
    spikes, states = preprocessor(spikes, states)
    estimator = LGBMClassifier(
        n_estimators=50,
        max_depth=8,
        num_leaves=30,
        random_state=random_state,
    )
    cv = KFold(n_splits=5, shuffle=True, random_state=random_state)
    decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
    runner = DecodeRunner(
        loader=loader,
        preprocessor=preprocessor,
        decoder=decoder,
        nboot=75,
    )
    return runner

#### Loaders

In [None]:
# Keep only rows from the CIT and SAL groups, then list the HAMILTON sessions.
neuron_types = (
    load_derived_generic("neuron_types.csv")
    .query("group in ('CIT', 'SAL')")
)
sessions = (
    neuron_types
    .query("experiment_name == 'HAMILTON'")
    .session_name
    .unique()
    .tolist()
)
session = sessions[0]

# Two loaders for the same session: one with the loader's default time range,
# one with an explicit t_stop of 600.
loader_post_shock = FSDecodeDataLoader(session_name=session)
loader_shock_only = FSDecodeDataLoader(session_name=session, t_stop=600)


#### Shock Only

In [None]:
# Run the runner's three analyses with the shock-only loader.
runner_shock_only = make_slow_ts_runner(loader_shock_only)

pop, unit = runner_shock_only.run_multiple(sessions)
limit = runner_shock_only.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=15,
)
dropout = runner_shock_only.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

# Write each result table to its own CSV in the "decoding" output directory.
dd = Config.derived_data_dir / "decoding"
dd.mkdir(exist_ok=True)
for frame, filename in (
    (pop, "fs_slow - shock only - pop.csv"),
    (unit, "fs_slow - shock only - unit.csv"),
    (limit, "fs_slow - shock only - limit.csv"),
    (dropout, "fs_slow - shock only - dropout.csv"),
):
    frame.to_csv(dd / filename, index=False)

#### Post-included

In [None]:
# Run the runner's three analyses with the post-shock-included loader.
runner_post_shock = make_slow_ts_runner(loader_post_shock)

pop, unit = runner_post_shock.run_multiple(sessions)
limit = runner_post_shock.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=15,
)
dropout = runner_post_shock.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

# Write each result table to its own CSV in the "decoding" output directory.
dd = Config.derived_data_dir / "decoding"
dd.mkdir(exist_ok=True)
for frame, filename in (
    (pop, "fs_slow - post-included - pop.csv"),
    (unit, "fs_slow - post-included - unit.csv"),
    (limit, "fs_slow - post-included - limit.csv"),
    (dropout, "fs_slow - post-included - dropout.csv"),
):
    frame.to_csv(dd / filename, index=False)

#### Export

In [None]:
from sklearn.decomposition import PCA, KernelPCA
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC


# Disabled alternative session selection, kept for reference:
# sessions = ExperimentInfo.foot_shock_sessions_10min
# session = sessions[0]

# Loader and preprocessor for the fast decoding analysis (0.1 bin width).
loader = FSFastDecodeDataLoader(session_name=None, bin_width=0.1)
preprocessor = DecodePreprocessor(thresh_empty=1)
spikes, states = loader()
spikes, states = preprocessor(spikes, states)

# Standardise features before the gradient-boosted classifier.
estimator = make_pipeline(
    StandardScaler(),
    LGBMClassifier(n_estimators=40, max_depth=8, num_leaves=30),
)

cv = KFold(n_splits=5, shuffle=True)
decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
runner = DecodeRunner(loader=loader, preprocessor=preprocessor, decoder=decoder, nboot=75)

In [None]:
# Run the fast-decoding runner's three analyses over all selected sessions.
pop, unit = runner.run_multiple(sessions)
limit = runner.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=15,
)
dropout = runner.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

In [None]:
# Write each fast-decoding result table to its own CSV.
# `dd` is the output directory defined in an earlier cell.
for frame, filename in (
    (pop, "fs_fast - pop.csv"),
    (unit, "fs_fast - unit.csv"),
    (limit, "fs_fast - limit.csv"),
    (dropout, "fs_fast - dropout.csv"),
):
    frame.to_csv(dd / filename, index=False)

In [None]:
# SECOND WINDOW


from sklearn.decomposition import PCA, KernelPCA
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC
import numpy as np
import pandas as pd
import seaborn as sns


def make_fast_ts_runner(window, bin_width=0.1, random_state=None):
    """Build a ``DecodeRunner`` for fast decoding restricted to one window.

    Parameters
    ----------
    window : tuple
        Forwarded unchanged as the ``window`` argument of
        ``FSFastDecodeDataLoader``.
    bin_width : float, optional
        Forwarded as the loader's ``bin_width`` (default ``0.1``).
    random_state : int or None, optional
        Seed forwarded to both the LightGBM classifier and the shuffled K-fold
        splitter so a run can be reproduced. ``None`` (the default) leaves
        both unseeded, which is the previous behaviour.

    Returns
    -------
    DecodeRunner
        Runner wired with the loader, a ``DecodePreprocessor(thresh_empty=1)``,
        a ``Decoder`` wrapping the classifier and splitter, and ``nboot=75``.
    """
    loader = FSFastDecodeDataLoader(session_name=None, bin_width=bin_width, window=window)
    preprocessor = DecodePreprocessor(
        thresh_empty=1,
    )
    # NOTE(review): the loaded and preprocessed data is not used below; this
    # presumably acts as an early check that the loader/preprocessor pair
    # works -- confirm before removing.
    spikes, states = loader()
    spikes, states = preprocessor(spikes, states)
    estimator = LGBMClassifier(
        n_estimators=40,
        max_depth=8,
        num_leaves=30,
        random_state=random_state,
    )
    cv = KFold(n_splits=5, shuffle=True, random_state=random_state)
    decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
    runner = DecodeRunner(
        loader=loader,
        preprocessor=preprocessor,
        decoder=decoder,
        nboot=75,
    )
    return runner

def make_windows(window_size=0.2, t_start=-0.5, t_stop=2):
    """Return consecutive ``(start, stop)`` windows tiling ``[t_start, t_stop)``.

    Parameters
    ----------
    window_size : float
        Width of every window; must be non-zero.
    t_start : float
        Start of the first window.
    t_stop : float
        Windows are generated while their start lies before ``t_stop``; the
        last window may extend past ``t_stop`` when the span is not a
        multiple of ``window_size``.

    Returns
    -------
    list of tuple
        ``(start, stop)`` pairs, each value rounded to 4 decimals.

    Raises
    ------
    ValueError
        If ``window_size`` is zero.
    """
    if window_size == 0:
        raise ValueError("window_size must be non-zero")
    # np.arange with a float step can yield one element too many because of
    # floating-point error (e.g. arange(1, 1.3, 0.1) includes 1.3). Compute the
    # number of windows explicitly, rounding away that error before the ceil.
    n_windows = max(int(np.ceil(np.round((t_stop - t_start) / window_size, 9))), 0)
    window_starts = t_start + window_size * np.arange(n_windows)
    windows = [(round(start, 4), round(start + window_size, 4)) for start in window_starts]
    return windows


# Sessions from the HAMILTON experiment belonging to the SAL or CIT groups.
neuron_types = load_derived_generic("neuron_types.csv")
sessions = (
    neuron_types
    .query("experiment_name == 'HAMILTON' and group in ('SAL', 'CIT')")
    .session_name
    .unique()
    .tolist()
)

# Decode separately within each 0.1-wide window and tag every result table
# with the window it came from.
pop_res = []
unit_res = []
windows = make_windows(window_size=0.1, t_start=-0.5, t_stop=1.5)
for window in windows:
    print(window)
    runner = make_fast_ts_runner(window, bin_width=0.1)
    pop, unit = runner.run_multiple(sessions)
    window_cols = dict(
        window=f"{window[0]} - {window[1]}",
        window_start=window[0],
        window_stop=window[1],
    )
    pop_res.append(pop.assign(**window_cols))
    unit_res.append(unit.assign(**window_cols))



In [None]:
dd = Config.derived_data_dir / "decoding"
dd.mkdir(exist_ok=True)

# Stack the per-window tables and shift window_start by 0.1 (rounded to 3
# decimals); the population table is built first, then the unit table.
df_pop_window, df_unit_window = (
    pd.concat(frames).assign(
        window_start=lambda x: (x.window_start.astype(float) + 0.1).round(3)
    )
    for frames in (pop_res, unit_res)
)

df_pop_window.to_csv(dd / "fs_fast - pop - windows.csv", index=False)
df_unit_window.to_csv(dd / "fs_fast - unit - windows.csv", index=False)

In [None]:
import matplotlib.pyplot as plt
from ssri_interactions.plots import PAL_GREY_BLACK

# Single-axes figure for the windowed population decoding scores.
f, ax_line = plt.subplots(figsize=(5.5, 1))

# Stack the per-window population results, shift window_start by 0.1, attach
# each session's group and draw one point series per group.
(
    pd.concat(pop_res)
    .assign(window_start=lambda d: (d.window_start.astype(float) + 0.1).round(3))
    .merge(neuron_types[["session_name", "group"]].drop_duplicates())
    .query("group in ('SAL', 'CIT')")
    .pipe(
        lambda d: sns.pointplot(
            data=d,
            x="window_start",
            y="pop_true",
            hue="group",
            estimator=np.mean,
            palette=PAL_GREY_BLACK[::-1],
            ax=ax_line,
            dodge=True,
            scale=0.55,
            errwidth=1.5,
        )
    )
)

ax_line.set_ylabel("Decoding\nPerformance\n(F1 Score)")
ax_line.set_xlabel("Time Relative to Shock (s)")
# Dashed reference lines at scores of 0.5 and 1.
for level in (0.5, 1):
    ax_line.axhline(level, color="grey", linewidth=0.5, linestyle="--")
f.subplots_adjust(wspace=1, hspace=1.4)
sns.despine(ax=ax_line, offset=5)
ax_line.legend_.remove()
ax_line.set_ylim(0.45, 0.9)
# Keep every second tick, then restrict the x-range.
ax_line.set_xticks(ax_line.get_xticks()[::2])
ax_line.set_xlim(0, 15)
plt.show()



## Offset Decode


In [None]:
from ssri_interactions.transforms.nbox_transforms import align_to_data_by
from ssri_interactions.decoding.offset_decoder import OffsetDecoder
from ssri_interactions.io import load_spikes, load_events
from ssri_interactions.transforms import SpikesHandler
from spiketimes.df.surrogates import shuffled_isi_spiketrains_by
from binit.bin import which_bin
from sklearn.base import clone
from tqdm import tqdm
import numpy as np

def get_aligned(df_spikes, df_events, time_before_event=0.5, time_after_event=1.5, bin_width=0.1):
    """Align spikes to events and return binned spike counts per neuron.

    Parameters
    ----------
    df_spikes : pandas.DataFrame
        Spike table with ``session_name`` and ``spiketimes`` columns (passed to
        ``align_to_data_by``) and a ``neuron_id`` column.
    df_events : pandas.DataFrame
        Event table with ``session_name`` and ``event_s`` columns.
    time_before_event, time_after_event : float, optional
        Extent of the peri-event window; used both for the alignment and for
        the bin edges so the two cannot drift apart. Defaults: 0.5 and 1.5.
    bin_width : float, optional
        Step between consecutive bin edges. Default: 0.1.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(event, bin)`` with one column per ``neuron_id``; missing
        combinations are filled with 0.
    """
    # NOTE(review): the "aligned" and "event" columns used below are presumably
    # added by align_to_data_by -- confirm against its implementation.
    df = align_to_data_by(
        df_spikes,
        df_events,
        time_before_event=time_before_event,
        time_after_event=time_after_event,
        df_data_group_col="session_name",
        df_events_group_colname="session_name",
        df_events_timestamp_col="event_s",
        df_data_time_col="spiketimes",
    )
    bins = np.arange(-time_before_event, time_after_event, bin_width)
    # Round bin labels so float noise does not create spurious distinct bins.
    df["bin"] = np.round(which_bin(df["aligned"].values, bins), 2)
    # .size() counts rows per group directly, replacing groupby(...).apply(len).
    counts = df.groupby(["neuron_id", "event", "bin"]).size().reset_index(name="counts")
    return counts.pivot(index=["event", "bin"], columns="neuron_id", values="counts").fillna(0)

# TODO: use the PSTH code without merging


def offset_decode(df_spikes, df_events, sessions, estimator, cv, scoring="f1_macro"):
    """Fit an ``OffsetDecoder`` per session on true and ISI-shuffled spikes.

    Parameters
    ----------
    df_spikes : pandas.DataFrame
        Spike table with at least ``neuron_id`` and ``session_name`` columns.
    df_events : pandas.DataFrame
        Event table passed through to ``get_aligned``.
    sessions : iterable of str
        Session names to decode, one model fit per session.
    estimator : sklearn estimator
        Cloned for every session so fits do not share state.
    cv : cross-validation splitter
        Passed to ``OffsetDecoder``.
    scoring : str, optional
        Passed to ``OffsetDecoder`` (default ``"f1_macro"``).

    Returns
    -------
    list of pandas.DataFrame
        One result table per session, each with a ``session_name`` column.
    """
    neurons_sub = df_spikes[["neuron_id", "session_name"]].drop_duplicates()
    out = []
    for session in tqdm(sessions):
        # A boolean mask avoids building a query string from the session name,
        # which would break on names containing quotes.
        session_spikes = df_spikes[df_spikes["session_name"] == session]
        # Build the surrogate from this session's spikes only, rather than
        # reshuffling every neuron of every session on each iteration.
        # NOTE(review): assumes shuffled_isi_spiketrains_by shuffles each
        # neuron_id independently -- confirm.
        shuffled = shuffled_isi_spiketrains_by(session_spikes, by_col="neuron_id").merge(neurons_sub)
        # The merge restores session_name; keep only this session's rows in
        # case a neuron_id is listed under more than one session.
        shuffled = shuffled[shuffled["session_name"] == session]
        spikes_true = get_aligned(session_spikes, df_events)
        spikes_fake = get_aligned(shuffled, df_events)
        decoder = OffsetDecoder(estimator=clone(estimator), cv=cv, scoring=scoring)
        res = decoder.fit_models([(spikes_true, spikes_fake)])
        out.append(res.assign(session_name=session))
    return out




In [None]:
# All HAMILTON sessions (no group filter is applied at this point).
neuron_types = load_derived_generic("neuron_types.csv")
sessions = (
    neuron_types
    .query("experiment_name == 'HAMILTON'")
    .session_name
    .unique()
    .tolist()
)

# Spikes and events for the "base_shock" block.
df_spikes = SpikesHandler(
    block="base_shock",
    bin_width=0.1,
    session_names=sessions,
).spikes
df_events = load_events(block_name="base_shock")

estimator = LGBMClassifier(n_estimators=40, max_depth=8, num_leaves=30)
cv = KFold(n_splits=5, shuffle=True)

# One result table per session.
df_offset = offset_decode(
    df_spikes=df_spikes, df_events=df_events, sessions=sessions, estimator=estimator, cv=cv
)


In [None]:
import seaborn as sns
# Stack the per-session offset-decoding results.
df_res = pd.concat(df_offset)

# Point plot of F1 score by offset, split by group (SAL and CIT only).
sns.catplot(
    data=(
        df_res
        .merge(neuron_types[["session_name", "group"]].drop_duplicates())
        .query("group in ('SAL', 'CIT')")
    ),
    x="offset",
    y="f1score",
    hue="group",
    kind="point",
)
plt.show()

In [None]:
# Stack the per-session offset-decoding results and write them to CSV.
# `dd` is the output directory defined in an earlier cell.
df_res = pd.concat(df_offset)
# index=False matches every other export in this notebook; the concatenated
# index repeats across sessions and would otherwise be written as an extra
# unnamed column.
df_res.to_csv(
    dd / "fs_fast - offset - decoding.csv",
    index=False,
)

In [None]:
# Display the concatenated offset-decoding results as the cell output.
df_res