In [3]:
from sklearn.model_selection import KFold
from lightgbm import LGBMClassifier

from ssri_interactions.io import load_derived_generic
from ssri_interactions.config import ExperimentInfo, Config
from ssri_interactions.decoding.loaders import FSDecodeDataLoader, FSFastDecodeDataLoader
from ssri_interactions.decoding.preprocessors import DecodePreprocessor
from ssri_interactions.decoding.runners import DecodeRunner
from ssri_interactions.decoding.decoders import Decoder
from ssri_interactions.decoding.shuffle import shuffle_X


# %load_ext autoreload
# %autoreload 2

In [9]:
# --- Foot-shock decoding (FSDecodeDataLoader): build the pipeline -----------
# Keep only rows whose `group` is CIT or SAL, then collect the unique session
# names recorded under the HAMILTON experiment.
neuron_types = load_derived_generic("neuron_types.csv").query("group in ('CIT', 'SAL')")
sessions = neuron_types.query("experiment_name == 'HAMILTON'").session_name.unique().tolist()
session = sessions[0]

# Loader / preprocessor pair that is handed to the runner below.
loader = FSDecodeDataLoader(session_name=session)
preprocessor = DecodePreprocessor(thresh_empty=2)

# Load and preprocess the first session once. These two names are not passed
# to the runner in this cell; presumably kept for interactive inspection.
spikes, states = loader()
spikes, states = preprocessor(spikes, states)

# Seed for the shuffled K-fold split. Without it the fold assignment changes
# on every kernel run and the saved results cannot be reproduced exactly.
# NOTE(review): any randomness inside `shuffle_X` / the runner's bootstrap is
# not controlled from here -- confirm whether those need seeding as well.
CV_SEED = 0

estimator = LGBMClassifier(n_estimators=50, max_depth=8, num_leaves=30)
cv = KFold(n_splits=5, shuffle=True, random_state=CV_SEED)
decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
runner = DecodeRunner(
    loader=loader,
    preprocessor=preprocessor,
    decoder=decoder,
    nboot=75,
)

In [10]:
# Run the three decoding analyses over every session in `sessions`.
# NOTE(review): the recorded output of this cell contains, twice, the message
# "Cannot have number of splits n_splits=5 greater than the number of samples:
# n_samples=1." -- at least two runs had too few samples for 5-fold CV; check
# how those cases are represented in the returned results.

# `run_multiple` returns a pair, unpacked here as (pop, unit).
pop, unit = runner.run_multiple(sessions)

# Same sessions, with n_min=1 and n_max=31.
limit = runner.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=31,
)

# Same sessions, with the three neuron types SR, SIR and FF.
dropout = runner.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

100%|██████████| 75/75 [08:20<00:00,  6.67s/it]
100%|██████████| 75/75 [38:57<00:00, 31.17s/it]   
100%|██████████| 75/75 [12:25<00:00,  9.94s/it]  
100%|██████████| 75/75 [07:27<00:00,  5.97s/it]
100%|██████████| 75/75 [06:56<00:00,  5.55s/it]
100%|██████████| 75/75 [06:55<00:00,  5.54s/it]
100%|██████████| 75/75 [07:00<00:00,  5.60s/it]
100%|██████████| 75/75 [06:35<00:00,  5.27s/it]
100%|██████████| 75/75 [07:43<00:00,  6.18s/it]


Cannot have number of splits n_splits=5 greater than the number of samples: n_samples=1.
Cannot have number of splits n_splits=5 greater than the number of samples: n_samples=1.


In [12]:
# Persist the slow-timescale results as CSV files under
# <Config.derived_data_dir>/decoding.
dd = Config.derived_data_dir / "decoding"

# parents=True so the cell also works when the derived-data directory itself
# does not exist yet; exist_ok=True keeps the cell safe to re-run.
dd.mkdir(parents=True, exist_ok=True)

# index=False: the row index is not written to the files.
pop.to_csv(dd / "fs_slow - pop.csv", index=False)
unit.to_csv(dd / "fs_slow - unit.csv", index=False)
limit.to_csv(dd / "fs_slow - limit.csv", index=False)
dropout.to_csv(dd / "fs_slow - dropout.csv", index=False)

In [14]:
from sklearn.decomposition import PCA, KernelPCA
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC


# --- Foot-shock decoding (FSFastDecodeDataLoader, bin_width=0.1) ------------
# Disabled alternative session selection, kept for reference:
# sessions = ExperimentInfo.foot_shock_sessions_10min
# session = sessions[0]

# NOTE(review): `session` (and `sessions`, used by the run cell) come from the
# earlier setup cell; this cell fails on a fresh kernel unless that cell has
# been executed first.
loader = FSFastDecodeDataLoader(session_name=session, bin_width=0.1)
preprocessor = DecodePreprocessor(thresh_empty=1)

# Load and preprocess one session once. These two names are not passed to the
# runner in this cell; presumably kept for interactive inspection.
spikes, states = loader()
spikes, states = preprocessor(spikes, states)

# Standardise features, then classify with LightGBM.
estimator = make_pipeline(
    StandardScaler(),
    LGBMClassifier(n_estimators=40, max_depth=8, num_leaves=30),
)

# Seed for the shuffled K-fold split. Without it the fold assignment changes
# on every kernel run and the saved results cannot be reproduced exactly.
# NOTE(review): any randomness inside `shuffle_X` / the runner's bootstrap is
# not controlled from here -- confirm whether those need seeding as well.
CV_SEED = 0

cv = KFold(n_splits=5, shuffle=True, random_state=CV_SEED)
decoder = Decoder(estimator=estimator, cv=cv, shuffler=shuffle_X)
# NOTE(review): nboot=5 here versus nboot=75 in the earlier FSDecodeDataLoader
# setup cell -- confirm this is intended for results that are written to disk.
runner = DecodeRunner(
    loader=loader,
    preprocessor=preprocessor,
    decoder=decoder,
    nboot=5,
)

In [15]:
# Re-run the three decoding analyses with the fast-timescale runner.
# This rebinds `pop`, `unit`, `limit` and `dropout`, replacing the values
# produced by the earlier run cell.

# `run_multiple` returns a pair, unpacked here as (pop, unit).
pop, unit = runner.run_multiple(sessions)

# Same sessions, with n_min=1 and n_max=31.
limit = runner.run_multiple_limit(
    sessions=sessions,
    n_min=1,
    n_max=31,
)

# Same sessions, with the three neuron types SR, SIR and FF.
dropout = runner.run_multiple_dropout(
    sessions=sessions,
    neuron_types=("SR", "SIR", "FF"),
)

100%|██████████| 5/5 [00:08<00:00,  1.70s/it]
100%|██████████| 5/5 [00:08<00:00,  1.69s/it]
100%|██████████| 5/5 [00:07<00:00,  1.50s/it]
100%|██████████| 5/5 [00:05<00:00,  1.09s/it]
100%|██████████| 5/5 [00:08<00:00,  1.78s/it]
100%|██████████| 5/5 [00:08<00:00,  1.77s/it]
100%|██████████| 5/5 [00:08<00:00,  1.68s/it]
100%|██████████| 5/5 [00:08<00:00,  1.72s/it]
100%|██████████| 5/5 [00:08<00:00,  1.72s/it]


In [17]:
# Persist the fast-timescale results as CSV files in `dd`, the output
# directory defined in the earlier save cell. Files are written in the order
# listed; index=False leaves the row index out of each file.
for frame, filename in (
    (pop, "fs_fast - pop.csv"),
    (unit, "fs_fast - unit.csv"),
    (limit, "fs_fast - limit.csv"),
    (dropout, "fs_fast - dropout.csv"),
):
    frame.to_csv(dd / filename, index=False)