In [4]:
from sklearn.model_selection import KFold
from lightgbm import LGBMRegressor
from sklearn.compose import make_column_transformer, make_column_selector, TransformedTargetRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import KFold


from drn_interactions.config import ExperimentInfo

from drn_interactions.decoding.loaders import FSDecodeDataLoader, FSFastDecodeDataLoader
from drn_interactions.decoding.preprocessors import DecodePreprocessor
from drn_interactions.decoding.encoders import StateEncoder
from drn_interactions.decoding.runners import EncodeRunner
from drn_interactions.decoding.shuffle import shuffle_X

In [5]:
# Assemble loader -> preprocessor -> encoder -> runner for the
# FSDecodeDataLoader variant of the foot-shock data.
sessions = ExperimentInfo.foot_shock_sessions_10min
session = sessions[0]
loader = FSDecodeDataLoader(session_name=session)
preprocessor = DecodePreprocessor(
    thresh_empty=2,
)
# Load and preprocess the first session once; this exercises the loader and
# preprocessor before they are handed to the runner.
spikes, states = loader()
spikes, states = preprocessor(spikes, states)

# Feature preprocessing: standardise every non-object column and one-hot
# encode object-dtype columns (a binary category collapses to one column).
ct = make_column_transformer(
    (
        StandardScaler(),
        make_column_selector(dtype_exclude=object)
    ),  
    (
        OneHotEncoder(drop="if_binary"),
        make_column_selector(dtype_include=object)
    ), 
)

estimator = make_pipeline(ct, 
    LGBMRegressor(
        n_estimators=20, 
        n_jobs=-1, 
        force_row_wise=True,
        reg_lambda=0.8,
        )
)

# The target is standardised before fitting and predictions are mapped back.
estimator = TransformedTargetRegressor(estimator, transformer=StandardScaler())

# Seed the shuffled K-fold splitter: without a random_state the fold
# assignment changes on every split, so scores are not reproducible between
# kernel runs and real vs. shuffled fits are evaluated on different folds.
CV_SEED = 0
cv = KFold(shuffle=True, random_state=CV_SEED)
encoder = StateEncoder(estimator=estimator, cv=cv, verbose=True)
runner = EncodeRunner(
    loader=loader,
    preprocessor=preprocessor,
    encoder=encoder,
)

In [6]:
# Every run below is evaluated over the same list of sessions.
run_kwargs = dict(sessions=sessions)

# Population runs: once as-is and once with shuffle=True.
pop = runner.run_multiple_pop(**run_kwargs)
pop_shuffle = runner.run_multiple_pop(shuffle=True, **run_kwargs)

# State runs: once as-is and once with shuffle=True.
state = runner.run_multiple_state(**run_kwargs)
state_shuffle = runner.run_multiple_state(shuffle=True, **run_kwargs)

# Feature-limit run (1 to 30 features) and dropout run.
limit = runner.run_multiple_limit(min_features=1, max_features=30, **run_kwargs)
dropout = runner.run_multiple_dropout(**run_kwargs)

100%|██████████| 16/16 [00:03<00:00,  4.23it/s]
100%|██████████| 15/15 [00:03<00:00,  4.03it/s]
100%|██████████| 52/52 [00:19<00:00,  2.66it/s]
100%|██████████| 33/33 [00:09<00:00,  3.32it/s]
100%|██████████| 37/37 [00:11<00:00,  3.18it/s]
100%|██████████| 51/51 [00:16<00:00,  3.15it/s]
100%|██████████| 24/24 [00:06<00:00,  3.75it/s]
100%|██████████| 31/31 [00:08<00:00,  3.55it/s]
100%|██████████| 16/16 [00:03<00:00,  4.00it/s]
100%|██████████| 15/15 [00:03<00:00,  4.00it/s]
100%|██████████| 52/52 [00:16<00:00,  3.15it/s]
100%|██████████| 33/33 [00:09<00:00,  3.52it/s]
100%|██████████| 37/37 [00:10<00:00,  3.48it/s]
100%|██████████| 51/51 [00:16<00:00,  3.16it/s]
100%|██████████| 24/24 [00:06<00:00,  3.84it/s]
100%|██████████| 31/31 [00:08<00:00,  3.69it/s]
100%|██████████| 16/16 [00:01<00:00, 10.54it/s]
100%|██████████| 15/15 [00:01<00:00, 11.09it/s]
100%|██████████| 52/52 [00:04<00:00, 10.79it/s]
100%|██████████| 33/33 [00:03<00:00, 10.68it/s]
100%|██████████| 37/37 [00:03<00:00, 10.

In [7]:

from drn_interactions.config import Config


# Folder that receives the encoding result tables.
dd = Config.derived_data_dir / "encoding"
# DataFrame.to_csv does not create missing parent folders; make sure the
# target exists so the (slow) results above are not lost to an OSError.
# Assumes derived_data_dir is a pathlib.Path — TODO confirm.
dd.mkdir(parents=True, exist_ok=True)

# Persist each result table under the "fs_slow" prefix.
pop.to_csv(dd / "fs_slow - pop.csv", index=False)
pop_shuffle.to_csv(dd / "fs_slow - pop shuffle.csv", index=False)
state.to_csv(dd / "fs_slow - state.csv", index=False)
state_shuffle.to_csv(dd / "fs_slow - state shuffle.csv", index=False)
limit.to_csv(dd / "fs_slow - limit.csv", index=False)
dropout.to_csv(dd / "fs_slow - dropout.csv", index=False)
    

In [5]:
# Assemble loader -> preprocessor -> encoder -> runner for the
# FSFastDecodeDataLoader variant of the foot-shock data.
sessions = ExperimentInfo.foot_shock_sessions_10min
session = sessions[0]
loader = FSFastDecodeDataLoader(session_name=session)
preprocessor = DecodePreprocessor(
    thresh_empty=2,
)
# Load and preprocess the first session once; this exercises the loader and
# preprocessor before they are handed to the runner.
spikes, states = loader()
spikes, states = preprocessor(spikes, states)

# Feature preprocessing: standardise every non-object column and one-hot
# encode object-dtype columns (a binary category collapses to one column).
ct = make_column_transformer(
    (
        StandardScaler(),
        make_column_selector(dtype_exclude=object)
    ),  
    (
        OneHotEncoder(drop="if_binary"),
        make_column_selector(dtype_include=object)
    ), 
)

estimator = make_pipeline(ct, 
    LGBMRegressor(
        n_estimators=20, 
        n_jobs=-1, 
        force_row_wise=True,
        reg_lambda=0.8,
        )
)

# The target is standardised before fitting and predictions are mapped back.
estimator = TransformedTargetRegressor(estimator, transformer=StandardScaler())

# Seed the shuffled K-fold splitter: without a random_state the fold
# assignment changes on every split, so scores are not reproducible between
# kernel runs and real vs. shuffled fits are evaluated on different folds.
CV_SEED = 0
cv = KFold(shuffle=True, random_state=CV_SEED)
encoder = StateEncoder(estimator=estimator, cv=cv, verbose=True)
runner = EncodeRunner(
    loader=loader,
    preprocessor=preprocessor,
    encoder=encoder,
)

In [6]:
# Compute the results with the runner that is current at this point in the
# notebook (built on FSFastDecodeDataLoader in the preceding cell). Previously
# this cell wrote `pop`, `state`, `limit` and `dropout` straight to disk, which
# on a top-to-bottom run still hold the earlier results and were being saved
# under "fs_fast" filenames.
# The `_fast` suffix keeps the earlier result variables intact.
pop_fast = runner.run_multiple_pop(sessions=sessions)
state_fast = runner.run_multiple_state(sessions=sessions)
limit_fast = runner.run_multiple_limit(sessions=sessions, min_features=1, max_features=30)
dropout_fast = runner.run_multiple_dropout(sessions=sessions)

# Folder that receives the encoding result tables.
dd = Config.derived_data_dir / "encoding"
# DataFrame.to_csv does not create missing parent folders.
# Assumes derived_data_dir is a pathlib.Path — TODO confirm.
dd.mkdir(parents=True, exist_ok=True)

# Persist each result table under the "fs_fast" prefix.
pop_fast.to_csv(dd / "fs_fast - pop.csv", index=False)
state_fast.to_csv(dd / "fs_fast - state.csv", index=False)
limit_fast.to_csv(dd / "fs_fast - limit.csv", index=False)
dropout_fast.to_csv(dd / "fs_fast - dropout.csv", index=False)
    