In [None]:
from sklearn.model_selection import KFold
from lightgbm import LGBMRegressor
from sklearn.compose import make_column_transformer, make_column_selector, TransformedTargetRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import KFold


from drn_interactions.config import ExperimentInfo

from drn_interactions.decoding.loaders import StateDecodeDataLoader
from drn_interactions.decoding.preprocessors import StateDecodePreprocessor
from drn_interactions.decoding.encoders import StateEncoder
from drn_interactions.decoding.runners import EncodeRunner
from drn_interactions.decoding.shuffle import shuffle_X
from drn_interactions.config import Config

In [None]:
# Single seed for the stochastic pieces configured in this cell, so that
# Restart Kernel -> Run All yields the same cross-validation folds each time.
# NOTE(review): the shuffled-control runs may draw randomness elsewhere
# (e.g. inside the runner); that is not visible here and is not seeded by this.
SEED = 42

sessions = ExperimentInfo.eeg_sessions

# Load and preprocess the first session on its own.
# NOTE(review): `session`, `spikes` and `states` are not used by any later
# cell visible here; this looks like a smoke test of the loader/preprocessor.
session = sessions[0]
loader = StateDecodeDataLoader(block="pre", t_stop=1800, session_name=session)
preprocessor = StateDecodePreprocessor(
    thresh_empty=2,
)
spikes, states = loader()
spikes, states = preprocessor(spikes, states)

# Feature preprocessing: standardise every non-object column and one-hot
# encode every object column (binary categories collapse to one indicator).
ct = make_column_transformer(
    (
        StandardScaler(),
        make_column_selector(dtype_exclude=object),
    ),
    (
        OneHotEncoder(drop="if_binary"),
        make_column_selector(dtype_include=object),
    ),
)

# Small, L2-regularised gradient-boosted regressor behind the preprocessing.
estimator = make_pipeline(
    ct,
    LGBMRegressor(
        n_estimators=20,
        n_jobs=-1,
        force_row_wise=True,
        reg_lambda=0.8,
        random_state=SEED,
    ),
)

# Standardise the regression target as well; predictions are mapped back to
# the original scale by TransformedTargetRegressor.
estimator = TransformedTargetRegressor(estimator, transformer=StandardScaler())

# Shuffled K-fold with a fixed seed: without random_state the fold assignment
# differs on every run, which makes the saved scores irreproducible.
cv = KFold(shuffle=True, random_state=SEED)

encoder = StateEncoder(estimator=estimator, cv=cv, verbose=True)
runner = EncodeRunner(
    loader=loader,
    preprocessor=preprocessor,
    encoder=encoder,
)

In [None]:
# Keyword arguments shared by every run below.
shared = {"sessions": sessions}
# Feature-count bounds passed to the "limit" run.
feature_bounds = {"min_features": 1, "max_features": 30}

# Population runs: once as-is and once with shuffle=True.
pop = runner.run_multiple_pop(**shared)
pop_shuffle = runner.run_multiple_pop(**shared, shuffle=True)

# State runs: once as-is and once with shuffle=True.
state = runner.run_multiple_state(**shared)
state_shuffle = runner.run_multiple_state(**shared, shuffle=True)

# Runs parameterised by the feature-count bounds, then the dropout run.
limit = runner.run_multiple_limit(**shared, **feature_bounds)
dropout = runner.run_multiple_dropout(**shared)

In [None]:
# Output directory for every encoding result table.
dd = Config.derived_data_dir / "encoding"
# parents=True: also create the derived-data directory itself when it is
# missing (e.g. on a fresh checkout) instead of raising FileNotFoundError.
dd.mkdir(parents=True, exist_ok=True)

# Output filename -> result table produced by the runner cell.
# NOTE(review): the tables are presumably pandas DataFrames; only their
# `.to_csv(path, index=False)` method is relied on here.
outputs = {
    "brain state - pop.csv": pop,
    "brain state - pop shuffle.csv": pop_shuffle,
    "brain state - state.csv": state,
    "brain state - state shuffle.csv": state_shuffle,
    "brain state - limit.csv": limit,
    "brain state - dropout.csv": dropout,
}

for filename, table in outputs.items():
    table.to_csv(dd / filename, index=False)
    