In [1]:
from sklearn.model_selection import KFold
from lightgbm import LGBMRegressor
from sklearn.compose import make_column_transformer, make_column_selector, TransformedTargetRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import KFold


from drn_interactions.config import ExperimentInfo

from drn_interactions.decoding.loaders import StateDecodeDataLoader
from drn_interactions.decoding.preprocessors import StateDecodePreprocessor
from drn_interactions.decoding.encoders import StateEncoder
from drn_interactions.decoding.runners import EncodeRunner
from drn_interactions.decoding.shuffle import shuffle_X

In [2]:
# Single seed shared by every stochastic component configured in this cell.
# Without it, KFold(shuffle=True) draws different folds on every execution,
# so cross-validated scores are not reproducible between runs.
SEED = 42

# Sessions to analyse; the first one is used to build the loader below.
sessions = ExperimentInfo.eeg_sessions
session = sessions[0]

loader = StateDecodeDataLoader(block="pre", t_stop=1800, session_name=session)
preprocessor = StateDecodePreprocessor(
    thresh_empty=2,
)

# Keep the raw loader output under its own names so the preprocessed data does
# not silently overwrite it (makes it possible to inspect both stages).
spikes_raw, states_raw = loader()
spikes, states = preprocessor(spikes_raw, states_raw)

# Column-wise preprocessing: standardise every non-object column and one-hot
# encode every object column (binary categories collapse to a single column).
ct = make_column_transformer(
    (
        StandardScaler(),
        make_column_selector(dtype_exclude=object),
    ),
    (
        OneHotEncoder(drop="if_binary"),
        make_column_selector(dtype_include=object),
    ),
)

# Feature preprocessing followed by a small gradient-boosted regressor.
estimator = make_pipeline(
    ct,
    LGBMRegressor(
        n_estimators=20,
        n_jobs=-1,
        force_row_wise=True,
        reg_lambda=0.8,
        random_state=SEED,  # fix the model's internal RNG
    ),
)

# Standardise the target as well; predictions are mapped back automatically
# by TransformedTargetRegressor.
estimator = TransformedTargetRegressor(estimator, transformer=StandardScaler())

# Shuffled K-fold (default number of splits) with a fixed seed so that the
# same folds are produced on every run.
cv = KFold(shuffle=True, random_state=SEED)

encoder = StateEncoder(estimator=estimator, cv=cv, verbose=True)
runner = EncodeRunner(
    loader=loader,
    preprocessor=preprocessor,
    encoder=encoder,
)

In [3]:
# The population- and state-level runs are currently disabled; re-enable the
# two lines below to (re)compute `pop` and `state`.
# pop = runner.run_multiple_pop(sessions=sessions)
# state = runner.run_multiple_state(sessions=sessions)

# "Limit" run over all sessions, called with min_features=1 and max_features=30.
limit = runner.run_multiple_limit(
    sessions=sessions,
    min_features=1,
    max_features=30,
)

# "Dropout" run over the same set of sessions.
dropout = runner.run_multiple_dropout(sessions=sessions)

100%|██████████| 5/5 [00:12<00:00,  2.40s/it]
100%|██████████| 23/23 [10:02<00:00, 26.19s/it]
100%|██████████| 22/22 [09:00<00:00, 24.58s/it]
100%|██████████| 13/13 [01:48<00:00,  8.35s/it]
100%|██████████| 30/30 [22:43<00:00, 45.46s/it]
 83%|████████▎ | 30/36 [27:32<05:27, 54.54s/it]

In [5]:

import os

from drn_interactions.config import Config


# Destination folder for the encoding results.
dd = Config.derived_data_dir / "encoding"

# DataFrame.to_csv does not create missing parent directories. Create the
# folder up front so the results of the long-running cells are not lost to a
# missing-directory error at save time.
os.makedirs(dd, exist_ok=True)

# `pop` and `state` are not saved while their runs are disabled.
# pop.to_csv(dd / "brain state - pop.csv", index=False)
# state.to_csv(dd / "brain state - state.csv", index=False)
limit.to_csv(dd / "brain state - limit.csv", index=False)
dropout.to_csv(dd / "brain state - dropout.csv", index=False)
    

In [4]:
# NOTE(review): in the visible notebook, `pop` is only assigned by the
# `runner.run_multiple_pop(...)` call, which is commented out. On a fresh
# kernel this cell raises NameError unless that line is re-enabled first.
pop

Unnamed: 0,neuron_id,neuron_type,score
0,1,SIR,0.757185
1,4,SIR,0.661141
2,5,SIR,0.329905
3,6,SIR,0.471953
4,8,SIR,0.000271
5,1,FF,0.273535
6,4,FF,0.517696
7,5,FF,0.786669
8,6,FF,0.464101
9,8,FF,0.697185


In [4]:
# NOTE(review): repeat display of `pop`. `pop` is only assigned by the
# commented-out `runner.run_multiple_pop(...)` call, so this cell depends on
# state left in the kernel and fails with NameError after a restart.
pop

Unnamed: 0,neuron_id,neuron_type,score
0,1,SIR,0.757157
1,4,SIR,0.660273
2,5,SIR,0.333538
3,6,SIR,0.459135
4,8,SIR,-0.009721
5,1,FF,0.30258
6,4,FF,0.520686
7,5,FF,0.781449
8,6,FF,0.461495
9,8,FF,0.690863
