In [20]:
%load_ext autoreload
%autoreload 2

import json
from itertools import product
import logging

import core.datacache as datacache
import core.filters as filters
import core.indexer as indexer
import preprocessings
import pinard.preprocessing as pp
from pinard import augmentation
import regressors
from sklearn.cross_decomposition import PLSRegression
from xgboost import XGBRegressor


# Root logger setup: every record is written both to "debug.log" and to the
# console stream handler.
# NOTE: with level=WARNING, logging.info(...) calls are filtered out; lower the
# level to logging.INFO to see them.
logging.basicConfig(
    level=logging.WARNING,
    # The format used to start with a stray, unmatched single quote that was
    # printed in front of every record; it has been removed.
    format="%(name)s - %(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("debug.log"),
        logging.StreamHandler(),
    ],
)

def generate_steps_combinations(steps):
    """Enumerate every way of choosing one method per step.

    Args:
        steps: Mapping whose values are mappings with a ``"type"`` entry and
            an iterable ``"method"`` entry listing the candidate methods.

    Returns:
        A list of tuples, one per combination. Each tuple holds one
        ``(method, type)`` pair per step, in the mapping's iteration order.
        An empty ``steps`` yields ``[()]``; a step without candidates yields
        ``[]``.
    """
    # One list of (candidate, step type) pairs per step; the cartesian
    # product then selects exactly one pair from each list.
    tagged_choices = (
        [(candidate, spec["type"]) for candidate in spec["method"]]
        for spec in steps.values()
    )
    return list(product(*tagged_choices))


# TRAINING
# Declarative search space consumed by train_pool():
#   - "paths": datasets, iterated one at a time and handed to
#     datacache.register_dataset.
#   - "pre_indexation" / "post_indexation": dicts of steps. Each step has a
#     "type" and a list of candidate "method"s; generate_steps_combinations()
#     picks one method per step and returns every such combination.
#   - "indexation": passed as a whole to indexer.index.
#   - "models": crossed with all of the above when runs are enumerated.
# "seed" and "random_scope" are not read by train_pool() as written.
training_config = {
    "seed": [42],
    "random_scope": "dataset", # TODO manage SEED
    # Entries are either a path string or a (path, list) tuple; the meaning of
    # the list is not visible here — TODO confirm against
    # datacache.register_dataset.
    "paths": [("data/test_data", [1,2,3]), "data/_Raisin/Raisin_Tavernier_830_GFratio", "data/_RefSet/ALPINE_C_424_Murguzur_RMSE1.16"],
    "pre_indexation": {
        # Each candidate is a (label, filter class, keyword dict) triple.
        "step_1": {
            "type": "filter",
            "method": [("crop", filters.Crop, {"start":100, "end":500}), ("crop", filters.Crop, {"start":0, "end":1000})],# None],
        },
        "step_2": {
            "type": "filter",
            "method": [("resample", filters.Uniform_FT_Resample, {"resample_size": 800})],# None],
        }
    },
    # Each entry is a 4-tuple (label, indexer class, dict, dict).
    # NOTE(review): presumably split parameters followed by indexer-specific
    # options — confirm against indexer.index.
    "indexation": [
        ("random_split", indexer.RandomSampling, {"test_size": 0.2}, {}),
        ("random_cv", indexer.RandomSampling, {"folds": 4, "repeat": 1}, {}),
        # None,
        # NOTE(review): the label "random_cv" is reused for the SXPY indexer —
        # confirm this is intended and not a copy-paste leftover.
        ("random_cv", indexer.SXPY, {"folds": 4, "repeat": 1}, {'metric':"euclidean", 'pca_components':250}),
    ],
    "post_indexation": {
        "step_1": {
            "type": "augmentation",
            # Each candidate is a list of (int, augmenter instance) pairs;
            # presumably the int is a per-augmenter count — TODO confirm.
            "method": [
                # None,
                [(6, augmentation.Rotate_Translate())],
                [(3, augmentation.Rotate_Translate()),(2, augmentation.Random_X_Operation()),(1, augmentation.Random_Spline_Addition()),],
                [(3, augmentation.Rotate_Translate()),(2, augmentation.Random_X_Operation()),(2, augmentation.Random_Spline_Addition()),]
            ],
        },
        "step_2": {
            "type": "preprocessing",
            # Candidates are either the result of a preprocessings.* helper or
            # an explicit list of (label, transformer instance) pairs.
            "method": [
                # None,
                preprocessings.id_preprocessing(),
                [("id", pp.IdentityTransformer()), ('haar', pp.Haar()), ('savgol', pp.SavitzkyGolay())],
                preprocessings.decon_set(),
            ]
        },
    },
    # (model class, keyword dict) pairs; presumably the dict is forwarded to
    # the model or its training routine — TODO confirm.
    "models": [
        # ()
        (regressors.Transformer_NIRS, {'batch_size':500, 'epoch':10000, 'verbose':0, 'patience':1000, 'optimizer':'adam', 'loss':'mse'}),
        (XGBRegressor, {"n_estimators":200, "max_depth":50}),
        (PLSRegression, {"n_components":50}),
        (regressors.Decon_SepPo, {'batch_size':50, 'epoch':10000, 'verbose':0, 'patience':1000, 'optimizer':'adam', 'loss':'mse'}),
    ],
}



def train_pool(config):
    """Enumerate the candidate training runs for every dataset in ``config``.

    Each entry of ``config["paths"]`` is registered through
    ``datacache.register_dataset``. Then, for every pre-indexation
    combination, ``indexer.index`` is called and the cartesian product
    (pre-indexation combination x indexation x post-indexation combination
    x model) is built. The runs are only counted and previewed with
    ``print``; nothing is scheduled or trained yet.

    Args:
        config: Mapping providing the keys ``"paths"``, ``"pre_indexation"``,
            ``"indexation"``, ``"post_indexation"`` and ``"models"``.

    Returns:
        None.

    Raises:
        KeyError: If ``"pre_indexation"`` or ``"post_indexation"`` is missing
            (raised up front, before any dataset is registered), or if
            another required key is missing when it is first read.
    """
    # Both step grids depend only on the configuration, so they are built once
    # instead of once per dataset / per pre-indexation combination.
    pre_indexation_steps = generate_steps_combinations(config["pre_indexation"])
    post_indexation_steps = generate_steps_combinations(config["post_indexation"])

    for dataset_path in config["paths"]:
        logging.info("Processing dataset %s", dataset_path)
        # 1. Load data
        dataset_uid, dataset_name = datacache.register_dataset(dataset_path)

        # 2. Filter data
        logging.info("Pre-indexation steps: %s", pre_indexation_steps)

        for pre_indexation_step in pre_indexation_steps:
            # 3. Indexation
            indexations = indexer.index(config["indexation"], pre_indexation_step, dataset_uid, dataset_name)
            runs = list(product([pre_indexation_step], indexations, post_indexation_steps, config["models"]))
            # Preview only: total number of runs and the first ten of them.
            print(len(runs))
            print("\n".join(str(run) for run in runs[:10]))
            # TODO: hand each run over to a scheduler (scheduler.add(run)).


# Cell entry point: walk the whole search space described by training_config.
# NOTE: train_pool currently only enumerates and prints the runs (scheduling is
# commented out), so the banner below marks the end of that enumeration.
train_pool(training_config)
print("*"*50, "  Training done  ", "*"*50)



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


AttributeError: module 'core.datacache' has no attribute 'get'