In [1]:
!pip install fancyimpute




In [2]:
!pip install lightgbm




In [3]:
pip install --user catboost


Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install xgboost


Note: you may need to restart the kernel to use updated packages.


In [17]:
import csv
import os
import random
from datetime import datetime

import numpy as np
import pandas as pd
import gc
from joblib import Parallel, delayed, parallel_backend
import multiprocessing as mp

from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.experimental import enable_iterative_imputer
from sklearn.feature_selection import (RFE, SelectKBest,
                                       SequentialFeatureSelector, f_regression)
from sklearn.impute import (IterativeImputer, KNNImputer, SimpleImputer)
from sklearn.linear_model import (ElasticNetCV, LassoCV, LinearRegression)
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
from sklearn.model_selection import (GridSearchCV, RandomizedSearchCV,
                                     cross_val_score, train_test_split)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (MaxAbsScaler, MinMaxScaler, RobustScaler,
                                   StandardScaler)
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

from catboost import CatBoostRegressor
from fancyimpute import MatrixFactorization, NuclearNormMinimization, SoftImpute
from lightgbm import LGBMRegressor


In [18]:

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
Month_0_visits = pd.read_csv('/home/jupyter/imported/Month_0_visits')

df = Month_0_visits 
updrs = 'updrs_1'

In [19]:
def log_message(message, counter=None):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    formatted_message = f"{timestamp} "

    if isinstance(message, dict):
        formatted_message += "{\n"
        formatted_lines = [f"  {key}: {value}," for key, value in message.items()]
        formatted_message += "\n".join(formatted_lines) + "\n}"
    elif isinstance(message, (list, tuple)):
        formatted_message += "[\n"
        formatted_lines = [f"  {item}," for item in message]
        formatted_message += "\n".join(formatted_lines) + "\n]"
    else:
        formatted_message += f"{message}"

    if counter is not None:
        formatted_message = f"[{counter}] " + formatted_message

    print(formatted_message)


In [20]:
def split(df, target):
    dropped_columns = [
        'updrs_1', 
        'updrs_2', 
        'updrs_3', 
        'updrs_4', 
        'upd23b_clinical_state_on_medication'
    ]
    target_columns = [target] if isinstance(target, str) else target

    df = df.dropna(subset=target_columns)

    X = df.drop(columns=dropped_columns)
    y = df[target_columns]

    return X, y


In [21]:
X, y = split(df, updrs)

In [22]:
cols_w_percent_nans_to_drop_dict = {
    'None': 'None',
    '0.5 %': .005,
    '1.0 %': .01,
    '5.0 %': .05,
    '10.0 %': .10,
    '15.0 %': .15,
    '20.0 %': .20
}

imputers_dict = {
    'None': None,
    'mean': SimpleImputer(strategy='mean'),
    'median': SimpleImputer(strategy='median'),
    'most_frequent': SimpleImputer(strategy='most_frequent'),
    'KNN_1': KNNImputer(n_neighbors=1),
    'KNN_2': KNNImputer(n_neighbors=2),
    'KNN_3': KNNImputer(n_neighbors=3),
    'KNN_4': KNNImputer(n_neighbors=4),
    'KNN_5': KNNImputer(n_neighbors=5),
    'KNN_6': KNNImputer(n_neighbors=6),
    'KNN_7': KNNImputer(n_neighbors=7),
    'soft_impute': SoftImpute(verbose=False)
}

scalers_dict = {
    'None': None,
    'standard_scaler': StandardScaler(),
    'min_max_scaler': MinMaxScaler(),
    'Robust_Scaler': RobustScaler()
}

Principal_component_analysis = {
    'None': None,
    'PCA 5': 0.05,
    'PCA 10': 0.10,
    'PCA 20': 0.20
}

def identity_selector(X):
    return X

feature_selectors = {
    'No Selector': identity_selector,
    'SelectKBest 5': SelectKBest(score_func=f_regression),
    'SelectKBest 10': SelectKBest(score_func=f_regression),
    'SelectKBest 20': SelectKBest(score_func=f_regression),
    'RFE 5': RFE(LinearRegression(), verbose=True),
    'RFE 10': RFE(LinearRegression(), verbose=True),
    'RFE 20': RFE(LinearRegression(), verbose=True),
    'SequentialFeatureSelector Forward 5': SequentialFeatureSelector(
        LinearRegression(), direction='forward'),
    'SequentialFeatureSelector Forward 10': SequentialFeatureSelector(
        LinearRegression(), direction='forward'),
    'SequentialFeatureSelector Forward 20': SequentialFeatureSelector(
        LinearRegression(), direction='forward')
}

models = {
    'LassoCV': LassoCV(n_jobs=-1, max_iter=10000, random_state=42, verbose=False),
    'ElasticNetCV': ElasticNetCV(n_jobs=-1, max_iter=10000, random_state=42, verbose=False),
    'SVR': SVR(verbose=False),
    'XGBRegressor': XGBRegressor(random_state=42, n_jobs=1, verbosity=0),
    'GradientBoostingRegressor': GradientBoostingRegressor(random_state=42, verbose=0),
    'LGBMRegressor': LGBMRegressor(random_state=42, n_jobs=-1, verbose=0, force_col_wise=True),
    'CatBoostRegressor': CatBoostRegressor(random_state=42, verbose=0, thread_count=-1),
    'RandomForestRegressor': RandomForestRegressor(random_state=42, n_jobs=-1, verbose=0)
}


In [23]:
def smape(y_true, y_pred):
    y_true = np.array(y_true).reshape(-1)
    y_pred = np.array(y_pred).reshape(-1)
    return 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8))

In [24]:
def generate_population(population_size=100):
    population = []
    
    for _ in range(population_size):
        individual = {
            'imputer': random.choice(list(imputers_dict.keys())),
            'scaler': random.choice(list(scalers_dict.keys())),
            'nan_threshold': random.choice(list(cols_w_percent_nans_to_drop_dict.keys())),
            'model': random.choice(list(models.keys())),
            'pca': random.choice(list(Principal_component_analysis.keys())),
            'feature_selector': random.choice(list(feature_selectors.keys())),
            'sMAPE': 200.00
        }
        population.append(individual)

    return population


In [25]:
def train_and_evaluate_individual(individual, X, y, counter):
    def drop_nans(individual, X):
        nan_threshold = cols_w_percent_nans_to_drop_dict[individual['nan_threshold']]
        if nan_threshold is not None:
            columns_to_drop_ = X.columns[X.isna().mean() > nan_threshold]
        else:
            columns_to_drop_ = []
        X_dropped =  X.drop(columns_to_drop_, axis=1)
        n_features = X_dropped.shape[1]
        return X_dropped, n_features

    def impute(individual, X_dropped, n_features):
        imputer = imputers_dict[individual['imputer']]
        if imputer == 'None':
            X_imputed = X_dropped
            n_features = X_imputed.shape[1]
        else:
            X_imputed = imputer.fit_transform(X_dropped)
            n_features = X_imputed.shape[1]
            fallback_imputer = SimpleImputer(strategy='mean')  # You can change the strategy to 'median' if desired

            # Check and correct NaN values
            if pd.DataFrame(X_imputed).isna().any().any():            
                log_message(f'X_imputed had nans from {individual["imputer"]}', counter)
                X_imputed = fallback_imputer.fit_transform(X_imputed)

        return X_imputed, n_features

    def scale(individual, X_train_imputed, X_val_imputed, n_features):
        scaler = scalers_dict[individual['scaler']]
        if scaler is not None:
            X_train_scaled = scaler.fit_transform(X_train_imputed)
            X_val_scaled = scaler.transform(X_val_imputed)
        else:
            X_train_scaled = X_train_imputed
            X_val_scaled = X_val_imputed
        n_features = X_train_scaled.shape[1]
        return X_train_scaled, X_val_scaled, n_features

    def apply_pca(individual, X_train_scaled, X_val_scaled, n_features):
        pca_ratio = Principal_component_analysis[individual['pca']]
        if pca_ratio != None:
            pca = PCA(n_components=int(n_features * pca_ratio))
            X_train_pca = pca.fit_transform(X_train_scaled)
            X_val_pca = pca.transform(X_val_scaled)
        else:
            X_train_pca = X_train_scaled
            X_val_pca = X_val_scaled
        n_features = X_train_pca.shape[1]
        return X_train_pca, X_val_pca, n_features

    def get_feature_selectors(n_features):
        n_feature = n_features
        return {
            'No Selector': identity_selector,
            'SelectKBest 5': SelectKBest(score_func=f_regression, k=max(int(n_feature * .05), 1)),
            'SelectKBest 10': SelectKBest(score_func=f_regression, k=max(int(n_feature * .10), 1)),
            'SelectKBest 20': SelectKBest(score_func=f_regression, k=max(int(n_feature * .20), 1)),
            'RFE 5': RFE(LinearRegression(), n_features_to_select=max(int(n_feature * 0.05), 1)),
            'RFE 10': RFE(LinearRegression(), n_features_to_select=max(int(n_feature * 0.1), 1)),
            'RFE 20': RFE(LinearRegression(), n_features_to_select=max(int(n_feature * 0.2), 1)),
            'SequentialFeatureSelector Forward 5': SequentialFeatureSelector(
                LinearRegression(), direction='forward', n_features_to_select=max(int(n_feature * 0.05), 1)),
            'SequentialFeatureSelector Forward 10': SequentialFeatureSelector(
                LinearRegression(), direction='forward', n_features_to_select=max(int(n_feature * 0.1), 1)),
            'SequentialFeatureSelector Forward 20': SequentialFeatureSelector(
                LinearRegression(), direction='forward', n_features_to_select=max(int(n_feature * 0.2), 1))
        }

    def select_features(individual, X_train_pca, X_val_pca, n_features):
        feature_selectors = get_feature_selectors(n_features)
        selector = feature_selectors[individual['feature_selector']]
        if individual['feature_selector'] == 'No Selector':
            X_train_selected = selector(X_train_pca)
            X_val_selected = selector(X_val_pca)
        else:
            X_train_selected = selector.fit_transform(X_train_pca, y_train)
            X_val_selected = selector.transform(X_val_pca)
        n_features = X_train_selected.shape[1]
        return X_train_selected, X_val_selected, n_features


    def model(individual, X_train_selected, X_val_selected, y_train, y_val):
        model = models[individual['model']]
        model.fit(X_train_selected, y_train)
        y_pred = model.predict(X_val_selected)
        individual['sMAPE'] = smape(y_val, y_pred)
        return individual['sMAPE']
    
    
    ###log_message('dropping nans...', counter)
    X_dropped, n_features = drop_nans(individual, X)
    ###log_message('imputing...', counter)
    X_imputed, n_features = impute(individual, X_dropped, n_features)
    
    y = np.ravel(y)
    ###log_message('splitting train and val...', counter)
    X_train_imputed, X_val_imputed, y_train, y_val = train_test_split(X_imputed, y, test_size=0.2, random_state=42)
    ###log_message('scaling...', counter)
    X_train_scaled, X_val_scaled, n_features = scale(individual, X_train_imputed, X_val_imputed, n_features)
    X_train_pca, X_val_pca, n_features = apply_pca(individual, X_train_imputed, X_val_imputed, n_features)

    feature_selectors = get_feature_selectors(X_train_pca.shape[1])
    ###log_message('selecting features...', counter)
    ###individual['feature_selector'] = random.choice(list(feature_selectors.keys()))

    X_train_selected, X_val_selected, n_features = select_features(individual, X_train_pca, X_val_pca, n_features)
    
    try:
        log_message('modeling...', counter)
        if X_train_selected.shape[1] == 0:
            indiv_smape = 200.0  # Penalize configuration with a high sMAPE value
        else:
            indiv_smape = model(individual, X_train_selected, X_val_selected, y_train, y_val)
        individual['sMAPE'] = indiv_smape
        log_message(individual, counter)
    except Exception as e:
        log_message(f'Model failed to train due to {e}')
        individual['sMAPE'] = 200

        log_message(f'Individual: {individual}')
        log_message(f'sMAPE: {indiv_smape}')
        log_message(f'Shape: {X_train_selected.shape[1]}')
        gc.collect()
    return indiv_smape


def _worker(individual, idx, X, y):
    return idx, train_and_evaluate_individual(individual, X, y, idx)

def train_and_evaluate_population(population, X, y, timeout=None):
    smape_values = []

    with mp.Pool(processes=mp.cpu_count()) as pool:
        async_results = [pool.apply_async(_worker, args=(individual, idx, X, y)) for idx, individual in enumerate(population)]

        for idx, async_result in enumerate(async_results):
            try:
                result_idx, smape_value = async_result.get(timeout) if timeout else async_result.get()
            except mp.TimeoutError:
                smape_value = 200  # Penalize configuration with a high sMAPE value if it exceeds the timeout
                log_message(f'Model training for individual {idx} exceeded the timeout', idx)
            except Exception as e:
                smape_value = 200  # Penalize configuration with a high sMAPE value if an error occurs
                log_message(f'Model training for individual {idx} failed due to {e}', idx)

            individual = population[idx]

            if np.isnan(smape_value):
                smape_value = 200

            smape_values.append(smape_value)
            individual['sMAPE'] = smape_value




In [26]:
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum()

In [27]:
gene_to_dict_mapping = {
    'imputer': 'imputers_dict',
    'scaler': 'scalers_dict',
    'nan_threshold': 'cols_w_percent_nans_to_drop_dict',
    'model': 'models',
    'pca': 'Principal_component_analysis',
    'feature_selector': 'feature_selectors'
}
def select_and_generate_new_generation(population, generation, num_survivors=10, num_children=90):
    # Select survivors
    population.sort(key=lambda x: x['sMAPE'])
    survivors = population[:num_survivors]

    # Calculate mutation rate
    mutation_rate = (0.20 * (0.93 ** generation))

    # Roulette wheel selection
    fitness_scores = np.array([individual['sMAPE'] for individual in survivors])
    probabilities = softmax(-fitness_scores)  # Minimize sMAPE, so use the negative of fitness_scores

    new_generation = survivors.copy()
    for _ in range(num_children):
        
        # Select two parents
        parents = np.random.choice(survivors, size=2, replace=False, p=probabilities)

        # Crossover
        child = {}
        for gene in parents[0].keys():
            child[gene] = random.choice([parents[0][gene], parents[1][gene]])

        # Mutate
        for gene in child.keys():
            if gene != 'sMAPE' and random.random() < mutation_rate:
                gene_dict = globals()[gene_to_dict_mapping[gene]]
                child[gene] = random.choice(list(gene_dict.keys()))


        child['sMAPE'] = 0.00
        new_generation.append(child)
    gc.collect()
    return new_generation, survivors

In [None]:
generation = 0
population = generate_population(population_size=100)
log_message('Initial population generated')
train_and_evaluate_population(population, X, y, 1200)
log_message('Initial population trained and evaluated')

while True:  # Define your stopping condition function
    # Save the population to a CSV file
    population_file = f'population_{generation}.csv'
    population_df = pd.DataFrame(population)
    population_df.to_csv(population_file, index=False)
    
    new_population, survivors = select_and_generate_new_generation(population, generation)
    log_message('New Population Generated')
    survivor_file = f'survivor_{generation}.csv'
    survivor_df = pd.DataFrame(survivors)
    survivor_df.to_csv(population_file, index=False)
    log_message(f"Generation {generation} survivors:")

    for idx, survivor in enumerate(survivors, 1):
        log_message(f"Survivor {idx}. sMAPE: {survivor['sMAPE']:.4f}, Configuration: {survivor}")
        
    train_and_evaluate_population(new_population, X, y, 1200)

    
    population = new_population
    generation += 1
    gc.collect()


2023-05-05 22:40:03 Initial population generated
[13] 2023-05-05 22:40:06 modeling...
[13] 2023-05-05 22:40:06 {
  imputer: mean,
  scaler: Robust_Scaler,
  nan_threshold: 0.5 %,
  model: SVR,
  pca: PCA 20,
  feature_selector: SelectKBest 5,
  sMAPE: 72.80215766572826,
}
[12] 2023-05-05 22:40:11 modeling...
[1] 2023-05-05 22:40:11 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[1] 2023-05-05 22:40:12 {
  imputer: median,
  scaler: Robust_Scaler,
  nan_threshold: 15.0 %,
  model: ElasticNetCV,
  pca: PCA 10,
  feature_selector: SelectKBest 10,
  sMAPE: 71.59923088675664,
}


  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[12] 2023-05-05 22:40:12 {
  imputer: KNN_7,
  scaler: min_max_scaler,
  nan_threshold: 1.0 %,
  model: RandomForestRegressor,
  pca: PCA 10,
  feature_selector: SequentialFeatureSelector Forward 20,
  sMAPE: 73.91286759010426,
}
[15] 2023-05-05 22:40:14 modeling...
[18] 2023-05-05 22:40:16 modeling...
[8] 2023-05-05 22:40:21 modeling...
[10] 2023-05-05 22:40:21 modeling...


  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[8] 2023-05-05 22:40:22 {
  imputer: median,
  scaler: None,
  nan_threshold: 20.0 %,
  model: RandomForestRegressor,
  pca: PCA 5,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 72.99060971312899,
}
[15] 2023-05-05 22:40:22 {
  imputer: KNN_5,
  scaler: Robust_Scaler,
  nan_threshold: 5.0 %,
  model: XGBRegressor,
  pca: PCA 5,
  feature_selector: No Selector,
  sMAPE: 73.34126857326521,
}
[14] 2023-05-05 22:40:22 modeling...


  if effective_n_jobs(self.n_jobs) > 1:
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[18] 2023-05-05 22:40:22 {
  imputer: KNN_5,
  scaler: min_max_scaler,
  nan_threshold: 10.0 %,
  model: RandomForestRegressor,
  pca: None,
  feature_selector: SelectKBest 20,
  sMAPE: 68.42735773792087,
}
[14] 2023-05-05 22:40:23 {
  imputer: KNN_1,
  scaler: standard_scaler,
  nan_threshold: 5.0 %,
  model: LassoCV,
  pca: PCA 20,
  feature_selector: No Selector,
  sMAPE: 71.44610484419603,
}
[25] 2023-05-05 22:40:25 modeling...
[3] 2023-05-05 22:40:25 modeling...
[25] 2023-05-05 22:40:27 {
  imputer: median,
  scaler: standard_scaler,
  nan_threshold: 5.0 %,
  model: SVR,
  pca: None,
  feature_selector: No Selector,
  sMAPE: 72.6648500087047,
}
[21] 2023-05-05 22:40:33 modeling...


  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[21] 2023-05-05 22:40:34 {
  imputer: KNN_7,
  scaler: Robust_Scaler,
  nan_threshold: 5.0 %,
  model: RandomForestRegressor,
  pca: PCA 10,
  feature_selector: RFE 10,
  sMAPE: 70.67509826108896,
}
[19] 2023-05-05 22:40:38 modeling...
[19] 2023-05-05 22:40:39 {
  imputer: KNN_5,
  scaler: Robust_Scaler,
  nan_threshold: 15.0 %,
  model: SVR,
  pca: PCA 5,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 73.36640724538618,
}
[27] 2023-05-05 22:40:48 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[27] 2023-05-05 22:40:48 {
  imputer: KNN_5,
  scaler: None,
  nan_threshold: 15.0 %,
  model: ElasticNetCV,
  pca: None,
  feature_selector: SelectKBest 5,
  sMAPE: 71.13172933683195,
}
[7] 2023-05-05 22:40:52 modeling...
[7] 2023-05-05 22:40:53 {
  imputer: soft_impute,
  scaler: min_max_scaler,
  nan_threshold: 0.5 %,
  model: DecisionTreeRegressor,
  pca: None,
  feature_selector: RFE 10,
  sMAPE: 87.66126981484624,
}
[3] 2023-05-05 22:41:05 {
  imputer: KNN_1,
  scaler: min_max_scaler,
  nan_threshold: 20.0 %,
  model: CatBoostRegressor,
  pca: PCA 10,
  feature_selector: SelectKBest 20,
  sMAPE: 71.09227167487947,
}
[16] 2023-05-05 22:41:08 modeling...
[16] 2023-05-05 22:41:09 {
  imputer: KNN_3,
  scaler: min_max_scaler,
  nan_threshold: 20.0 %,
  model: GradientBoostingRegressor,
  pca: PCA 20,
  feature_selector: RFE 5,
  sMAPE: 76.46312865329486,
}
[31] 2023-05-05 22:41:11 modeling...
[31] 2023-05-05 22:41:11 {
  imputer: KNN_3,
  scaler: None,
  nan_threshold: 5.0 %,
  model

  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[22] 2023-05-05 22:41:21 {
  imputer: KNN_1,
  scaler: min_max_scaler,
  nan_threshold: 15.0 %,
  model: RandomForestRegressor,
  pca: PCA 20,
  feature_selector: RFE 20,
  sMAPE: 73.34634176018149,
}
[35] 2023-05-05 22:41:30 modeling...


  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[35] 2023-05-05 22:41:30 {
  imputer: KNN_3,
  scaler: None,
  nan_threshold: 15.0 %,
  model: RandomForestRegressor,
  pca: PCA 5,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 71.25096056773604,
}
[34] 2023-05-05 22:42:33 modeling...
[34] 2023-05-05 22:42:33 {
  imputer: KNN_1,
  scaler: Robust_Scaler,
  nan_threshold: 20.0 %,
  model: SVR,
  pca: PCA 20,
  feature_selector: SequentialFeatureSelector Forward 5,
  sMAPE: 71.62657923237721,
}
[20] 2023-05-05 22:42:52 modeling...
[20] 2023-05-05 22:42:52 {
  imputer: soft_impute,
  scaler: min_max_scaler,
  nan_threshold: 10.0 %,
  model: DecisionTreeRegressor,
  pca: PCA 20,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 94.76873862080024,
}
[39] 2023-05-05 22:43:28 modeling...
[39] 2023-05-05 22:43:30 {
  imputer: soft_impute,
  scaler: min_max_scaler,
  nan_threshold: 5.0 %,
  model: XGBRegressor,
  pca: PCA 5,
  feature_selector: SelectKBest 20,
  sMAPE: 77.73692748076814,
}
[37] 2023-05-05 2

  if effective_n_jobs(self.n_jobs) > 1:


[37] 2023-05-05 22:43:36 {
  imputer: mean,
  scaler: standard_scaler,
  nan_threshold: 5.0 %,
  model: ElasticNetCV,
  pca: PCA 20,
  feature_selector: SequentialFeatureSelector Forward 20,
  sMAPE: 74.71032541476238,
}
[41] 2023-05-05 22:43:52 modeling...
[41] 2023-05-05 22:44:05 {
  imputer: KNN_7,
  scaler: Robust_Scaler,
  nan_threshold: 10.0 %,
  model: GradientBoostingRegressor,
  pca: None,
  feature_selector: SelectKBest 10,
  sMAPE: 69.49036923326715,
}
[40] 2023-05-05 22:44:44 modeling...
[42] 2023-05-05 22:45:23 modeling...
[9] 2023-05-05 22:45:34 modeling...


  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[9] 2023-05-05 22:45:36 {
  imputer: KNN_6,
  scaler: Robust_Scaler,
  nan_threshold: 10.0 %,
  model: RandomForestRegressor,
  pca: PCA 20,
  feature_selector: SequentialFeatureSelector Forward 20,
  sMAPE: 70.85605828482687,
}
[42] 2023-05-05 22:45:42 {
  imputer: KNN_7,
  scaler: None,
  nan_threshold: 10.0 %,
  model: CatBoostRegressor,
  pca: PCA 20,
  feature_selector: SequentialFeatureSelector Forward 5,
  sMAPE: 75.42132152743473,
}
[28] 2023-05-05 22:45:50 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[28] 2023-05-05 22:45:51 {
  imputer: KNN_4,
  scaler: standard_scaler,
  nan_threshold: 10.0 %,
  model: ElasticNetCV,
  pca: PCA 20,
  feature_selector: SequentialFeatureSelector Forward 20,
  sMAPE: 72.9739936102179,
}
[45] 2023-05-05 22:46:01 modeling...
[47] 2023-05-05 22:46:12 modeling...
[47] 2023-05-05 22:46:13 {
  imputer: KNN_3,
  scaler: None,
  nan_threshold: 10.0 %,
  model: SVR,
  pca: PCA 5,
  feature_selector: SelectKBest 20,
  sMAPE: 71.96320246957482,
}
[43] 2023-05-05 22:46:18 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[43] 2023-05-05 22:46:19 {
  imputer: KNN_5,
  scaler: min_max_scaler,
  nan_threshold: 20.0 %,
  model: ElasticNetCV,
  pca: PCA 20,
  feature_selector: SelectKBest 20,
  sMAPE: 74.05666283884413,
}
[50] 2023-05-05 22:46:31 modeling...
[48] 2023-05-05 22:46:31 modeling...


  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[48] 2023-05-05 22:46:32 {
  imputer: soft_impute,
  scaler: None,
  nan_threshold: 1.0 %,
  model: RandomForestRegressor,
  pca: PCA 5,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 82.17975251560517,
}
[51] 2023-05-05 22:46:38 modeling...
[2] 2023-05-05 22:50:57 modeling...
[2] 2023-05-05 22:50:58 {
  imputer: soft_impute,
  scaler: Robust_Scaler,
  nan_threshold: 1.0 %,
  model: DecisionTreeRegressor,
  pca: None,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 91.7989518534362,
}
[54] 2023-05-05 22:51:00 modeling...
[54] 2023-05-05 22:51:03 {
  imputer: KNN_2,
  scaler: standard_scaler,
  nan_threshold: 0.5 %,
  model: XGBRegressor,
  pca: PCA 5,
  feature_selector: SelectKBest 10,
  sMAPE: 77.21112229432171,
}
[36] 2023-05-05 22:51:31 modeling...
[36] 2023-05-05 22:51:32 {
  imputer: KNN_2,
  scaler: None,
  nan_threshold: 5.0 %,
  model: DecisionTreeRegressor,
  pca: None,
  feature_selector: RFE 20,
  sMAPE: 89.09964619047709,
}
[55] 2023-

  if effective_n_jobs(self.n_jobs) > 1:


[59] 2023-05-05 22:52:49 {
  imputer: KNN_4,
  scaler: None,
  nan_threshold: 0.5 %,
  model: LassoCV,
  pca: PCA 20,
  feature_selector: SelectKBest 5,
  sMAPE: 72.21373810832031,
}
[56] 2023-05-05 22:52:51 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[56] 2023-05-05 22:52:51 {
  imputer: KNN_6,
  scaler: Robust_Scaler,
  nan_threshold: 15.0 %,
  model: LassoCV,
  pca: PCA 20,
  feature_selector: RFE 20,
  sMAPE: 75.20254341105912,
}
[62] 2023-05-05 22:52:58 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[62] 2023-05-05 22:52:59 {
  imputer: KNN_3,
  scaler: None,
  nan_threshold: 0.5 %,
  model: LassoCV,
  pca: PCA 20,
  feature_selector: SelectKBest 5,
  sMAPE: 72.22220830790165,
}
[63] 2023-05-05 22:53:10 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[63] 2023-05-05 22:53:10 {
  imputer: KNN_2,
  scaler: Robust_Scaler,
  nan_threshold: 1.0 %,
  model: LassoCV,
  pca: PCA 20,
  feature_selector: SelectKBest 20,
  sMAPE: 71.12145870088679,
}
[60] 2023-05-05 22:53:18 modeling...
[60] 2023-05-05 22:53:22 {
  imputer: mean,
  scaler: standard_scaler,
  nan_threshold: 5.0 %,
  model: XGBRegressor,
  pca: PCA 20,
  feature_selector: RFE 10,
  sMAPE: 73.51269371777006,
}
[64] 2023-05-05 22:53:23 modeling...
[64] 2023-05-05 22:53:23 {
  imputer: KNN_5,
  scaler: None,
  nan_threshold: 5.0 %,
  model: SVR,
  pca: None,
  feature_selector: SelectKBest 5,
  sMAPE: 72.23594478929664,
}
[65] 2023-05-05 22:53:27 modeling...
[6] 2023-05-05 22:53:32 modeling...
[57] 2023-05-05 22:53:34 modeling...
[57] 2023-05-05 22:53:40 {
  imputer: soft_impute,
  scaler: standard_scaler,
  nan_threshold: 5.0 %,
  model: GradientBoostingRegressor,
  pca: PCA 5,
  feature_selector: No Selector,
  sMAPE: 73.14322270298081,
}
[6] 2023-05-05 22:53:42 {
  imputer: KNN

  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[72] 2023-05-05 22:54:35 {
  imputer: median,
  scaler: Robust_Scaler,
  nan_threshold: 15.0 %,
  model: RandomForestRegressor,
  pca: PCA 5,
  feature_selector: SequentialFeatureSelector Forward 20,
  sMAPE: 71.76213069795966,
}
[74] 2023-05-05 22:54:37 modeling...


  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)


[74] 2023-05-05 22:54:38 {
  imputer: mean,
  scaler: min_max_scaler,
  nan_threshold: 1.0 %,
  model: RandomForestRegressor,
  pca: PCA 5,
  feature_selector: SelectKBest 5,
  sMAPE: 81.2212653087523,
}
[67] 2023-05-05 22:55:23 modeling...
[67] 2023-05-05 22:55:23 {
  imputer: soft_impute,
  scaler: standard_scaler,
  nan_threshold: 10.0 %,
  model: DecisionTreeRegressor,
  pca: PCA 20,
  feature_selector: SelectKBest 5,
  sMAPE: 93.0157158385661,
}
[69] 2023-05-05 22:55:36 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[69] 2023-05-05 22:55:37 {
  imputer: KNN_7,
  scaler: Robust_Scaler,
  nan_threshold: 10.0 %,
  model: LassoCV,
  pca: PCA 20,
  feature_selector: SequentialFeatureSelector Forward 5,
  sMAPE: 71.91901496283873,
}
[80] 2023-05-05 22:55:41 modeling...
[77] 2023-05-05 22:55:48 modeling...
[75] 2023-05-05 22:55:51 modeling...
[75] 2023-05-05 22:55:51 {
  imputer: KNN_3,
  scaler: Robust_Scaler,
  nan_threshold: 10.0 %,
  model: DecisionTreeRegressor,
  pca: PCA 10,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 98.76667738195545,
}
[80] 2023-05-05 22:55:52 {
  imputer: most_frequent,
  scaler: standard_scaler,
  nan_threshold: 0.5 %,
  model: CatBoostRegressor,
  pca: PCA 10,
  feature_selector: RFE 10,
  sMAPE: 75.2066943830437,
}
[77] 2023-05-05 22:55:59 {
  imputer: KNN_4,
  scaler: standard_scaler,
  nan_threshold: 15.0 %,
  model: XGBRegressor,
  pca: None,
  feature_selector: SelectKBest 5,
  sMAPE: 68.92701242661047,
}
[83] 2023-05-05 22:56:04 modeling...
[83] 

  if effective_n_jobs(self.n_jobs) > 1:


[90] 2023-05-05 22:58:07 {
  imputer: most_frequent,
  scaler: standard_scaler,
  nan_threshold: 1.0 %,
  model: ElasticNetCV,
  pca: PCA 10,
  feature_selector: RFE 5,
  sMAPE: 71.66041121335492,
}
[87] 2023-05-05 22:58:21 modeling...
[87] 2023-05-05 22:58:24 {
  imputer: KNN_3,
  scaler: min_max_scaler,
  nan_threshold: 20.0 %,
  model: XGBRegressor,
  pca: PCA 20,
  feature_selector: RFE 5,
  sMAPE: 79.57956402211474,
}
[93] 2023-05-05 22:58:30 modeling...
[82] 2023-05-05 22:58:54 {
  imputer: mean,
  scaler: Robust_Scaler,
  nan_threshold: 10.0 %,
  model: CatBoostRegressor,
  pca: PCA 10,
  feature_selector: SequentialFeatureSelector Forward 20,
  sMAPE: 73.31725958458708,
}
[88] 2023-05-05 22:59:10 modeling...
[88] 2023-05-05 22:59:12 {
  imputer: soft_impute,
  scaler: min_max_scaler,
  nan_threshold: 10.0 %,
  model: GradientBoostingRegressor,
  pca: PCA 10,
  feature_selector: SequentialFeatureSelector Forward 10,
  sMAPE: 73.32799150760417,
}
[97] 2023-05-05 22:59:22 modeling

  if effective_n_jobs(self.n_jobs) > 1:


[97] 2023-05-05 22:59:23 {
  imputer: median,
  scaler: min_max_scaler,
  nan_threshold: 10.0 %,
  model: ElasticNetCV,
  pca: PCA 5,
  feature_selector: RFE 20,
  sMAPE: 71.86491951223346,
}
[98] 2023-05-05 23:00:12 modeling...


  if effective_n_jobs(self.n_jobs) > 1:


[98] 2023-05-05 23:00:14 {
  imputer: KNN_5,
  scaler: Robust_Scaler,
  nan_threshold: 15.0 %,
  model: LassoCV,
  pca: PCA 20,
  feature_selector: No Selector,
  sMAPE: 71.4656204375523,
}
[96] 2023-05-05 23:00:20 modeling...
[93] 2023-05-05 23:00:34 {
  imputer: KNN_1,
  scaler: min_max_scaler,
  nan_threshold: 10.0 %,
  model: XGBRegressor,
  pca: None,
  feature_selector: No Selector,
  sMAPE: 68.6042938905225,
}
[29] 2023-05-05 23:00:47 modeling...
[94] 2023-05-05 23:01:00 modeling...
[96] 2023-05-05 23:01:37 {
  imputer: soft_impute,
  scaler: min_max_scaler,
  nan_threshold: 10.0 %,
  model: CatBoostRegressor,
  pca: PCA 20,
  feature_selector: RFE 20,
  sMAPE: 73.34484372953254,
}
