In [46]:
import numpy as np
import pandas as pd
import workflows as wf

In [44]:
# -------------- TRAINING DATA -------------------

def generate_training_data():
    """Build a synthetic training set for the symbolic-regression example.

    Ten temperature samples (273..330) and ten concentration samples (0..3)
    are laid out so that every temperature value fills a run of 100
    consecutive rows, while the ten concentrations cycle repeatedly
    underneath it, giving 1000 rows in total.

    Returns
    -------
    pandas.DataFrame
        Columns 'T' and 'c' (predictors) and 'k' (target), where
        k = b1*c*T**1.5 + b2*T*c**1.5 with b1 = 1e-3 and b2 = -2e-3.
    """
    n_copies = 100
    coef_first, coef_second = 1e-3, -2e-3  # b1, b2 of the target expression

    temperature_grid = np.linspace(273, 330, 10)
    concentration_grid = np.linspace(0, 3, 10)

    # Each temperature is held for n_copies rows; the concentration grid is
    # laid end to end n_copies times.
    samples = pd.DataFrame({
        'T': np.repeat(temperature_grid, n_copies),
        'c': np.tile(concentration_grid, n_copies),
    })

    # Ground-truth expression the symbolic regression is expected to recover.
    temp, conc = samples['T'], samples['c']
    first_term = coef_first*conc*temp.pow(1.5)
    second_term = coef_second*temp*conc.pow(1.5)
    samples['k'] = first_term + second_term

    return samples


In [5]:
# --------------  HYPER-PARAMETERS FEATURE GENERATION -------------------

# Data scaling strategy; one of 'standard_nomean', 'standard' or 'none'.
SCALING_TYPE = 'standard_nomean'

# True  -> the intercept is a free parameter of the fit.
# False -> the intercept is pinned to y0 = 0.
FIT_INTERCEPT = False

# Autofeat hyperparameter: how many rounds of feature combination to run.
# Each round builds more complex features on top of the previous ones, e.g.
# 3 steps with sqrt transformations can produce sqrt(sqrt(sqrt(x))).
FEATENG_STEPS = 6

# Autofeat hyperparameter: physical units of the predictors.
# Keys must be dataframe column names; predictors left out are treated as
# dimensionless.
# NOTE(review): 'T' is declared as "1/K" although the toy data samples it as
# a temperature (273..330) -- confirm whether "K" was intended.
UNITS = dict(T="1/K", c="mol/kg")

# Autofeat hyperparameter: number of filtering iterations applied to the
# generated features.
FEATSEL_RUNS = 5

# Autofeat hyperparameter: non-linear transformations applied to the
# initial predictors.
TRANSFORMATIONS = [
    "sqrt",
    "^2",
]


# --------------  HYPER-PARAMETERS FEATURE SELECTION -------------------

# Number of standard deviations used to pick the optimal alpha from
# cross-validation; larger values give sparser solutions.
STD_ALPHA = 1

# Rejection threshold on the t-statistic: coefficients whose t-statistic is
# below REJECTION_THR are discarded.
REJECTION_THR = 2


{'data': {'subsample': 0, 'path_data': './data', 'path_data_train': './data/dataset_train.pkl', 'path_data_val': './data/dataset_val.pkl', 'path_data_test': './data/dataset_test.pkl', 'path_results': './results', 'path_eda': './eda', 'random_state': 42}, 'skip': ['Linear_onlyT', 'Poly_2', 'Arrh_onlyT', 'Linear_onlyT', 'Linear', 'Poly_2', 'Poly_3', 'Arrh_onlyT', 'Linear_Arrh', 'Poly2_Arrh', 'Autofeat_1', 'Autofeat_2', 'Autofeat_3', 'Autofeat_4', 'Autofeat_5', 'Autofeat_6', 'Selected_2'], 'models': {'Linear_onlyT': {'scaling': 'standard', 'stdalpha': 1, 'thresshold': None, 'features': ['T'], 'workflow': 'NoGen', 'fit_intercept': True}, 'Linear': {'scaling': 'standard', 'stdalpha': 1, 'thresshold': None, 'features': ['T', 'Li', 'PC'], 'workflow': 'NoGen', 'fit_intercept': True}, 'Poly_2': {'scaling': 'standard', 'stdalpha': 1, 'thresshold': 2, 'features': ['T', 'Li', 'PC'], 'poly_order': 2, 'workflow': 'Poly', 'fit_intercept': True}, 'Poly_3': {'scaling': 'standard', 'stdalpha': 1, 'thres

In [None]:
# ---------------- INSTANTIATE WORKFLOW --------------------------------------
# Build the toy training set here so this cell runs on a fresh kernel.
# The previous version read `model_specs` and `dataset_train`, neither of
# which is defined in this notebook, and targeted a 'σ_mean' column that the
# toy data does not contain.
train_data = generate_training_data()
predictor_columns = ['T', 'c']  # same keys as UNITS
target_column = 'k'             # column produced by generate_training_data()

# All hyper-parameters come from the configuration constants defined above.
# `rejection_thresshold` (sic) is the keyword name expected by WorkflowAF.
workflow = wf.WorkflowAF(feateng_steps = FEATENG_STEPS,
                        units = UNITS,
                        featsel_runs = FEATSEL_RUNS,
                        transformations = TRANSFORMATIONS,
                        xtrain = train_data[predictor_columns],
                        ytrain = train_data[target_column],
                        scaling_type = SCALING_TYPE,
                        stdalpha = STD_ALPHA,
                        rejection_thresshold = REJECTION_THR,
                        fit_intercept = FIT_INTERCEPT)

In [None]:
def load_data():
    """Load the training data.

    In this example the data is generated artificially by
    ``generate_training_data()``. In real use you would replace the body
    with code that loads your own dataset from disk (e.g. a pickle or a csv).

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``generate_training_data()``: predictor
        columns 'T' and 'c' and target column 'k'.
    """
    # Previously a bare `pass`, which returned None instead of the data the
    # docstring describes.
    return generate_training_data()