### This script loads the property dataset and runs **AutoGluon** on it. First a basic model is run, then different configurations are tried. Finally, possible **third-level ensembling** of the models is explored.

In [1]:
from autogluon.tabular import TabularDataset, TabularPredictor

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Module-level seed value.
# NOTE(review): load_fold declares its own `random_seed=42` default and does not
# read this global, so changing it here alone does not affect the sampling there.
random_seed = 42

In [5]:
## Function for loading one of the 10 folds of the property dataset and concatenating the X and y values for train and test respectively.
import pandas as pd

def load_fold(fold_number, random_seed=42, sample_size=None):
    """Load one fold of the property dataset as AutoGluon ``TabularDataset`` objects.

    The four parquet files of the fold (``X_train``, ``y_train``, ``X_test``,
    ``y_test``) are read from ``../../data/361092/<fold_number>/``. Features and
    target are joined column-wise so that each returned dataset contains the
    label column next to the feature columns.

    Args:
        fold_number: Identifier of the fold; used as the directory name.
        random_seed: Seed passed to ``sample`` when subsampling.
        sample_size: Optional number of rows to draw from the train and from
            the test data. A falsy value (``None`` or ``0``) disables
            subsampling. The value is capped at the number of rows available
            in each split, so a test split smaller than ``sample_size`` is
            returned in full (shuffled) instead of raising ``ValueError``.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple.
    """
    # All four files live in the same fold directory. The original X_train path
    # carried a stray "1" after the fold number and pointed at another directory.
    fold_dir = f'../../data/361092/{fold_number}'
    df_X_train = pd.read_parquet(f'{fold_dir}/X_train.parquet')
    df_y_train = pd.read_parquet(f'{fold_dir}/y_train.parquet')
    df_X_test = pd.read_parquet(f'{fold_dir}/X_test.parquet')
    df_y_test = pd.read_parquet(f'{fold_dir}/y_test.parquet')

    # Concatenate the X and y values column-wise for train and test respectively.
    df_train = pd.concat([df_X_train, df_y_train], axis=1)
    df_test = pd.concat([df_X_test, df_y_test], axis=1)

    # Convert to AutoGluon's TabularDataset.
    train_dataset = TabularDataset(df_train)
    test_dataset = TabularDataset(df_test)

    if sample_size:
        # Sampling without replacement fails when n exceeds the population,
        # which is likely for the (smaller) test split; cap n per split.
        train_dataset = train_dataset.sample(
            n=min(sample_size, len(df_train)), random_state=random_seed
        )
        test_dataset = test_dataset.sample(
            n=min(sample_size, len(df_test)), random_state=random_seed
        )

    return train_dataset, test_dataset

# Name of the target (label) column; it is also the default value of the
# `label_property` parameter of fit_gluon.
label_property = 'oz252'


In [6]:
## Hyperparameter configurations for the individual AutoGluon model families
# A single dict for now; different variants for the level 1 and level 2 models are tried later.

hyperparameters = {
    # LightGBM: a list holds one entry per configuration to train.
    'GBM': [
        {
            'learning_rate': 0.1,
            'num_leaves': 31,
            'feature_fraction': 0.9,
        },
        {
            'learning_rate': 0.05,
            'num_leaves': 45,
            'feature_fraction': 0.8,
        },
    ],
    # CatBoost
    'CAT': {'iterations': 1000, 'depth': 7, 'learning_rate': 0.1, 'l2_leaf_reg': 3},
    # XGBoost
    'XGB': {'n_estimators': 1000, 'max_depth': 6, 'learning_rate': 0.1, 'subsample': 0.8},
    # Neural network (PyTorch backend)
    'NN_TORCH': {'num_epochs': 10, 'learning_rate': 1e-3, 'layers': [100, 100]},
    # Random forest
    'RF': {'n_estimators': 100, 'max_depth': 15, 'min_samples_split': 2},
}

In [None]:
## Function to fit the model, first with base parameter config options, but later adding more advanced options

def fit_gluon(train_dataset, problem_type='regression', label_property=label_property, hyperparameters=None, eval_metric='r2', presets='medium_quality', time_limits=100):
