In [1]:
sys.path.append("../")
from config import config
from feature_generation import get_all_atms_feature_set
from preprocessing import get_input_sets, scaler_fit_transform, scaler_transform, scaler_inverse_transform
from tabTransformer import TabTransformer
from misc import nmae_error, load_pickle

import optuna
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

## Load Data

In [3]:
load_config = config['load_config']

try:
    config = read_hyperparameters_from_file(load_config['hyperparameter_path'])
except:
    print("WARNING: Hyperparameter file (%s) not found. Using the default config." % load_config['hyperparameter_path'])

clusters = load_config['clusters']

df = pd.read_csv("../" + load_config['data_path'])
all_atms_feature_set = get_all_atms_feature_set(df, first_n = load_config['n_atms'])
all_atms_feature_set.sort_index(inplace = True)

# Reading Pickles
for cluster_feature in clusters:
    d = load_pickle("../" + clusters[cluster_feature]['path'])
    all_atms_feature_set[cluster_feature] = all_atms_feature_set['AtmId'].map(d)



## Setting Features

In [5]:
feature_config  = config['feature_config']

categorical_features = [cat for cat in
    all_atms_feature_set.select_dtypes(include=feature_config['categorical_column_types'])
    if cat not in feature_config['excluded_categorical']]
continuous_features = [cat for cat in
    all_atms_feature_set.select_dtypes(include=feature_config['continuous_column_types'])
    if cat not in feature_config['excluded_continuous']]

groups = [continuous_features]
groups.extend(categorical_features)

## Aranging train/test Data

In [6]:
X = all_atms_feature_set[continuous_features + categorical_features]
y = all_atms_feature_set[feature_config['target']]

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

# MinMaxTransform
X_train, y_train, scaler_X, scaler_y = scaler_fit_transform(X_train, y_train, continuous_features)
X_test, y_test = scaler_transform(X_test, y_test, scaler_X, scaler_y, continuous_features)

X_train = get_input_sets(X_train, groups)
X_test  = get_input_sets(X_test, groups)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numerical_features] = scaler_X.transform(X[numerical_features])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value[:, i].tolist(), pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numerical_features] = scaler_X.transform(X[numerical_features])
A value is try

## Optuna

In [7]:
def objective(trial):

    param = {
        'dim': trial.suggest_categorical('dim', [2,4,8]),
        'depth': trial.suggest_categorical('depth', [1,2,3,6,12]),
        'heads': trial.suggest_categorical('heads', [2,4,8]),
        'attn_dropout': trial.suggest_categorical('attn_dropout', [0,0.1,0.2,0.3,0.4,0.5]),
        'ff_dropout': trial.suggest_categorical('ff_dropout', [0,0.1,0.2,0.3,0.4,0.5])
    }

    mlp_activation = trial.suggest_categorical('mlp_activation', ['relu', 'selu'])
    mlp_hidden = [
        (trial.suggest_categorical('mlp_1_dim', [256, 128, 64]), mlp_activation),
        (trial.suggest_categorical('mlp_2_dim', [64, 32, 16]), mlp_activation),
    ]

    tabTransformer = TabTransformer(
        categories = [len(all_atms_feature_set[cat].unique())
        if cat not in clusters.keys() else
        clusters[cat]['n_clusters']
        for cat in categorical_features],
        num_continuous = len(continuous_features),
        dim_out=1,
        mlp_hidden=mlp_hidden,
        **param
    )

    training_config = config['training_config']

    tabTransformer.compile(
        optimizer = tf.optimizers.Adam(learning_rate = trial.suggest_loguniform('learning_rate', 0.005, 0.5)),
        loss = training_config['loss']
    )

    tabTransformer.fit(
        X_train,
        y_train,
        epochs = 5,
        batch_size = training_config['batch_size'],
        verbose = False)

    error = nmae_error(scaler_inverse_transform(y_test, scaler_y), scaler_y.inverse_transform(tabTransformer.predict(X_test)))
    print(error)

    return error

In [8]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)


print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Best value:', study.best_value)

[32m[I 2021-08-16 12:56:30,489][0m A new study created in memory with name: no-name-2ae91358-c80f-4032-9878-d62e589bd4af[0m
[32m[I 2021-08-16 12:58:32,083][0m Trial 0 finished with value: 0.4960148772084476 and parameters: {'dim': 8, 'depth': 6, 'heads': 4, 'attn_dropout': 0, 'ff_dropout': 0.3, 'mlp_activation': 'relu', 'mlp_1_dim': 128, 'mlp_2_dim': 32, 'learning_rate': 0.027794021760071407}. Best is trial 0 with value: 0.4960148772084476.[0m


0.4960148772084476


[32m[I 2021-08-16 12:58:57,956][0m Trial 1 finished with value: 0.4528914646101807 and parameters: {'dim': 4, 'depth': 2, 'heads': 4, 'attn_dropout': 0.4, 'ff_dropout': 0, 'mlp_activation': 'selu', 'mlp_1_dim': 256, 'mlp_2_dim': 64, 'learning_rate': 0.01979513325467289}. Best is trial 1 with value: 0.4528914646101807.[0m


0.4528914646101807


[32m[I 2021-08-16 12:59:19,050][0m Trial 2 finished with value: 0.4960526883650339 and parameters: {'dim': 8, 'depth': 2, 'heads': 2, 'attn_dropout': 0.4, 'ff_dropout': 0.4, 'mlp_activation': 'relu', 'mlp_1_dim': 256, 'mlp_2_dim': 16, 'learning_rate': 0.014691151008679938}. Best is trial 1 with value: 0.4528914646101807.[0m


0.4960526883650339


[32m[I 2021-08-16 12:59:44,859][0m Trial 3 finished with value: 2.190167800138477 and parameters: {'dim': 4, 'depth': 1, 'heads': 8, 'attn_dropout': 0.3, 'ff_dropout': 0.3, 'mlp_activation': 'selu', 'mlp_1_dim': 256, 'mlp_2_dim': 32, 'learning_rate': 0.049978573580817545}. Best is trial 1 with value: 0.4528914646101807.[0m


2.190167800138477


[32m[I 2021-08-16 13:01:18,770][0m Trial 4 finished with value: 0.49626048502722564 and parameters: {'dim': 4, 'depth': 12, 'heads': 2, 'attn_dropout': 0.4, 'ff_dropout': 0.2, 'mlp_activation': 'selu', 'mlp_1_dim': 256, 'mlp_2_dim': 16, 'learning_rate': 0.0841481904770034}. Best is trial 1 with value: 0.4528914646101807.[0m


0.49626048502722564


[32m[I 2021-08-16 13:02:28,214][0m Trial 5 finished with value: 0.3041871782806189 and parameters: {'dim': 4, 'depth': 6, 'heads': 4, 'attn_dropout': 0.4, 'ff_dropout': 0.2, 'mlp_activation': 'relu', 'mlp_1_dim': 128, 'mlp_2_dim': 32, 'learning_rate': 0.04965999535284936}. Best is trial 5 with value: 0.3041871782806189.[0m


0.3041871782806189


[32m[I 2021-08-16 13:04:58,945][0m Trial 6 finished with value: 0.3166744749299896 and parameters: {'dim': 4, 'depth': 12, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.4, 'mlp_activation': 'relu', 'mlp_1_dim': 128, 'mlp_2_dim': 32, 'learning_rate': 0.00645356059907557}. Best is trial 5 with value: 0.3041871782806189.[0m


0.3166744749299896


[32m[I 2021-08-16 13:05:22,432][0m Trial 7 finished with value: 0.4963896583187519 and parameters: {'dim': 8, 'depth': 1, 'heads': 4, 'attn_dropout': 0, 'ff_dropout': 0.4, 'mlp_activation': 'relu', 'mlp_1_dim': 128, 'mlp_2_dim': 32, 'learning_rate': 0.084246236126458}. Best is trial 5 with value: 0.3041871782806189.[0m


0.4963896583187519


[32m[I 2021-08-16 13:07:51,856][0m Trial 8 finished with value: 0.2979836569360411 and parameters: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.3, 'ff_dropout': 0.3, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.012252100152243872}. Best is trial 8 with value: 0.2979836569360411.[0m


0.2979836569360411


[32m[I 2021-08-16 13:09:18,312][0m Trial 9 finished with value: 0.49684513975362476 and parameters: {'dim': 2, 'depth': 12, 'heads': 2, 'attn_dropout': 0.4, 'ff_dropout': 0.4, 'mlp_activation': 'relu', 'mlp_1_dim': 128, 'mlp_2_dim': 16, 'learning_rate': 0.014261901169394221}. Best is trial 8 with value: 0.2979836569360411.[0m


0.49684513975362476


[32m[I 2021-08-16 13:10:15,578][0m Trial 10 finished with value: 6.416231864231361 and parameters: {'dim': 2, 'depth': 3, 'heads': 8, 'attn_dropout': 0.2, 'ff_dropout': 0.1, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 64, 'learning_rate': 0.42895802645326947}. Best is trial 8 with value: 0.2979836569360411.[0m


6.416231864231361


[32m[I 2021-08-16 13:12:37,298][0m Trial 11 finished with value: 0.2969554478414453 and parameters: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.0052039954462104075}. Best is trial 11 with value: 0.2969554478414453.[0m


0.2969554478414453


[32m[I 2021-08-16 13:14:50,280][0m Trial 12 finished with value: 0.30283922625535215 and parameters: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.005026686474794017}. Best is trial 11 with value: 0.2969554478414453.[0m


0.30283922625535215


[32m[I 2021-08-16 13:17:03,552][0m Trial 13 finished with value: 0.297281371185418 and parameters: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.007696136223857897}. Best is trial 11 with value: 0.2969554478414453.[0m


0.297281371185418


[32m[I 2021-08-16 13:19:17,747][0m Trial 14 finished with value: 0.3009790027662831 and parameters: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 64, 'learning_rate': 0.007845505019693777}. Best is trial 11 with value: 0.2969554478414453.[0m


0.3009790027662831


[32m[I 2021-08-16 13:20:05,910][0m Trial 15 finished with value: 1.0563159668668125 and parameters: {'dim': 2, 'depth': 3, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.26497200601294635}. Best is trial 11 with value: 0.2969554478414453.[0m


1.0563159668668125


[32m[I 2021-08-16 13:22:12,690][0m Trial 16 finished with value: 0.30149461763270063 and parameters: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.009585032382502306}. Best is trial 11 with value: 0.2969554478414453.[0m


0.30149461763270063


[32m[I 2021-08-16 13:24:19,297][0m Trial 17 finished with value: 0.29787149271741253 and parameters: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.03238815942049266}. Best is trial 11 with value: 0.2969554478414453.[0m


0.29787149271741253


[32m[I 2021-08-16 13:25:52,981][0m Trial 18 finished with value: 0.42871598965763874 and parameters: {'dim': 2, 'depth': 6, 'heads': 8, 'attn_dropout': 0.5, 'ff_dropout': 0.1, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 64, 'learning_rate': 0.005770561330846152}. Best is trial 11 with value: 0.2969554478414453.[0m


0.42871598965763874


[32m[I 2021-08-16 13:26:51,559][0m Trial 19 finished with value: 0.4282102126921229 and parameters: {'dim': 8, 'depth': 2, 'heads': 8, 'attn_dropout': 0.2, 'ff_dropout': 0, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 16, 'learning_rate': 0.13805196600484013}. Best is trial 11 with value: 0.2969554478414453.[0m


0.4282102126921229
Number of finished trials: 20
Best trial: {'dim': 4, 'depth': 6, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.0052039954462104075}
Best value: 0.2969554478414453


In [9]:
import json
params = study.best_trial.params
with open('../hyperparameters.txt', 'w') as outfile:
    json.dump(params, outfile)

In [10]:
params

{'dim': 4,
 'depth': 6,
 'heads': 8,
 'attn_dropout': 0.1,
 'ff_dropout': 0.5,
 'mlp_activation': 'relu',
 'mlp_1_dim': 64,
 'mlp_2_dim': 32,
 'learning_rate': 0.0052039954462104075}