In [1]:
from config import config
from feature_generation import get_all_atms_feature_set
from preprocessing import get_input_sets, scaler_fit_transform, scaler_transform, scaler_inverse_transform
from tabTransformer import TabTransformer
from misc import nmae_error, load_pickle

import optuna
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

## Load Data

In [2]:
load_config = config['load_config']
clusters = load_config['clusters']

df = pd.read_csv(load_config['path'])
all_atms_feature_set = get_all_atms_feature_set(df, first_n = load_config['n_atms'])
all_atms_feature_set.sort_index(inplace = True)

# Reading Pickles
for cluster_feature in clusters:
    d = load_pickle(clusters[cluster_feature]['path'])
    all_atms_feature_set[cluster_feature] = all_atms_feature_set['AtmId'].map(d)

## Setting Features

In [3]:
feature_config  = config['feature_config']

categorical_features = [cat for cat in
    all_atms_feature_set.select_dtypes(include=feature_config['categorical_column_types'])
    if cat not in feature_config['excluded_categorical']]
continuous_features = [cat for cat in
    all_atms_feature_set.select_dtypes(include=feature_config['continuous_column_types'])
    if cat not in feature_config['excluded_continuous']]

groups = [continuous_features]
groups.extend(categorical_features)

## Aranging train/test Data

In [4]:
X = all_atms_feature_set[continuous_features + categorical_features]
y = all_atms_feature_set[feature_config['target']]

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

# MinMaxTransform
X_train, y_train, scaler_X, scaler_y = scaler_fit_transform(X_train, y_train, continuous_features)
X_test, y_test = scaler_transform(X_test, y_test, scaler_X, scaler_y, continuous_features)

X_train = get_input_sets(X_train, groups)
X_test  = get_input_sets(X_test, groups)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numerical_features] = scaler_X.transform(X[numerical_features])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value[:, i].tolist(), pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numerical_features] = scaler_X.transform(X[numerical_features])
A value is try

## Optuna

In [5]:
def objective(trial):

    param = {
        'dim': trial.suggest_categorical('dim', [32,64]),
        'depth': trial.suggest_categorical('depth', [1,2,3,6,12]),
        'heads': trial.suggest_categorical('heads', [2,4,8]),
        'attn_dropout': trial.suggest_categorical('attn_dropout', [0,0.1,0.2,0.3,0.4,0.5]),
        'ff_dropout': trial.suggest_categorical('ff_dropout', [0,0.1,0.2,0.3,0.4,0.5])
    }

    mlp_activation = trial.suggest_categorical('mlp_activation', ['relu', 'selu'])
    mlp_hidden = [
        (trial.suggest_categorical('mlp_1_dim', [256, 128, 64]), mlp_activation),
        (trial.suggest_categorical('mlp_2_dim', [64, 32, 16]), mlp_activation),
    ]

    tabTransformer = TabTransformer(
        categories = [len(all_atms_feature_set[cat].unique())
        if cat not in clusters.keys() else
        clusters[cat]['n_clusters']
        for cat in categorical_features],
        num_continuous = len(continuous_features),
        dim_out=1,
        mlp_hidden=mlp_hidden,
        **param
    )

    training_config = config['training_config']

    tabTransformer.compile(
        optimizer = tf.optimizers.Adam(learning_rate = trial.suggest_loguniform('learning_rate', 0.005, 0.5)),
        loss = training_config['loss']
    )

    tabTransformer.fit(
        X_train,
        y_train,
        epochs = 5,
        batch_size = training_config['batch_size'],
        verbose = False)

    error = nmae_error(scaler_inverse_transform(y_test, scaler_y), scaler_y.inverse_transform(tabTransformer.predict(X_test)))
    print(error)

    return error

In [14]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)


print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Best value:', study.best_value)

[32m[I 2021-08-10 09:53:43,031][0m A new study created in memory with name: no-name-9b9e42e2-968d-4da0-88e3-c69335c8f801[0m
[32m[I 2021-08-10 09:54:05,440][0m Trial 0 finished with value: 0.49498965698860153 and parameters: {'dim': 32, 'depth': 1, 'heads': 8, 'attn_dropout': 0.3, 'ff_dropout': 0.5, 'mlp_activation': 'selu', 'mlp_1_dim': 128, 'mlp_2_dim': 16, 'learning_rate': 0.01316667365976463}. Best is trial 0 with value: 0.49498965698860153.[0m


0.49498965698860153


[32m[I 2021-08-10 09:54:23,839][0m Trial 1 finished with value: 0.4793972493396823 and parameters: {'dim': 64, 'depth': 2, 'heads': 2, 'attn_dropout': 0, 'ff_dropout': 0.1, 'mlp_activation': 'relu', 'mlp_1_dim': 128, 'mlp_2_dim': 32, 'learning_rate': 0.017904088592949254}. Best is trial 1 with value: 0.4793972493396823.[0m


0.4793972493396823


[32m[I 2021-08-10 09:54:36,948][0m Trial 2 finished with value: 0.3310462785938368 and parameters: {'dim': 64, 'depth': 1, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.5, 'mlp_activation': 'selu', 'mlp_1_dim': 128, 'mlp_2_dim': 16, 'learning_rate': 0.006973916021973657}. Best is trial 2 with value: 0.3310462785938368.[0m


0.3310462785938368


[32m[I 2021-08-10 09:54:45,009][0m Trial 3 finished with value: 0.4786814282388371 and parameters: {'dim': 32, 'depth': 1, 'heads': 4, 'attn_dropout': 0.2, 'ff_dropout': 0, 'mlp_activation': 'relu', 'mlp_1_dim': 128, 'mlp_2_dim': 16, 'learning_rate': 0.026678362070684344}. Best is trial 2 with value: 0.3310462785938368.[0m


0.4786814282388371


[32m[I 2021-08-10 09:56:50,980][0m Trial 4 finished with value: 18.08585523109179 and parameters: {'dim': 32, 'depth': 12, 'heads': 8, 'attn_dropout': 0.1, 'ff_dropout': 0.5, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 32, 'learning_rate': 0.39344353699195006}. Best is trial 2 with value: 0.3310462785938368.[0m


18.08585523109179


[32m[I 2021-08-10 09:57:02,975][0m Trial 5 finished with value: 2.847100709574449 and parameters: {'dim': 32, 'depth': 1, 'heads': 8, 'attn_dropout': 0.4, 'ff_dropout': 0.4, 'mlp_activation': 'relu', 'mlp_1_dim': 256, 'mlp_2_dim': 32, 'learning_rate': 0.0971540786783022}. Best is trial 2 with value: 0.3310462785938368.[0m


2.847100709574449


[32m[I 2021-08-10 09:57:13,042][0m Trial 6 finished with value: 3.5994465899792822 and parameters: {'dim': 64, 'depth': 1, 'heads': 2, 'attn_dropout': 0.1, 'ff_dropout': 0, 'mlp_activation': 'selu', 'mlp_1_dim': 128, 'mlp_2_dim': 32, 'learning_rate': 0.06173783293089919}. Best is trial 2 with value: 0.3310462785938368.[0m


3.5994465899792822


[32m[I 2021-08-10 09:58:01,814][0m Trial 7 finished with value: 5.361227618779861 and parameters: {'dim': 32, 'depth': 12, 'heads': 2, 'attn_dropout': 0.3, 'ff_dropout': 0.3, 'mlp_activation': 'relu', 'mlp_1_dim': 256, 'mlp_2_dim': 32, 'learning_rate': 0.21947852263307907}. Best is trial 2 with value: 0.3310462785938368.[0m


5.361227618779861


[32m[I 2021-08-10 09:58:18,599][0m Trial 8 finished with value: 1.3495839418065185 and parameters: {'dim': 64, 'depth': 2, 'heads': 2, 'attn_dropout': 0.4, 'ff_dropout': 0.1, 'mlp_activation': 'relu', 'mlp_1_dim': 64, 'mlp_2_dim': 64, 'learning_rate': 0.14270224319163133}. Best is trial 2 with value: 0.3310462785938368.[0m


1.3495839418065185


[32m[I 2021-08-10 09:59:37,963][0m Trial 9 finished with value: 0.35396673655341976 and parameters: {'dim': 32, 'depth': 12, 'heads': 4, 'attn_dropout': 0, 'ff_dropout': 0.1, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 16, 'learning_rate': 0.019081238153226737}. Best is trial 2 with value: 0.3310462785938368.[0m


0.35396673655341976


[32m[I 2021-08-10 10:00:14,767][0m Trial 10 finished with value: 0.3876400030411554 and parameters: {'dim': 64, 'depth': 3, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.2, 'mlp_activation': 'selu', 'mlp_1_dim': 128, 'mlp_2_dim': 64, 'learning_rate': 0.005336403650658574}. Best is trial 2 with value: 0.3310462785938368.[0m


0.3876400030411554


[32m[I 2021-08-10 10:01:19,014][0m Trial 11 finished with value: 0.3354821053703796 and parameters: {'dim': 64, 'depth': 6, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.1, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 16, 'learning_rate': 0.005495859250002705}. Best is trial 2 with value: 0.3310462785938368.[0m


0.3354821053703796


[32m[I 2021-08-10 10:02:19,854][0m Trial 12 finished with value: 0.4051694964346142 and parameters: {'dim': 64, 'depth': 6, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.5, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 16, 'learning_rate': 0.005480033019951941}. Best is trial 2 with value: 0.3310462785938368.[0m


0.4051694964346142


[32m[I 2021-08-10 10:03:19,944][0m Trial 13 finished with value: 0.32449295287244573 and parameters: {'dim': 64, 'depth': 6, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.3, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 16, 'learning_rate': 0.009197764665216707}. Best is trial 13 with value: 0.32449295287244573.[0m


0.32449295287244573


[32m[I 2021-08-10 10:04:20,632][0m Trial 14 finished with value: 0.49604849757380753 and parameters: {'dim': 64, 'depth': 6, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.3, 'mlp_activation': 'selu', 'mlp_1_dim': 256, 'mlp_2_dim': 16, 'learning_rate': 0.011336858848976608}. Best is trial 13 with value: 0.32449295287244573.[0m


0.49604849757380753


[32m[I 2021-08-10 10:04:52,869][0m Trial 15 finished with value: 0.47897693771665756 and parameters: {'dim': 64, 'depth': 3, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.3, 'mlp_activation': 'selu', 'mlp_1_dim': 128, 'mlp_2_dim': 16, 'learning_rate': 0.04037885122314331}. Best is trial 13 with value: 0.32449295287244573.[0m


0.47897693771665756


[32m[I 2021-08-10 10:05:57,400][0m Trial 16 finished with value: 0.5159624009576634 and parameters: {'dim': 64, 'depth': 6, 'heads': 4, 'attn_dropout': 0.2, 'ff_dropout': 0.2, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 64, 'learning_rate': 0.009689419627341142}. Best is trial 13 with value: 0.32449295287244573.[0m


0.5159624009576634


[32m[I 2021-08-10 10:06:09,823][0m Trial 17 finished with value: 0.4514522285186496 and parameters: {'dim': 64, 'depth': 1, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.4, 'mlp_activation': 'selu', 'mlp_1_dim': 128, 'mlp_2_dim': 16, 'learning_rate': 0.00826092226028186}. Best is trial 13 with value: 0.32449295287244573.[0m


0.4514522285186496


[32m[I 2021-08-10 10:07:15,347][0m Trial 18 finished with value: 0.4791465482005055 and parameters: {'dim': 64, 'depth': 6, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.5, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 16, 'learning_rate': 0.033778457868703}. Best is trial 13 with value: 0.32449295287244573.[0m


0.4791465482005055


[32m[I 2021-08-10 10:08:00,747][0m Trial 19 finished with value: 10.574422360584437 and parameters: {'dim': 64, 'depth': 2, 'heads': 8, 'attn_dropout': 0.5, 'ff_dropout': 0.3, 'mlp_activation': 'selu', 'mlp_1_dim': 256, 'mlp_2_dim': 16, 'learning_rate': 0.06602838449972318}. Best is trial 13 with value: 0.32449295287244573.[0m


10.574422360584437
Number of finished trials: 20
Best trial: {'dim': 64, 'depth': 6, 'heads': 4, 'attn_dropout': 0.5, 'ff_dropout': 0.3, 'mlp_activation': 'selu', 'mlp_1_dim': 64, 'mlp_2_dim': 16, 'learning_rate': 0.009197764665216707}
Best value: 0.32449295287244573


In [15]:
import json
params = study.best_trial.params
with open('hyperparameters.txt', 'w') as outfile:
    json.dump(params, outfile)

In [16]:
params

{'dim': 64,
 'depth': 6,
 'heads': 4,
 'attn_dropout': 0.5,
 'ff_dropout': 0.3,
 'mlp_activation': 'selu',
 'mlp_1_dim': 64,
 'mlp_2_dim': 16,
 'learning_rate': 0.009197764665216707}