# Imports

In [1]:
import sys
import pandas as pd

# to save results to data directory
module_path = ".."
if module_path not in sys.path:
    sys.path.insert(1, module_path)
# increase displayed columns in jupyter notebook
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 300)

In [2]:
import dill
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import lightgbm as lgbm
import h2o
from h2o.automl import H2OAutoML

from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.training import Trainer
from pytorch_widedeep.models import TabMlp, WideDeep
from pytorch_widedeep.bayesian_models import BayesianTabMlp
from pytorch_widedeep.models.transformers.saint import SAINT
from pytorch_widedeep.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    LRHistory,
    RayTuneReporter,
)
from pytorch_widedeep.initializers import (
    KaimingNormal,
    KaimingUniform,
    XavierNormal,
    XavierUniform,
    Normal,
    Uniform,
)
from pytorch_widedeep import Tab2Vec
from pytorch_widedeep.optim import RAdam
import torch
from torch.optim import Adam, SGD, lr_scheduler#, NAdam

from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune import JupyterNotebookReporter
from ray.tune.integration.wandb import WandbLoggerCallback, wandb_mixin
from ray.tune.logger import DEFAULT_LOGGERS
import wandb

import src.utils as utils
import src.common as common

import tracemalloc
tracemalloc.start()

2022-01-10 22:09:28.218228: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-10 22:09:28.218387: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Dataset

**identifiers**

In [3]:
column_types = common.json_load("#datasets/Colab_PowerConverter/column_types.json")
target = column_types["target"]
identifier = column_types["identifier"]
measurement_label = column_types["measurement_label"]

parameters = {
    "random_state": 1,
    "valid_size": 0.2,
    "test_size": 0.5,
    "scaler_mapper_def": {
        "target_col": None,
        "identifier_col": None,
        "cont_cols": StandardScaler,
    },
}

valid_size = parameters["valid_size"]
test_size = parameters["test_size"]
scaler_mapper_def = parameters["scaler_mapper_def"]
random_state = parameters["random_state"]
test_n_valid_combined = True
task = "multiclass" #(or "binary")

In [4]:
df = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")

In [5]:
# this measurement did not have a fault (?)
df = df[df[measurement_label]!="Single-Phase_Sensor_Fault"]
df.reset_index(inplace=True, drop=True)

In [6]:
fault_dict = {}
for label,i in zip(df[measurement_label].unique(), range(len(df[measurement_label].unique()))):
    df.loc[(df[measurement_label]==label) & (df[target]==1), target] = int(i+1)
    fault_dict[label] = int(i+1)

In [7]:
# imbalance of the classes
df[target].value_counts()

0     597599
5      40014
3      40001
6      40001
7      40001
8      40001
9      40001
10     40001
11     40001
13     40001
1      38971
2      38971
4       3166
12      1335
Name: fault, dtype: int64

In [8]:
fault_dict

{'Damping-320': 1,
 'Damping-32000': 2,
 'Inertia-1.2': 3,
 'LL_Fault': 4,
 'Three-Phase_Sensor_Fault': 5,
 'Weak_Grid-4_5_mH': 6,
 'Weak_Grid-1_5_mH': 7,
 'Damping-3200': 8,
 'Inertia-0.2': 9,
 'Inertia-2': 10,
 'Single_Phase_Sag': 11,
 'Three_Phase_Grid_Fault': 12,
 'Weak_Grid-7_5_mH': 13}

# Preprocessing

In [9]:
df.drop(columns=[measurement_label], inplace=True)

In [10]:
df_train, df_valid = train_test_split(df, test_size=valid_size, stratify=df[target], random_state=random_state)
df_valid, df_test = train_test_split(df_valid, test_size=test_size, stratify=df_valid[target], random_state=random_state)

df_train.reset_index(inplace=True, drop=True)
df_valid.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)

In [11]:
cont_cols = df.drop(columns=[target,identifier]).columns.values
scaler = utils.scaler_mapper(
    cont_cols=cont_cols,
    target_col=target,
    identifier=identifier,
    scaler_mapper_def=scaler_mapper_def,
)

df_train_scaled = scaler.fit_transform(df_train)
df_test_scaled = scaler.transform(df_test)
df_valid_scaled = scaler.transform(df_valid)

## H2O AutoML

In [44]:
h2o_test

f_c,P,m_d,m_q,theta,P_ref,V_DC,V_phaseA,V_phaseB,V_phaseC,I_phaseA,I_phaseB,I_phaseC,fault,sample_id
0.0764099,-0.0644905,0,0,0.103179,0,0,0.0136144,-1.25391,1.23911,0.00159406,-0.002035,0.000468371,0,1069660.0
0.0764093,-0.06449,0,0,0.670499,0,0,-1.21108,-0.0848593,1.28534,-0.0250353,0.0462085,-0.0228063,7,614815.0
0.0764093,-0.06449,0,0,1.03229,0,0,-0.717513,-0.727768,1.4383,-0.0400163,0.0394656,0.000842797,7,630845.0
0.0764093,-0.06449,0,0,-0.0804041,0,0,-1.25339,1.24611,-0.00229003,0.00109732,-0.000931532,-0.00018652,0,741526.0
0.0766861,-0.0647668,0,0,0.611718,0,0,0.00023845,-0.00149468,0.00126347,0.035414,-0.00863098,-0.0292607,5,452148.0
-15.9596,15.978,0,0,-3.04602,0,0,0.53551,0.89073,-1.42061,0.00213073,0.0130485,-0.0164733,0,1003440.0
0.0766861,-0.0647668,0,0,1.24128,0,0,0.00023845,-0.00149468,0.00126347,-0.0308137,-0.0729725,0.112734,5,479973.0
-0.0550365,-0.05134,0,0,-2.16788,0,0,1.21972,-1.28017,0.0697169,0.0838388,-0.00528121,-0.0857018,1,75794.0
0.0764147,-0.064495,0,0,0.781905,0,0,1.32264,-1.16804,-0.144324,-0.00410528,-0.0110269,0.0164344,6,539733.0
0.0764093,-0.06449,0,0,-3.30012,0,0,-0.80531,1.43398,-0.634138,0.00130697,-0.00087162,-0.000480115,0,1033900.0




In [50]:
# initialize H2O
h2o.init(log_dir="h2o_logs", log_level="WARN")

# read as h2o file
print("Reading data into H2O format")
h2o_train = h2o.H2OFrame(df_train_scaled)
h2o_valid = h2o.H2OFrame(df_valid_scaled)
h2o_test = h2o.H2OFrame(df_test_scaled)

# For binary classification, response should be a factor
h2o_train[target] = h2o_train[target].asfactor()
h2o_valid[target] = h2o_valid[target].asfactor()
h2o_test[target] = h2o_test[target].asfactor()

# Define AML task
aml = H2OAutoML(seed=random_state, max_runtime_secs=1800)

# over/under sample for classification tasks
aml.balance_classes = True

# Run it
_ = aml.train(
    x=list(cont_cols),
    y=target,
    training_frame=h2o_train,
    leaderboard_frame=h2o_valid,
)

m = aml.get_best_model()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,26 mins 38 secs
H2O_cluster_timezone:,Europe/Madrid
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.34.0.7
H2O_cluster_version_age:,16 days
H2O_cluster_name:,H2O_from_python_palo_ku3umh
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,2.520 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Reading data into H2O format


  return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
  File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
    return {os.path.basename(absfilename): open(absfilename, "rb")}


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |

  return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
  File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
    return {os.path.basename(absfilename): open(absfilename, "rb")}


████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |

  return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
  File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
    return {os.path.basename(absfilename): open(absfilename, "rb")}


████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |
23:33:05.829: _train param, Dropping bad and constant columns: [P_ref, V_DC, m_d, m_q]

██████████████████████
23:43:03.261: _train param, Dropping bad and constant columns: [P_ref, V_DC, m_d, m_q]

█████████████████████████████
23:57:25.774: _train param, Dropping bad and constant columns: [P_ref, V_DC, m_d, m_q]

Failed polling AutoML progress log: HTTP 500 Server Error:
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
<title>Error 500 Server Error</title>
</head>
<body><h2>HTTP ERROR 500</h2>
<p>Problem accessing /99/AutoML/AutoML_4_20220106_233305@@fault. Reason:
<pre>    Server Error</pre></p><h3>Caused by:</h3><pre>java.lang.OutOfMemoryError: Java heap space
</pre>

</body>
</html>

█
23:57:46.590: GBM_1_AutoML_4_20220106_233305 [GBM def_5] failed: DistributedException from /127.0.0.1:54321: 'Java heap space', caused by java.lang.OutOfMemoryError: Java

  return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
  File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
    return {os.path.basename(absfilename): open(absfilename, "rb")}


H2OResponseError: Server error java.lang.IllegalArgumentException:
  Error: Total input file size of  436  B is much larger than total cluster memory of Zero  , please use either a larger cluster or smaller data.
  Request: POST /3/Parse
    data: {'destination_frame': 'AutoML_4_20220106_233305_leaderboard', 'parse_type': 'CSV', 'separator': '44', 'check_header': '1', 'number_columns': '6', 'chunk_size': '4194304', 'delete_on_done': 'True', 'blocking': 'False', 'column_types': '["string","string","double","double","double","double"]', 'single_quotes': 'False', 'escapechar': '0', 'column_names': '["","model_id","mean_per_class_error","logloss","rmse","mse"]', 'source_frames': '["upload_8a591791a2d0d184c45385b51edf4c66"]'}


In [51]:
lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL")

  return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
  File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
    return {os.path.basename(absfilename): open(absfilename, "rb")}


H2OResponseError: Server error java.lang.IllegalArgumentException:
  Error: Total input file size of  577  B is much larger than total cluster memory of Zero  , please use either a larger cluster or smaller data.
  Request: POST /3/Parse
    data: {'destination_frame': 'AutoML_4_20220106_233305_custom_leaderboard', 'parse_type': 'CSV', 'separator': '44', 'check_header': '1', 'number_columns': '9', 'chunk_size': '4194304', 'delete_on_done': 'True', 'blocking': 'False', 'column_types': '["string","string","double","double","double","double","long","double","string"]', 'single_quotes': 'False', 'escapechar': '0', 'column_names': '["","model_id","mean_per_class_error","logloss","rmse","mse","training_time_ms","predict_time_per_row_ms","algo"]', 'source_frames': '["upload_b2adc2f5455e83f4133392c81f524ffb"]'}


In [52]:
print(lb)

This H2OFrame is empty.



In [None]:
# Leaderboard, show and save
lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL")
print(lb)
# save results
h2o.export_file(lb, path="h2o_logs/leaderboard.csv"), force=True)

print(m.model_performance(h2o_valid))

# MOJO is h2o version agnostic
model_path = os.path.join(session_dir_path, "bestmodel.zip")
m.save_mojo("h2o_logs/bestmodel.zip")

predictions = m.predict(h2o_test)

# LightGBM

## Metric and objective functions

In [18]:
def focal_loss_lgb(y_pred, dtrain, alpha, gamma, num_class):
    """
    Focal Loss for lightgbm

    Parameters:
    -----------
    y_pred: numpy.ndarray
        array with the predictions
    dtrain: lightgbm.Dataset
    alpha, gamma: float
        See original paper https://arxiv.org/pdf/1708.02002.pdf
    num_class: int
        number of classes
    """
    a,g = alpha, gamma
    y_true = dtrain.label
    # N observations x num_class arrays
    y_true = np.eye(num_class)[y_true.astype('int')]
    y_pred = y_pred.reshape(-1,num_class, order='F')
    # alpha and gamma multiplicative factors with BCEWithLogitsLoss
    def fl(x,t):
        p = 1/(1+np.exp(-x))
        return -( a*t + (1-a)*(1-t) ) * (( 1 - ( t*p + (1-t)*(1-p)) )**g) * ( t*np.log(p)+(1-t)*np.log(1-p) )
    partial_fl = lambda x: fl(x, y_true)
    grad = derivative(partial_fl, y_pred, n=1, dx=1e-6)
    hess = derivative(partial_fl, y_pred, n=2, dx=1e-6)
    # flatten in column-major (Fortran-style) order
    return grad.flatten('F'), hess.flatten('F')

def focal_loss_lgb_eval_error(y_pred, dtrain, alpha, gamma, num_class):
    """
    Focal Loss for lightgbm

    Parameters:
    -----------
    y_pred: numpy.ndarray
        array with the predictions
    dtrain: lightgbm.Dataset
    alpha, gamma: float
        See original paper https://arxiv.org/pdf/1708.02002.pdf
    num_class: int
        number of classes
    """
    a,g = alpha, gamma
    y_true = dtrain.label
    y_true = np.eye(num_class)[y_true.astype('int')]
    y_pred = y_pred.reshape(-1, num_class, order='F')
    p = 1/(1+np.exp(-y_pred))
    loss = -( a*y_true + (1-a)*(1-y_true) ) * (( 1 - ( y_true*p + (1-y_true)*(1-p)) )**g) * ( y_true*np.log(p)+(1-y_true)*np.log(1-p) )
    # a variant can be np.sum(loss)/num_class
    return 'focal_loss', np.mean(loss), False

In [12]:
test_n_valid_combined = True
n_class = pd.concat([df_train_scaled, df_valid_scaled, df_test_scaled])[target].nunique()

#config = {"verbose": -1}
config = {}
# config["is_unbalance"] = True
# config["objective"] = "multiclass"
config["num_classes"] = n_class

custom = utils.LGBM_custom_score(n_class=n_class)
fobj = lambda preds, data: custom.lgbm_focal_loss(preds, data, 0.25, 1.0)
#feval = lambda preds, data: focal_loss_lgb_eval_error(preds, data, 0.25, 1.0, n_class)
feval = [
    lambda preds, data: [
        custom.lgbm_focal_loss_eval(preds, data, 0.25, 1.0),
         custom.lgbm_f1(preds, data),
         custom.lgbm_precision(preds, data),
         custom.lgbm_recall(preds, data),
         custom.lgbm_accuracy(preds, data),
    ]
]
#ray_metric = "-" + "focal_loss"

## Datasets

In [13]:
lgbtrain = lgbm.Dataset(
    df_train.drop(columns=[target]+[identifier]),
    df_train[target],
    free_raw_data=False,
)
lgbvalid = lgbm.Dataset(
    df_valid.drop(columns=[target]+[identifier]),
    df_valid[target],
    reference=lgbtrain,
    free_raw_data=False,
)

if test_n_valid_combined:
    df_testNvalid_enc = pd.concat([df_valid, df_test]).reset_index(
        drop=True
    )
    lgbtest = lgbm.Dataset(
        df_testNvalid_enc.drop(columns=[target]+[identifier]),
        df_testNvalid_enc[target],
        free_raw_data=False,
    )
else:
    lgbtest = lgbm.Dataset(
        df_test.drop(columns=[target]+[identifier]),
        df_test[target],
        reference=lgbtrain,
        free_raw_data=False,
    )

## Train model

In [14]:
%%time
model = lgbm.train(
    config,
    lgbtrain,
    valid_sets=[lgbvalid],
    fobj=fobj,
    feval=feval,
)

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2295
[LightGBM] [Info] Number of data points in the train set: 832051, number of used features: 9
[1]	valid_0's focal_loss: 0.215605	valid_0's f1: 0.887934	valid_0's precision: 0.888266	valid_0's recall_0: 0.890163	valid_0's accuracy: 0.888266
[2]	valid_0's focal_loss: 0.188667	valid_0's f1: 0.889244	valid_0's precision: 0.889689	valid_0's recall_0: 0.894021	valid_0's accuracy: 0.889689
[3]	valid_0's focal_loss: 0.165771	valid_0's f1: 0.897054	valid_0's precision: 0.895756	valid_0's recall_0: 0.900642	valid_0's accuracy: 0.895756
[4]	valid_0's focal_loss: 0.146172	valid_0's f1: 0.898813	valid_0's precision: 0.897189	valid_0's recall_0: 0.902035	valid_0's accuracy: 0.897189
[5]	valid_0's focal_loss: 0.129293	valid_0's f1: 0.89918	valid_0's precision: 0.897891	valid_0's recall_0: 0.901601	valid_0's accuracy: 0.897891


KeyboardInterrupt: 

## Prediction & Evaluation

In [79]:
predicted = model.predict(lgbtest.data).argmax(axis=1)
actual = lgbtest.label
print(classification_report(predicted, actual))

              precision    recall  f1-score   support

           0       0.97      0.92      0.95    126433
           1       0.10      0.50      0.17      1622
           2       0.49      0.78      0.60      4952
           3       0.12      0.26      0.17      3709
           4       0.00      0.00      0.00       202
           5       0.36      0.87      0.51      3298
           6       0.14      0.33      0.20      3375
           7       0.59      0.80      0.68      5892
           8       0.22      0.21      0.22      8595
           9       0.13      0.20      0.16      5105
          10       0.14      0.32      0.20      3567
          11       0.68      0.44      0.53     12306
          12       0.00      0.00      0.00        49
          13       0.79      0.22      0.34     28908

    accuracy                           0.70    208013
   macro avg       0.34      0.42      0.34    208013
weighted avg       0.80      0.70      0.72    208013



## w RayTune

In [54]:
start = time()

#config["eta"] = tune.loguniform(1e-4, 1e-1),
#config["subsample"] = tune.uniform(0.5, 1.0),
config["max_depth"] = tune.randint(1, 9),
# config["wandb"]["project"] = "GBM_classifier",
# config["wandb"]["api_key_file"] = "../data/wandb_api.key",
# config["wandb"]["log_config"] = True


def training_function(config, train, valid):
    lgbm_config = config.copy()
    #lgbm_config.pop("wandb")
    trainer = lgbm.train(
        lgbm_config,
        train,
        valid_sets=[valid],
        valid_names=[""],
        callbacks=[
            TuneReportCheckpointCallback(
                {
                    ray_metric: ray_metric,
                }
            )
        ],
    )


asha_scheduler = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    metric=ray_metric,
    mode="min",
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)

analysis = tune.run(
    tune.with_parameters(training_function, train=lgbtrain, valid=lgbvalid),
    # resources_per_trial={"cpu": 4, "gpu": 0},
    num_samples=2,
    progress_reporter=JupyterNotebookReporter(overwrite=True),
    scheduler=asha_scheduler,
    config=config,
    #loggers=DEFAULT_LOGGERS + (WandbLogger,),
)

Trial name,status,loc
training_function_91209_00000,ERROR,172.18.71.208:626
training_function_91209_00001,ERROR,172.18.71.208:624

Trial name,# failures,error file
training_function_91209_00000,1,/home/palo/ray_results/training_function_2021-11-08_10-38-02/training_function_91209_00000_0_2021-11-08_10-38-02/error.txt
training_function_91209_00001,1,/home/palo/ray_results/training_function_2021-11-08_10-38-02/training_function_91209_00001_1_2021-11-08_10-38-02/error.txt


TuneError: ('Trials did not complete', [training_function_91209_00000, training_function_91209_00001])

In [40]:
analysis.trial_dataframes

### Train best params model

In [None]:
runtime = time() - start
print("Optimization time:\n{}".format(runtime))

params = copy(analysis.get_best_config(ray_metric, "min"))
params.pop("wandb")
# params["n_estimators"] = 1000

start = time()
model = lgbm.train(
    params,
    flgbtrain,
    valid_sets=[lgbtest],
    callbacks=[lgbm.log_evaluation(show_stdv=False)],
)
runtime = time() - start
print("Final model training time:\n{}".format(str(datetime.timedelta(seconds=runtime))))a

### Tensorboard visualization

In [None]:
from tensorboard import notebook

notebook.list()

In [None]:
%load_ext tensorboard
%tensorboard --logdir ~/ray_results

# Deep Learning Models

In [None]:
X_train, X_valid, tab_preprocessor = utils.dl_train_prep(
    data_train=df_train_scaled,
    data_valid=df_valid_scaled,
    identifier=identifier,
    cont_cols=cont_cols,
    target_col=target,
)

test_n_valid_combined = True
# X_tab_test = tab_preprocessor.transform(data_test_scaled).astype(float)
if test_n_valid_combined:
    test = pd.concat([df_valid_scaled, df_test_scaled]).reset_index(drop=True)
else:
    test = df_test_scaled.copy()

X_test = {"X_tab": tab_preprocessor.transform(test)}

n_classes = pd.concat([df_train_scaled, df_valid_scaled, df_test_scaled])[target].nunique()
metrics = utils.dl_metrics(n_classes)


input_layer = len(tab_preprocessor.continuous_cols)
output_layer = n_classes
hidden_layers = utils.dl_design(
    input_layer, 2, output_layer, design="funnel"
).hidden_layers()

## TabMLP

In [None]:
deeptabular_net = TabMlp(
    mlp_hidden_dims=hidden_layers,
    column_idx=tab_preprocessor.column_idx,
    continuous_cols=tab_preprocessor.continuous_cols,
    mlp_batchnorm=True,
    mlp_batchnorm_last=True,
    mlp_linear_first=True,
)
model = WideDeep(deeptabular=deeptabular_net, pred_dim=output_layer)
model

In [None]:
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}

early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)

In [None]:
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced

trainer = Trainer(
    model,
    objective=objective,
    callbacks=[early_stopping, model_checkpoint],
    lr_schedulers=schedulers,
    initializers=initializers,
    optimizers=optimizers,
    metrics=metrics,
)

trainer.fit(
    X_train=X_train,
    X_val=X_valid,
    n_epochs=50,
    batch_size=1000,
    custom_dataloader=dataloader,
    oversample_mul=5,
)

In [None]:
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)

## Transformers

In [None]:
model = SAINT(
    input_dim=input_layer,
    column_idx=tab_preprocessor.column_idx,
    continuous_cols=tab_preprocessor.continuous_cols,
)
model

In [None]:
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}

early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)

In [None]:
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced

trainer = Trainer(
    model,
    objective=objective,
    callbacks=[early_stopping, model_checkpoint],
    lr_schedulers=schedulers,
    initializers=initializers,
    optimizers=optimizers,
    metrics=metrics,
)

trainer.fit(
    X_train=X_train,
    X_val=X_valid,
    n_epochs=50,
    batch_size=1000,
    custom_dataloader=dataloader,
    oversample_mul=5,
)

In [None]:
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)

## Bayes

In [None]:
model = BayesianTabMlp(
    mlp_hidden_dims=hidden_layers,
    column_idx=tab_preprocessor.column_idx,
    continuous_cols = tab_preprocessor.continuous_cols,
)
model

In [None]:
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}

early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)

In [None]:
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced

trainer = Trainer(
    model,
    objective=objective,
    callbacks=[early_stopping, model_checkpoint],
    lr_schedulers=schedulers,
    initializers=initializers,
    optimizers=optimizers,
    metrics=metrics,
)

trainer.fit(
    X_train=X_train,
    X_val=X_valid,
    n_epochs=50,
    batch_size=1000,
    custom_dataloader=dataloader,
    oversample_mul=5,
)

In [None]:
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)

## w RayTune

In [None]:
%%time
# Optimizers
deep_opt_sgd_01 = SGD(model.deeptabular.parameters(), lr=0.1)
deep_opt_sgd_001 = SGD(model.deeptabular.parameters(), lr=0.01)
deep_opt_adam_01 = Adam(model.deeptabular.parameters(), lr=0.1)
deep_opt_adam_001 = Adam(model.deeptabular.parameters(), lr=0.01)
# LR Schedulers
deep_sch_StepLR5 = lr_scheduler.StepLR(deep_opt, step_size=5)
deep_sch_StepLR10 = lr_scheduler.StepLR(deep_opt, step_size=10)

input_layer = len(tab_preprocessor.continuous_cols)
output_layer = n_classes

hidden_layers2 = utils.dl_design(input_layer, 2, output_layer, design="funnel")
hidden_layers3 = utils.dl_design(input_layer, 3, output_layer, design="funnel")
hidden_layers5 = utils.dl_design(input_layer, 5, output_layer, design="funnel")
hidden_layers10 = utils.dl_design(input_layer, 10, output_layer, design="funnel")

config = {
    "batch_size": tune.grid_search([100, 1000, 10000]),
    "deeptab_opt": tune.grid_search(
        [
            deep_opt_sgd_01,
            deep_opt_adam_01,
            deep_opt_sgd_001,
            deep_opt_adam_001,
        ]
    ),
    "deeptab_sch": tune.grid_search([deep_sch_StepLR5]),  # , deep_sch_StepLR10]),
    "hidden_layers": tune.grid_search(
        [hidden_layers2, hidden_layers3, hidden_layers5, hidden_layers10]
    ),
    "wandb": {
        "project": "dl_gm",
        "api_key_file": "/home/jovyan/repos/pltv/data/wandb_api.key",
    },
}

objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced


@wandb_mixin
def training_function(config, X_train, X_val):
    early_stopping = EarlyStopping()
    model_checkpoint = ModelCheckpoint(save_best_only=True, wb=wandb)

    deeptabular = TabMlp(
        mlp_hidden_dims=config["hidden_layers"].hidden_layers(),
        column_idx=tab_preprocessor.column_idx,
        embed_input=tab_preprocessor.embeddings_input,
        continuous_cols=tab_preprocessor.continuous_cols,
        mlp_batchnorm=True,
        mlp_batchnorm_last=True,
        mlp_linear_first=True,
    )

    model = WideDeep(wide=wide, deeptabular=deeptabular)

    trainer = Trainer(
        model,
        objective=objective,
        callbacks=[RayTuneReporter, early_stopping, model_checkpoint],
        lr_schedulers={"deeptabular": config["deeptab_sch"]},
        initializers={"deeptabular": XavierNormal},
        optimizers={"deeptabular": config["deeptab_opt"]},
        metrics=metrics,
        verbose=0,
    )

    trainer.fit(
        X_train=X_train,
        X_val=X_val,
        n_epochs=50,
        batch_size=config["batch_size"],
        custom_dataloader=dataloader,
        oversample_mul=5,
    )


# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-hyperband
asha_scheduler = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    metric="_metric/val_loss",
    mode="min",
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)

analysis = tune.run(
    tune.with_parameters(training_function, X_train=X_train, X_val=X_valid),
    # resources_per_trial={"cpu": 4, "gpu": 0},
    num_samples=1,
    progress_reporter=JupyterNotebookReporter(overwrite=True),
    scheduler=asha_scheduler,
    config=config,
    callbacks=[
        WandbLoggerCallback(
            project=config["wandb"]["project"],
            api_key_file=config["wandb"]["api_key_file"],
            log_config=True,
        )
    ],
)

### Train the Best model

In [None]:
%%time
params = copy(analysis.get_best_config("_metric/val_loss", "min"))
params.pop("wandb")

trainer = Trainer(
    model,
    objective=objective,
    callbacks=[LRHistory(n_epochs=10)],
    lr_schedulers={"wide": params["wide_sch"], "deeptabular": params["deeptab_sch"]},
    initializers={"wide": XavierNormal, "deeptabular": XavierNormal},
    optimizers={"wide": params["wide_opt"], "deeptabular": params["deeptab_opt"]},
    metrics=metrics,
    verbose=0,
)

trainer.fit(
    X_train=X_train,
    X_val=X_val,
    n_epochs=5,
    batch_size=params["batch_size"],
    custom_dataloader=dataloader,
    oversample_mul=5,
)