<a target="_blank" href="https://colab.research.google.com/github/yandex-research/tabm/blob/main/example.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# TabM

This notebook provides a usage example of the `tabm` package from the
[TabM](https://github.com/yandex-research/tabm) project.

In [1]:

import math
import random
from copy import deepcopy
from typing import Any, Literal, NamedTuple, Optional

import numpy as np
import rtdl_num_embeddings  # https://github.com/yandex-research/rtdl-num-embeddings
import scipy.special
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import tabm
import torch
import torch.nn as nn
import torch.optim
from torch import Tensor

import numpy as np
import pandas as pd
from tqdm import tqdm
import os

from my_tabm import apply_tabm_cv

In [2]:
# Seed each random number generator used by the notebook; every library gets a
# different offset from the same base seed.
seed = 0
random.seed(seed)
np.random.seed(seed + 1)
torch.manual_seed(seed + 2)
# No-op last statement -- presumably kept so the cell does not echo the return
# value of the call above (TODO confirm).
pass

# Dataset (Shell)

In [3]:
# Competition data: training set, test set and the sample submission file.
df_train = pd.read_csv('./dataset/train.csv')
df_test = pd.read_csv('./dataset/test.csv')
df_submission = pd.read_csv('./dataset/sample_solution.csv')

# Submission file whose predictions are attached to the test rows below.
cur_best_path = './submission_cur_best+tabm_b1234_10fold(cb)_93.92695.csv'

df_best = pd.read_csv(cur_best_path)

# Both frames carry the default positional index, and concat(axis=1) aligns
# rows on that index. A row-count mismatch would silently pad with NaN, so fail
# loudly instead.
assert len(df_test) == len(df_best), (
    f'test set has {len(df_test)} rows but {cur_best_path!r} has {len(df_best)}'
)

# Test features side by side with the predictions from `cur_best_path`.
df_test_pred = pd.concat([df_test, df_best], axis=1)

In [4]:
# Display the names of the first 55 columns of the training frame.
df_train.columns[:55]


Index(['Component1_fraction', 'Component2_fraction', 'Component3_fraction',
       'Component4_fraction', 'Component5_fraction', 'Component1_Property1',
       'Component2_Property1', 'Component3_Property1', 'Component4_Property1',
       'Component5_Property1', 'Component1_Property2', 'Component2_Property2',
       'Component3_Property2', 'Component4_Property2', 'Component5_Property2',
       'Component1_Property3', 'Component2_Property3', 'Component3_Property3',
       'Component4_Property3', 'Component5_Property3', 'Component1_Property4',
       'Component2_Property4', 'Component3_Property4', 'Component4_Property4',
       'Component5_Property4', 'Component1_Property5', 'Component2_Property5',
       'Component3_Property5', 'Component4_Property5', 'Component5_Property5',
       'Component1_Property6', 'Component2_Property6', 'Component3_Property6',
       'Component4_Property6', 'Component5_Property6', 'Component1_Property7',
       'Component2_Property7', 'Component3_Property7', 'C

In [4]:
# Remove the 'ID' column(s) from the combined test frame.
# errors='ignore' keeps this self-referential cell safe to re-run: on a second
# execution 'ID' is already gone and a plain drop() would raise KeyError.
df_test_pred = df_test_pred.drop(columns=['ID'], errors='ignore')

In [5]:
def get_clipped_and_normal_columns(df_train, freq_threshold):
    """Return the columns whose most frequent value is repeated too often.

    A column is reported when its most common (non-NaN) value occurs strictly
    more than ``freq_threshold`` times.

    Despite the function's name, only the "clipped" column list is returned;
    the return shape is kept as-is so existing callers continue to work.

    Args:
        df_train: DataFrame whose columns are inspected.
        freq_threshold: Occurrence count the mode must exceed for a column to
            be reported.

    Returns:
        list: Column labels, in the frame's column order.
    """
    clipped_cols = []

    for col in df_train.columns:
        # value_counts() sorts by frequency (descending) and ignores NaN, so
        # the first entry is the mode's count. It is empty for an all-NaN
        # column, which therefore can never be "clipped".
        counts = df_train[col].value_counts()
        if not counts.empty and counts.iloc[0] > freq_threshold:
            clipped_cols.append(col)

    return clipped_cols

In [6]:
# Columns at positions 5..54 of the training frame (the cell output above lists
# them as Component*_Property* columns).
property_cols = df_train.columns[5:55]
# Property columns whose most frequent value appears more than 20 times.
clipped_cols = get_clipped_and_normal_columns(df_train[property_cols], 20)

# df_train = df_train.drop(columns=clipped_cols)
# df_test_pred = df_test_pred.drop(columns=clipped_cols)
# df_test = df_test.drop(columns=clipped_cols)

In [7]:
# Names of the ten targets: BlendProperty1 .. BlendProperty10.
target_cols = list(map('BlendProperty{}'.format, range(1, 11)))
# Every remaining column of the training frame is a model input; the frame's
# column order is preserved.
feature_cols = [name for name in df_train.columns if name not in target_cols]

In [8]:
def get_high_correlated_features(df_train, col, threshold):
    """List the other target columns whose correlation with ``col`` exceeds ``threshold``.

    Correlations come from ``df_train.corr()``. Only labels present in the
    notebook-level ``target_cols`` list are kept, ``col`` itself is excluded,
    and the result is ordered by ascending correlation.
    """
    corr_with_col = df_train.corr()[col].sort_values()
    strong = corr_with_col[corr_with_col > threshold]
    return [
        name
        for name in strong.index.tolist()
        if name != col and name in target_cols
    ]

In [9]:
# def apply_tabm(df_train, df_test_pred, feature_cols, target_col, hparams):
# # for target_col in range(55, 65):
#     # >>> Dataset.
#     TaskType = Literal['regression', 'binclass', 'multiclass']

#     # Regression.
#     task_type: TaskType = 'regression'
#     n_classes = None
#     # dataset = sklearn.datasets.fetch_california_housing()
#     # X_num: np.ndarray = dataset['data']
#     # Y: np.ndarray = dataset['target']



#     X_num = df_train.values[:, :55]
#     Y = df_train.values[:, target_col]

#     X_test = df_test_pred[feature_cols].values
#     Y_test = df_test_pred.values[:, target_col]

#     # print(X_num.shape)


#     task_is_regression = task_type == 'regression'

#     # >>> Numerical (continuous) features.
#     X_num: np.ndarray = X_num.astype(np.float32)
#     n_num_features = X_num.shape[1]

#     X_test: np.ndarray = X_test.astype(np.float32)

#     # >>> Categorical features.
#     # NOTE: the above datasets do not have categorical features, however,
#     # for the demonstration purposes, it is possible to generate them.
#     cat_cardinalities = [
#         # NOTE: uncomment the two lines below to add two categorical features.
#     ]

#     # >>> Labels.
#     if task_type == 'regression':
#         Y = Y.astype(np.float32)
#     else:
#         assert n_classes is not None
#         Y = Y.astype(np.int64)
#         assert set(Y.tolist()) == set(range(n_classes)), (
#             'Classification labels must form the range [0, 1, ..., n_classes - 1]'
#         )

#     # >>> Split the dataset.
#     all_idx = np.arange(len(Y))
#     train_idx, val_idx = sklearn.model_selection.train_test_split(
#         all_idx, train_size=0.85
#     )

#     data_numpy = {
#         'train': {'x_num': X_num[train_idx], 'y': Y[train_idx]},
#         'val': {'x_num': X_num[val_idx], 'y': Y[val_idx]},
#         'test': {'x_num': X_test, 'y': Y_test},
#     }


#     for part, part_data in data_numpy.items():
#         for key, value in part_data.items():
#             # print(f'{part:<5}    {key:<5}    {value.shape!r:<10}    {value.dtype}')
#             del key, value
#         del part, part_data
        


#     # Data Processing



#     # Feature preprocessing.
#     # NOTE
#     # The choice between preprocessing strategies depends on a task and a model.

#     # Simple preprocessing strategy.
#     # preprocessing = sklearn.preprocessing.StandardScaler().fit(
#     #     data_numpy['train']['x_num']
#     # )

#     # Advanced preprocessing strategy.
#     # The noise is added to improve the output of QuantileTransformer in some cases.
#     x_num_train_numpy = data_numpy['train']['x_num']
#     noise = (
#         np.random.default_rng(0)
#         .normal(0.0, 1e-5, x_num_train_numpy.shape)
#         .astype(x_num_train_numpy.dtype)
#     )
#     preprocessing = sklearn.preprocessing.QuantileTransformer(
#         n_quantiles=max(min(len(train_idx) // 30, 1000), 10),
#         output_distribution='normal',
#         subsample=10**9,
#     ).fit(x_num_train_numpy + noise)
#     del x_num_train_numpy

#     # Apply the preprocessing.
#     for part in data_numpy:
#         data_numpy[part]['x_num'] = preprocessing.transform(data_numpy[part]['x_num'])


#     # Label preprocessing.
#     class RegressionLabelStats(NamedTuple):
#         mean: float
#         std: float


#     Y_train = data_numpy['train']['y'].copy()
#     if task_type == 'regression':
#         # For regression tasks, it is highly recommended to standardize the training labels.
#         regression_label_stats = RegressionLabelStats(
#             Y_train.mean().item(), Y_train.std().item()
#         )
#         Y_train = (Y_train - regression_label_stats.mean) / regression_label_stats.std
#     else:
#         regression_label_stats = None
        
        

#     # Pytorch settings



#     # Device
#     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

#     # Convert data to tensors
#     data = {
#         part: {k: torch.as_tensor(v, device=device) for k, v in data_numpy[part].items()}
#         for part in data_numpy
#     }
#     Y_train = torch.as_tensor(Y_train, device=device)
#     if task_type == 'regression':
#         for part in data:
#             data[part]['y'] = data[part]['y'].float()
#         Y_train = Y_train.float()

#     # Automatic mixed precision (AMP)
#     # torch.float16 is implemented for completeness,
#     # but it was not tested in the project,
#     # so torch.bfloat16 is used by default.
#     amp_dtype = (
#         torch.bfloat16
#         if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
#         else torch.float16
#         if torch.cuda.is_available()
#         else None
#     )
#     # Changing False to True can speed up training
#     # of large enough models on compatible hardware.
#     amp_enabled = False and amp_dtype is not None
#     grad_scaler = torch.cuda.amp.GradScaler() if amp_dtype is torch.float16 else None  # type: ignore

#     # torch.compile
#     compile_model = False

#     # fmt: off
#     print(f'Device:        {device.type.upper()}')
#     print(f'AMP:           {amp_enabled}{f" ({amp_dtype})"if amp_enabled else ""}')
#     print(f'torch.compile: {compile_model}')
#     # fmt: on





#     # Model and optimizer

#     # The best performance is usually achieved with `num_embeddings`
#     # from the `rtdl_num_embeddings` package. Typically, `PiecewiseLinearEmbeddings`
#     # and `PeriodicEmbeddings` perform best.



#     # No embeddings.
#     num_embeddings = None

#     # Simple embeddings.
#     num_embeddings = rtdl_num_embeddings.LinearReLUEmbeddings(n_num_features)

#     # Periodic embeddings.
#     num_embeddings = rtdl_num_embeddings.PeriodicEmbeddings(n_num_features, lite=False)


#     n_bins = hparams['n_bins']
#     d_embedding = hparams['d_embedding']
#     # n_bins = trial.suggest_int('n_bins', 2, 128) # prev 48
#     # d_embedding = trial.suggest_int('d_embedding', 8, 32, step=4) # prev 16

#     # Piecewise-linear embeddings.
#     num_embeddings = rtdl_num_embeddings.PiecewiseLinearEmbeddings(
#         rtdl_num_embeddings.compute_bins(data['train']['x_num'], n_bins=n_bins),
#         d_embedding=d_embedding,
#         activation=False,
#         version='B',
#     )
    
#     n_blocks = hparams['n_blocks']
#     d_block = hparams['d_block']
#     arch_type = hparams['arch_type']
#     # n_blocks = trial.suggest_int("n_blocks", 1, 4)
#     # d_block = trial.suggest_int("d_block", 64, 1024, step=16)
#     # arch_type = trial.suggest_categorical('arch_type', ['tabm', 'tabm-mini'])
    
#     model = tabm.TabM.make(
#         n_num_features=n_num_features,
#         cat_cardinalities=cat_cardinalities,
#         d_out=1 if n_classes is None else n_classes,
#         num_embeddings=num_embeddings,
#         n_blocks=n_blocks,
#         d_block=d_block,
#         arch_type=arch_type,
#     ).to(device)
    
#     lr = hparams['lr']
#     weight_decay = hparams['weight_decay']
#     # lr = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
#     # weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True)
    
#     optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
#     gradient_clipping_norm: Optional[float] = 1.0

#     if compile_model:
#         # NOTE
#         # `torch.compile(model, mode="reduce-overhead")` caused issues during training,
#         # so the `mode` argument is not used.
#         model = torch.compile(model)
#         evaluation_mode = torch.no_grad
#     else:
#         evaluation_mode = torch.inference_mode
        



#     # A quick reminder: TabM represents an ensemble of k MLPs.
#     #
#     # The option below determines if the MLPs are trained
#     # on the same batches (share_training_batches=True) or
#     # on different batches. Technically, this option determines:
#     # - How the loss function is implemented.
#     # - How the training batches are constructed.
#     #
#     # `True` is recommended by default because of better training efficiency.
#     # On some tasks, `False` may provide better performance.
#     share_training_batches = True






#     @torch.autocast(device.type, enabled=amp_enabled, dtype=amp_dtype)  # type: ignore[code]
#     def apply_model(part: str, idx: Tensor) -> Tensor:
#         return (
#             model(
#                 data[part]['x_num'][idx],
#                 data[part]['x_cat'][idx] if 'x_cat' in data[part] else None,
#             )
#             .squeeze(-1)  # Remove the last dimension for regression tasks.
#             .float()
#         )


#     base_loss_fn = (
#         nn.functional.mse_loss if task_is_regression else nn.functional.cross_entropy
#     )


#     def loss_fn(y_pred: Tensor, y_true: Tensor) -> Tensor:
#         # TabM produces k predictions. Each of them must be trained separately.

#         # Regression:     (batch_size, k)            -> (batch_size * k,)
#         # Classification: (batch_size, k, n_classes) -> (batch_size * k, n_classes)
#         y_pred = y_pred.flatten(0, 1)

#         if share_training_batches:
#             # (batch_size,) -> (batch_size * k,)
#             y_true = y_true.repeat_interleave(model.backbone.k)
#         else:
#             # (batch_size, k) -> (batch_size * k,)
#             y_true = y_true.flatten(0, 1)

#         return base_loss_fn(y_pred, y_true)


#     @evaluation_mode()
#     def evaluate(part: str) -> float:
#         model.eval()

#         # When using torch.compile, you may need to reduce the evaluation batch size.
#         eval_batch_size = 32
#         y_pred: np.ndarray = (
#             torch.cat(
#                 [
#                     apply_model(part, idx)
#                     for idx in torch.arange(len(data[part]['y']), device=device).split(
#                         eval_batch_size
#                     )
#                 ]
#             )
#             .cpu()
#             .numpy()
#         )
#         if task_type == 'regression':
#             # Transform the predictions back to the original label space.
#             assert regression_label_stats is not None
#             y_pred = y_pred * regression_label_stats.std + regression_label_stats.mean

#         # Compute the mean of the k predictions.
#         if not task_is_regression:
#             # For classification, the mean must be computed in the probability space.
#             y_pred = scipy.special.softmax(y_pred, axis=-1)
#         y_pred = y_pred.mean(1)

#         y_true = data[part]['y'].cpu().numpy()
#         score = (
#             (sklearn.metrics.mean_absolute_percentage_error(y_true, y_pred))
#             if task_type == 'regression'
#             else sklearn.metrics.accuracy_score(y_true, y_pred.argmax(1))
#         )
#         # score = (
#         #     -(sklearn.metrics.mean_squared_error(y_true, y_pred) ** 0.5)
#         #     if task_type == 'regression'
#         #     else sklearn.metrics.accuracy_score(y_true, y_pred.argmax(1))
#         # )
#         return float(100 - 90 * score / 2.72), y_pred  # The higher -- the better.


#     # print(f'Test score before training: {evaluate("test")[0]:.4f}')








#     n_epochs = 1_000_000_000
#     train_size = len(train_idx)
#     batch_size = 32
#     epoch_size = math.ceil(train_size / batch_size)

#     epoch = -1
#     metrics = {'val': -math.inf, 'test': -math.inf}


#     def make_checkpoint() -> dict[str, Any]:
#         return deepcopy(
#             {
#                 'model': model.state_dict(),
#                 'optimizer': optimizer.state_dict(),
#                 'epoch': epoch,
#                 'metrics': metrics,
#             }
#         )


#     best_checkpoint = make_checkpoint()

#     # Early stopping: the training stops if the validation score
#     # does not improve for more than `patience` consecutive epochs.
#     patience = 100
#     remaining_patience = patience

#     for epoch in range(n_epochs):
#         batches = (
#             # Create one standard batch sequence.
#             torch.randperm(train_size, device=device).split(batch_size)
#             if share_training_batches
#             # Create k independent batch sequences.
#             else (
#                 torch.rand((train_size, model.backbone.k), device=device)
#                 .argsort(dim=0)
#                 .split(batch_size, dim=0)
#             )
#         )
#         for batch_idx in batches:
#             model.train()
#             optimizer.zero_grad()
#             loss = loss_fn(apply_model('train', batch_idx), Y_train[batch_idx])
#             if gradient_clipping_norm is not None:
#                 if grad_scaler is not None:
#                     grad_scaler.unscale_(optimizer)
#                 torch.nn.utils.clip_grad.clip_grad_norm_(
#                     model.parameters(), gradient_clipping_norm
#                 )
#             if grad_scaler is None:
#                 loss.backward()
#                 optimizer.step()
#             else:
#                 grad_scaler.scale(loss).backward()  # type: ignore
#                 grad_scaler.step(optimizer)
#                 grad_scaler.update()

#         metrics = {part: evaluate(part)[0] for part in ['val', 'test']}
#         val_score_improved = metrics['val'] > best_checkpoint['metrics']['val']

#         # print(
#         #     f'{"*" if val_score_improved else " "}'
#         #     f' [epoch] {epoch:<3}'
#         #     f' [val] {metrics["val"]:.3f}'
#         #     f' [test] {metrics["test"]:.3f}'
#         # )

#         if val_score_improved:
#             best_checkpoint = make_checkpoint()
#             remaining_patience = patience
#         else:
#             remaining_patience -= 1

#         if remaining_patience < 0:
#             break

#     # To make final predictions, load the best checkpoint.
#     model.load_state_dict(best_checkpoint['model'])

#     val_score = best_checkpoint["metrics"]["val"]
    
#     print('\n[Summary]')
#     print(f'best epoch:  {best_checkpoint["epoch"]}')
#     print(f'val score:  {best_checkpoint["metrics"]["val"]}')
#     # print(f'test score: {best_checkpoint["metrics"]["test"]}')

#     df_submit = pd.read_csv('./from_BRACU_HPC/submission_autogluon_time_fraction_experimental_quality.csv')
#     df_submit[f'BlendProperty{target_col - 55 + 1}'] = evaluate('test')[1]
#     df_submit.to_csv(f'submission_tabm_test{target_col - 55 + 1}.csv', index=False)
    
   
    
#     return best_checkpoint["metrics"]["val"]

In [10]:
# Placeholder for accumulated best hyperparameters; in this notebook it is only
# referenced by the commented-out Optuna code at the end of the next cell.
df_best_hparams = None

In [None]:
# for k in [16, 25, 32, 48]:
# from my_stack_builder import build_stack

k = 32
hparams_all = pd.read_csv('./optuna/tabm_cv/hparams_cv.csv', index_col=0)


def _hparams_for(target_name):
    """Return the first `hparams_all` row whose 'Target' is `target_name`, as a new dict.

    Raises:
        KeyError: if the table has no row for `target_name`.
    """
    rows = hparams_all[hparams_all['Target'] == target_name]
    if rows.empty:
        raise KeyError(f'No hyperparameters found for target {target_name!r}')
    return rows.iloc[0].to_dict()


# target_cols = [f'BlendProperty{i}' for i in [1, 2, 3, 4, 5, 6, 7, 8]]

# Positional indices of the ten target columns. They get their own name so the
# string-valued `target_cols` list (which get_high_correlated_features reads)
# is not overwritten with integers.
target_col_indices = list(range(55, 65))

# Loop-invariant settings, hoisted out of the loops below.
n_splits = 10
seeds = [500, 600, 700, 800]
pred_dir = f'./pred_{n_splits}_fold'
os.makedirs(pred_dir, exist_ok=True)

# The better-scoring of the BlendProperty7 / BlendProperty9 configurations is
# shared by two targets (see the loop); look it up once.
hparams_7 = _hparams_for('BlendProperty7')
hparams_9 = _hparams_for('BlendProperty9')
cv_score_7 = float(hparams_7['Score'])
cv_score_9 = float(hparams_9['Score'])
shared_hparams = hparams_7 if cv_score_7 > cv_score_9 else hparams_9

final_test_preds = None
for target_col in tqdm(target_col_indices):

    col_name = f'BlendProperty{target_col - 54}'
    print(col_name)

    if target_col == 57 or target_col == 61:
        # Indices 57 and 61 are BlendProperty3 and BlendProperty7.
        # NOTE(review): the condition selects BlendProperty3/7 while the
        # candidate configurations are BlendProperty7/9 -- confirm this is
        # intended.
        hparams = dict(shared_hparams)  # copy: 'k' is written below
    else:
        hparams = _hparams_for(col_name)

    hparams['k'] = k

    # correlated_target_features = get_high_correlated_features(df_train, target_col, 0.6)
    # print(correlated_target_features)
    # feature_cols = feature_cols + get_high_correlated_features(df_train, target_col, 0.5)

    # NOTE: the loop variable reuses the notebook-level name `seed`; the cell
    # that echoes the prediction filename reads it after this loop.
    for seed in seeds:
        score, test_preds = apply_tabm_cv(hparams, df_train, df_test_pred, feature_cols, col_name, n_splits=n_splits, seed=seed)
        print(score)

        # df_submission[target_col] = test_preds
        # One prediction file per (target, seed) pair.
        np.save(os.path.join(pred_dir, f'b{target_col - 54}_fold_{n_splits}_seed_{seed}.npy'), test_preds)


    

        # study = optuna.create_study(sampler=TPESampler(), direction='maximize')
        # study.optimize(objective, n_trials=2)
        
        # studies.append(study)
        # map_hparams = study.best_params
        # map_hparams['Target'] = f'BlendProperty{target_col - 55 + 1}'
        # df_cur_best = pd.DataFrame([map_hparams])
        # df_best_hparams = pd.concat([df_best_hparams, df_cur_best])
        # os.makedirs('./optuna/tabm', exist_ok=True)
        # df_best_hparams.to_csv('./optuna/tabm/hparams.csv')

  0%|          | 0/10 [00:00<?, ?it/s]

BlendProperty1
Device:        CUDA

[Summary]
best epoch:  102
val score:  96.63620642112458
Device:        CUDA

[Summary]
best epoch:  46
val score:  96.00711919586448
Device:        CUDA

[Summary]
best epoch:  274
val score:  98.17465803147677
Device:        CUDA

[Summary]
best epoch:  55
val score:  97.2833085750394
Device:        CUDA

[Summary]
best epoch:  55
val score:  96.99497480383691
Device:        CUDA

[Summary]
best epoch:  136
val score:  97.27983131785604
Device:        CUDA

[Summary]
best epoch:  68
val score:  97.95358213048209
Device:        CUDA

[Summary]
best epoch:  199
val score:  98.04308520191732
Device:        CUDA

[Summary]
best epoch:  117
val score:  98.12190935892217
Device:        CUDA

[Summary]
best epoch:  109
val score:  97.78883841217441
97.42835133566695
Device:        CUDA

[Summary]
best epoch:  127
val score:  94.88379181746174
Device:        CUDA

[Summary]
best epoch:  137
val score:  96.60650711944875
Device:        CUDA

[Summary]
best 

 10%|█         | 1/10 [08:43<1:18:31, 523.54s/it]


[Summary]
best epoch:  159
val score:  97.59701239492964
97.10940706914394
BlendProperty2
Device:        CUDA

[Summary]
best epoch:  77
val score:  97.54504706053173
Device:        CUDA

[Summary]
best epoch:  131
val score:  97.15794365414801
Device:        CUDA

[Summary]
best epoch:  183
val score:  97.8059010087129
Device:        CUDA

[Summary]
best epoch:  111
val score:  97.82572889371829
Device:        CUDA

[Summary]
best epoch:  138
val score:  97.42812419529346
Device:        CUDA

[Summary]
best epoch:  65
val score:  97.91633258299792
Device:        CUDA

[Summary]
best epoch:  218
val score:  97.87827874259914
Device:        CUDA

[Summary]
best epoch:  174
val score:  97.56818900213523
Device:        CUDA

[Summary]
best epoch:  279
val score:  98.05623123093562
Device:        CUDA

[Summary]
best epoch:  90
val score:  96.09506730099811
97.52768435294547
Device:        CUDA

[Summary]
best epoch:  155
val score:  94.84411336700705
Device:        CUDA

[Summary]
best e

 20%|██        | 2/10 [18:19<1:13:55, 554.49s/it]


[Summary]
best epoch:  223
val score:  98.21777343202163
97.43353485172287
BlendProperty3
Device:        CUDA

[Summary]
best epoch:  93
val score:  90.36196115262368
Device:        CUDA

[Summary]
best epoch:  70
val score:  81.67513814919135
Device:        CUDA

[Summary]
best epoch:  83
val score:  87.27851832175956
Device:        CUDA

[Summary]
best epoch:  84
val score:  90.06312165190192
Device:        CUDA

[Summary]
best epoch:  131
val score:  89.35785155524226
Device:        CUDA

[Summary]
best epoch:  52
val score:  92.69700357142617
Device:        CUDA

[Summary]
best epoch:  84
val score:  74.9507040661924
Device:        CUDA

[Summary]
best epoch:  156
val score:  92.91789975455579
Device:        CUDA

[Summary]
best epoch:  76
val score:  83.90157871386585
Device:        CUDA

[Summary]
best epoch:  65
val score:  86.5490521359093
86.97528264013336
Device:        CUDA

[Summary]
best epoch:  179
val score:  94.07734820509658
Device:        CUDA

[Summary]
best epoch: 

 30%|███       | 3/10 [25:03<56:39, 485.63s/it]  


[Summary]
best epoch:  61
val score:  87.37997189164162
88.34552913886814
BlendProperty4
Device:        CUDA

[Summary]
best epoch:  92
val score:  98.11558126855422
Device:        CUDA

[Summary]
best epoch:  145
val score:  96.94981409565492
Device:        CUDA

[Summary]
best epoch:  146
val score:  97.90263211573748
Device:        CUDA

[Summary]
best epoch:  205
val score:  98.19407298448769
Device:        CUDA

[Summary]
best epoch:  53
val score:  96.56881197520039
Device:        CUDA

[Summary]
best epoch:  170
val score:  97.30628016678726
Device:        CUDA

[Summary]
best epoch:  114
val score:  97.29911092778339
Device:        CUDA

[Summary]
best epoch:  127
val score:  98.33368393592536
Device:        CUDA

[Summary]
best epoch:  67
val score:  96.546747354681
Device:        CUDA

[Summary]
best epoch:  77
val score:  97.28360958397388
97.45003437183728
Device:        CUDA

[Summary]
best epoch:  53
val score:  96.02696680299499
Device:        CUDA

[Summary]
best epoch

 40%|████      | 4/10 [34:39<52:07, 521.18s/it]


[Summary]
best epoch:  256
val score:  98.12366191626472
97.36904427597747
BlendProperty5
Device:        CUDA

[Summary]
best epoch:  186
val score:  96.17709235890823
Device:        CUDA

[Summary]
best epoch:  198
val score:  98.83398999843527
Device:        CUDA

[Summary]
best epoch:  54
val score:  98.32731825025643
Device:        CUDA

[Summary]
best epoch:  143
val score:  99.34657345361569
Device:        CUDA

[Summary]
best epoch:  178
val score:  99.379327712471
Device:        CUDA

[Summary]
best epoch:  50
val score:  97.37217449857032
Device:        CUDA

[Summary]
best epoch:  71
val score:  99.09831707447987
Device:        CUDA

[Summary]
best epoch:  214
val score:  98.72678582666113
Device:        CUDA

[Summary]
best epoch:  125
val score:  99.14205822107546
Device:        CUDA

[Summary]
best epoch:  109
val score:  98.59614351161702
98.49997804048309
Device:        CUDA

[Summary]
best epoch:  213
val score:  99.23532408908667
Device:        CUDA

[Summary]
best ep

 50%|█████     | 5/10 [45:35<47:29, 569.97s/it]


[Summary]
best epoch:  83
val score:  98.5700063955258
98.63966859719962
BlendProperty6
Device:        CUDA

[Summary]
best epoch:  102
val score:  96.14856331883108
Device:        CUDA

[Summary]
best epoch:  360
val score:  98.47636833403479
Device:        CUDA

[Summary]
best epoch:  262
val score:  97.82086714335225
Device:        CUDA

[Summary]
best epoch:  107
val score:  95.81217732937897
Device:        CUDA

[Summary]
best epoch:  115
val score:  97.42122983340832
Device:        CUDA

[Summary]
best epoch:  142
val score:  97.48244558406226
Device:        CUDA

[Summary]
best epoch:  84
val score:  98.07219949605711
Device:        CUDA

[Summary]
best epoch:  253
val score:  98.10176345464937
Device:        CUDA

[Summary]
best epoch:  107
val score:  97.1423530677224
Device:        CUDA

[Summary]
best epoch:  119
val score:  96.72893097843317
97.32068988885919
Device:        CUDA

[Summary]
best epoch:  239
val score:  97.63770949314622
Device:        CUDA

[Summary]
best e

 60%|██████    | 6/10 [54:34<37:18, 559.54s/it]


[Summary]
best epoch:  114
val score:  97.21551179776297
97.54255823743927
BlendProperty7
Device:        CUDA

[Summary]
best epoch:  158
val score:  91.3247814949821
Device:        CUDA

[Summary]
best epoch:  145
val score:  85.99026319296921
Device:        CUDA

[Summary]
best epoch:  129
val score:  91.82815274552388
Device:        CUDA

[Summary]
best epoch:  148
val score:  92.58501937284188
Device:        CUDA

[Summary]
best epoch:  147
val score:  91.91354609587613
Device:        CUDA

[Summary]
best epoch:  66
val score:  71.39664103879647
Device:        CUDA

[Summary]
best epoch:  140
val score:  89.82677071848336
Device:        CUDA

[Summary]
best epoch:  122
val score:  93.63224252181895
Device:        CUDA

[Summary]
best epoch:  180
val score:  90.1466862682034
Device:        CUDA

[Summary]
best epoch:  156
val score:  93.58575352851082
89.22298534213468
Device:        CUDA

[Summary]
best epoch:  116
val score:  92.96311174026307
Device:        CUDA

[Summary]
best 

 70%|███████   | 7/10 [1:01:42<25:49, 516.36s/it]


[Summary]
best epoch:  108
val score:  86.10113604980357
87.1380665370045
BlendProperty8
Device:        CUDA

[Summary]
best epoch:  89
val score:  95.11952150393935
Device:        CUDA

[Summary]
best epoch:  50
val score:  89.46942553362426
Device:        CUDA

[Summary]
best epoch:  148
val score:  94.24501585171504
Device:        CUDA

[Summary]
best epoch:  113
val score:  95.28669954880195
Device:        CUDA

[Summary]
best epoch:  193
val score:  88.61947688548004
Device:        CUDA

[Summary]
best epoch:  70
val score:  93.99417556603166
Device:        CUDA

[Summary]
best epoch:  132
val score:  92.53642011214706
Device:        CUDA

[Summary]
best epoch:  21
val score:  88.055519759655
Device:        CUDA

[Summary]
best epoch:  104
val score:  78.66661671329948
Device:        CUDA

[Summary]
best epoch:  81
val score:  94.81456518611488
91.08074329901741
Device:        CUDA

[Summary]
best epoch:  126
val score:  94.22219094327268
Device:        CUDA

[Summary]
best epoch

 80%|████████  | 8/10 [1:08:21<15:58, 479.18s/it]


[Summary]
best epoch:  112
val score:  90.91885184102199
90.74462981769238
BlendProperty9
Device:        CUDA

[Summary]
best epoch:  104
val score:  86.87943717574372
Device:        CUDA

[Summary]
best epoch:  117
val score:  85.35201578017544
Device:        CUDA

[Summary]
best epoch:  106
val score:  92.37010984078927
Device:        CUDA

[Summary]
best epoch:  172
val score:  79.1138928164454
Device:        CUDA

[Summary]
best epoch:  75
val score:  92.29987590190242
Device:        CUDA

[Summary]
best epoch:  70
val score:  89.09866437315941
Device:        CUDA

[Summary]
best epoch:  122
val score:  90.80510448445293
Device:        CUDA

[Summary]
best epoch:  53
val score:  81.68390463380253
Device:        CUDA

[Summary]
best epoch:  203
val score:  89.8579977452755
Device:        CUDA

[Summary]
best epoch:  51
val score:  88.5811429909047
87.60421484880118
Device:        CUDA

[Summary]
best epoch:  106
val score:  86.81069570867454
Device:        CUDA

[Summary]
best epoc

 90%|█████████ | 9/10 [1:15:12<07:37, 457.72s/it]


[Summary]
best epoch:  43
val score:  90.88255326975795
86.19291553143182
BlendProperty10
Device:        CUDA

[Summary]
best epoch:  242
val score:  96.94433627540575
Device:        CUDA

[Summary]
best epoch:  192
val score:  96.62410270641831
Device:        CUDA

[Summary]
best epoch:  245
val score:  96.71658837948652
Device:        CUDA

[Summary]
best epoch:  72
val score:  86.69064121649546
Device:        CUDA

[Summary]
best epoch:  192
val score:  98.16293803536716
Device:        CUDA

[Summary]
best epoch:  266
val score:  97.43597853161833
Device:        CUDA

[Summary]
best epoch:  123
val score:  80.00159158426173
Device:        CUDA

[Summary]
best epoch:  97
val score:  97.41858583601082
Device:        CUDA

[Summary]
best epoch:  160
val score:  96.81974273613271
Device:        CUDA

[Summary]
best epoch:  191
val score:  97.94091485605082
94.47554196008807
Device:        CUDA

[Summary]
best epoch:  157
val score:  96.43705462050788
Device:        CUDA

[Summary]
best

100%|██████████| 10/10 [1:25:26<00:00, 512.63s/it]


[Summary]
best epoch:  498
val score:  93.49697980591479
96.58901746110881





In [None]:
# Echo the prediction filename pattern; this reads `target_col`, `n_splits` and
# `seed` as left behind by the training loop, so it only works after that cell.
f'b{target_col-54}_fold_{n_splits}_seed_{seed}.npy'

In [None]:
# df_submission.to_csv('submision_tabm_cv-1.csv', index=False)

In [None]:
# default hparams

# hparams = {
#     'n_blocks': 2,
#     'd_block': 512,
#     'embedding_type': 'PiecewiseLinearEmbeddings',
#     'n_bins': 48,
#     'd_embedding': 16,
#     'arch_type': 'tabm',
#     'lr': 2e-3,
#     'weight_decay': 3e-4,
#     'share_training_batches': 'T',
# }

In [None]:
# Reload the tuned-hyperparameter table and display its 'Score' column.
df_hparams = pd.read_csv('./optuna/tabm_cv/hparams_cv.csv')
df_hparams['Score']

In [None]:
# Start from the current best submission.
df_best = pd.read_csv(cur_best_path)
# df_ag_b9 = pd.read_csv('submission_autogluon_b9.csv')

# Optional seed-averaging for a single target (disabled). The loads are
# commented out together with their only consumer, so this cell no longer
# requires these .npy files to exist.
# i = 10
# pred_1 = np.load(f'./pred_10_fold/b{i}_fold_10_seed_10.npy')
# pred_2 = np.load(f'./pred_10_fold/b{i}_fold_10_seed_20.npy')
# pred_3 = np.load(f'./pred_10_fold/b{i}_fold_10_seed_30.npy')
# df_best[f'BlendProperty{i}'] = (pred_2 + pred_3) / 2

# Cap the BlendProperty3 predictions at the largest value seen in training.
df_best['BlendProperty3'] = df_best['BlendProperty3'].clip(upper=df_train['BlendProperty3'].max())
df_best.to_csv('submission_cur_best+3clipped_10fold.csv', index=False)

# improved: 1 2 3 4
# Not improved: 5 6 7 8

In [None]:
# Base submission that receives the per-target replacements below.
df_best = pd.read_csv('submission_cur_best_tabm_b10_(cb)_93.76430.csv')
# df_ag_b9 = pd.read_csv('submission_autogluon_b9.csv')

# Take BlendProperty1..4 from their individual 10-fold submission files.
for i in range(1, 5):
    blend_col = f'BlendProperty{i}'
    df_pred_i = pd.read_csv(f'./submission_cur_best+tabm_b{i}_10fold.csv')
    df_best[blend_col] = df_pred_i[blend_col]

df_best.to_csv('submission_cur_best+tabm_b1234_10fold.csv', index=False)

# improved: 1 2 3 4
# Not improved: 5 6 7 8

In [None]:
# Load the saved b9 array via a path relative to the notebook's working
# directory (the same relative path the following cell uses) instead of a
# machine-specific absolute home-directory path.
b9 = np.load('tabular-dl-tabr/notebooks/b9.npy')
b9

In [None]:
# Load the saved b9 array from a path relative to the working directory.
b9 = np.load('tabular-dl-tabr/notebooks/b9.npy')

In [None]:
# Base submission for the column replacements done at the end of this cell.
df_best = pd.read_csv('./Submissions_v3/sub_tabm_hpo_better_only (prev_best).csv') # 92.92676
# df_ag = pd.read_csv('/home/nuwaisir/Corridor/Contests/Shell_ai_fuel_blend/Submissions/submission_autogluon_avg_86.45445+86.29786.csv')
# df_qg_eq_b9 = pd.read_csv('./from_BRACU_HPC/submission_autogluon_time_fraction_experimental_quality_b9.csv')

# df_ag_exp_hpc = pd.read_csv('/home/nuwaisir/Corridor/Contests/Shell_ai_fuel_blend/from_BRACU_HPC/submission_autogluon_time_fraction_experimental_quality.csv')

# for target_col in range(1, 10 + 1):
#     df = pd.read_csv(f'./submission_tabm_cv-1.csv')
#     if target_col in [1, 4, 5, 6, 7, 8, 10]:
#         col_name = f'BlendProperty{target_col}'
#         df_best[col_name] = df[col_name]

# df_best['BlendProperty3'] = df_best['BlendProperty7'] + 0.02
# df_best['BlendProperty3'].clip(upper=1.638646401455354)

# df_best['BlendProperty2'] = df_ag['BlendProperty2']

# col_num = 10

# Source of the replacement predictions.
df_cur = pd.read_csv('./submission_tabm_cv-1.csv')
# Overwrite the selected BlendProperty columns in the base submission.
for col_num in (1, 2, 3, 4, 6, 8):
    blend_col = f'BlendProperty{col_num}'
    df_best[blend_col] = df_cur[blend_col]

# df_best['BlendProperty7'] = df_best['BlendProperty3']

df_best.to_csv('./sub_tabm_cv_b123468.csv', index=False)

In [None]:
# baseline: 92.92676
# 1 -> 93.09587
# 2 -> 93.05496
# 3 -> 93.06625
# 4 -> 93.04043
# 5 -> 92.50959 (-)
# 6 -> 93.15714
# 7 -> 92.72442 (-)
# 8 -> 92.95119
# 9 -> 90.83002 (-)
# 10 -> 92.81574 (-)


In [None]:
# # df_best = pd.read_csv('./Submissions_v2/submission_tabm_all_d_embd=16_valid_shuffle_bs_32_per_emd.csv')
# df_ag = pd.read_csv('/home/nuwaisir/Corridor/Contests/Shell_ai_fuel_blend/Submissions/submission_autogluon_avg_86.45445+86.29786.csv')

# for col_num in [2, 3]:
#     col_name = f'BlendProperty{col_num}'
#     df_best[col_name] = df_ag[col_name]

# df_best.to_csv(f'./sub_tabm_hpo_better_ag2,3.csv', index=False)

In [None]:
# baseline = 91.87684
# better = [1, 4, 5, 6, 7, 8, 10]

# 1 -> ~ 91.8
# 4 -> ~ 91... (better)
# 5 -> 92.45978
# 6 -> 91.97766
# 7 -> 92.01413
# 8 -> 91.95678
# # 9 -> 87.50528
# 10 -> 92.10632


In [None]:
# for i in range(1, 10 + 1):
#     # if True:
#     df = pd.read_csv(f'./submission_tabm_test{i}.csv')
#     df_submission[f'BlendProperty{i}'] = df[f'BlendProperty{i}']
#     # else:
#     #     df_submission[f'BlendProperty{i}'] = df_ag[f'BlendProperty{i}']

# df_submission.to_csv(f'./submission_tabm_test_all_hpo.csv', index=False)