In [None]:
from pathlib import Path

import pandas as pd

# Root of the local kagglehub download cache. Resolving it from the current
# user's home directory (instead of a hard-coded /home/<user> path) lets the
# notebook run unchanged on another machine/account that downloaded the same
# datasets through kagglehub.
KAGGLE_CACHE = Path.home() / ".cache" / "kagglehub" / "datasets"

path1 = str(KAGGLE_CACHE / "amruthayenikonda" / "dirty-dataset-to-practice-data-cleaning" / "versions" / "1" / "train.csv")

path2 = str(KAGGLE_CACHE / "sushant097" / "bank-marketing-dataset-full" / "versions" / "1" / "bank-full.csv")

# train.csv is indexed by its "id" column; bank-full.csv is ";"-separated.
train_data = pd.read_csv(path1, index_col = "id")
extra_data = pd.read_csv(path2, delimiter=";")

In [None]:
train_data.info()

In [None]:
train_data.head()

In [None]:
extra_data.head()

In [None]:
# Name of the label column (encoded as 0/1 for extra_data in the next cell).
TARGET = "y"
# Numeric input columns (used for the correlation heatmap below).
NUMS = ["age", "balance", "day", "duration", "campaign", "pdays", "previous"]
# String-valued categorical input columns (used for the bar charts below).
CATS = ["job", "marital", "education", "default", "housing", "loan", "contact", "month", "poutcome"]

In [None]:
# Encode the label as 0/1: "yes" -> 1, everything else -> 0.
# Matching on both "yes" and 1 keeps this cell idempotent: re-running it on the
# already-encoded column leaves the labels unchanged, whereas comparing only
# against "yes" would silently turn every label into 0 on a second run.
extra_data["y"] = extra_data["y"].isin(["yes", 1]).astype("int64")

In [None]:
# Stack the rows of train_data and extra_data into a single frame.
# ignore_index=True discards train_data's "id" index and assigns a fresh
# 0..n-1 RangeIndex; the cross-validation cells further down select rows with
# df_train.loc[<positional fold indices>], which only works because of that.
df_train = pd.concat([train_data, extra_data], axis=0, ignore_index=True)

In [None]:
import matplotlib.pyplot as plt

# 3x3 grid of axes: one bar chart of value frequencies per column in CATS.
fig, axs = plt.subplots(3,3, figsize=(12,8))

# flatten the 2-D grid of axes so it can be indexed with a single counter
axs = axs.flatten()

for i, col in enumerate(CATS):
    # NOTE(review): the counts come from train_data only, not from the
    # concatenated df_train used by the other plots -- confirm this is intended.
    train_data[col].value_counts().plot(kind="bar", ax = axs[i])
    axs[i].tick_params(axis="x", rotation=45)

plt.tight_layout()

In [None]:
df_train.hist(bins=50, figsize=(12,8))

In [None]:
import seaborn as sns

sns.heatmap(df_train[NUMS].corr())

In [None]:
df_train["duration"].hist(bins=50, figsize=(12,8))

In [None]:
df_train.plot(kind="scatter", x="pdays", y="day", figsize=(12,8))

In [None]:
#cyclical features

import numpy as np

# Angle of "pdays" on a 365-step circle, computed once and projected on both axes.
# NOTE(review): pdays == -1 is treated as a sentinel later in this notebook
# (see "was_not_contacted"); here it is encoded like any other value.
_pdays_angle = 2*np.pi * df_train['pdays'] / 365

df_train['_pdays_sin'] = np.sin(_pdays_angle).astype('float32')
df_train['_pdays_cos'] = np.cos(_pdays_angle).astype('float32')

In [None]:
#convert balance and duration to float32

# Same cast for both columns, so loop instead of repeating the statement.
for _float_col in ("balance", "duration"):
    df_train[_float_col] = df_train[_float_col].astype("float32")

In [None]:
#convert day and month attribute to date which is one number in a year


# Month abbreviation -> month number (kept for any later cell that uses it).
replacement_values = {'may': 5,
 'aug': 8,
 'jul': 7,
 'jun': 6,
 'nov': 11,
 'apr': 4,
 'feb': 2,
 'jan': 1,
 'oct': 10,
 'sep': 9,
 'mar': 3,
 'dec': 12}

# Month abbreviation -> number of days that precede that month in a non-leap
# year, so that day-of-year = offset + day-of-month (1..365).
# The previous formula, (month - 1) * 12 + day, made different dates collide
# (e.g. 13 Jan and 1 Feb both gave 13) and topped out at 163, although the
# value is divided by 365 when the cyclical date features are built.
DAYS_BEFORE_MONTH = {'jan': 0,
 'feb': 31,
 'mar': 59,
 'apr': 90,
 'may': 120,
 'jun': 151,
 'jul': 181,
 'aug': 212,
 'sep': 243,
 'oct': 273,
 'nov': 304,
 'dec': 334}

_month_offset = df_train["month"].map(DAYS_BEFORE_MONTH)

# Fail loudly on month labels the table does not know, instead of letting NaN
# silently change the dtype of the derived column.
if _month_offset.isna().any():
    _unknown_months = sorted(df_train.loc[_month_offset.isna(), "month"].astype("str").unique())
    raise ValueError(f"unexpected month labels: {_unknown_months}")

df_train["data"] = _month_offset.astype("int64") + df_train["day"]

In [None]:
# Angle of the "data" column on a 365-step circle; cosine and sine of the same
# angle give the two cyclical coordinates.
_date_angle = 2*np.pi*df_train["data"]/365

df_train["_date_cos"] = np.cos(_date_angle).astype("float32")
df_train["_date_sin"] = np.sin(_date_angle).astype("float32")

In [None]:
# Boolean flag (stored as a categorical) marking rows where pdays == -1.
# Presumably -1 is the dataset's sentinel for "client was never contacted
# before" -- TODO confirm against the dataset description.
df_train["was_not_contacted"] = (df_train["pdays"]==-1).astype("category")

In [None]:
#convert duration and balance by logarithms

# balance can be negative, so use a signed log: sign(x) * log(1 + |x|).
_balance = df_train["balance"]
df_train["balance_log"] = np.sign(_balance)*np.log1p(_balance.abs())

# duration is passed to log1p directly.
df_train["duration_log"] = np.log1p(df_train["duration"])

In [None]:
# Move the label column out of the feature frame: pop() returns the "y" column
# and removes it from df_train in one step.
df_target = df_train.pop("y")

In [None]:
#convert int64 and object dtypes to category
cols = df_train.select_dtypes(["int64", "object"]).columns.to_list()

# Single astype call with a per-column dtype mapping; columns not listed keep
# their dtype and their position.
df_train = df_train.astype({col: "category" for col in cols})

df_train.info()

In [None]:
#finally let's do interactions
from itertools import combinations

CATS = df_train.select_dtypes("category").columns.to_list()
NUMS = df_train.select_dtypes("float32").columns.to_list()

# Pairwise categorical crosses: the two string-cast values joined with "_".
# The new columns are collected first and attached with ONE concat; inserting
# them one at a time makes pandas warn about (and suffer from) a highly
# fragmented DataFrame once dozens of columns have been added.
cat_crosses = {}
for col in combinations(CATS,2):
    name = "_".join(col)
    cat_crosses[name] = (df_train[col[0]].astype("str")+"_"+df_train[col[1]].astype("str")).astype("category")

# Dropping same-named columns first keeps the old "assignment overwrites"
# semantics instead of producing duplicate column labels.
df_train = pd.concat(
    [df_train.drop(columns=list(cat_crosses), errors="ignore"),
     pd.DataFrame(cat_crosses, index=df_train.index)],
    axis=1,
)

In [None]:
# Pairwise products of the numeric columns in NUMS, named "<a>x<b>".
# Built in one dict and attached with a single concat to avoid the DataFrame
# fragmentation caused by adding the columns one by one.
num_products = {}
for col in combinations(NUMS, 2):
    name = "x".join(col)
    num_products[name] = df_train[col[0]]*df_train[col[1]]

# Dropping same-named columns first keeps the old "assignment overwrites"
# semantics instead of producing duplicate column labels.
df_train = pd.concat(
    [df_train.drop(columns=list(num_products), errors="ignore"),
     pd.DataFrame(num_products, index=df_train.index)],
    axis=1,
)

In [None]:
df_train.info()

In [None]:
CATS = df_train.select_dtypes("category").columns.to_list()
NUMS = df_train.select_dtypes("number").columns.to_list()

In [None]:
df_train.head()

## 1) Hyperparameter tuning for TabM_D_Classifier from PyTabkit

In [None]:
#approach using optuna

import optuna
from sklearn.model_selection import StratifiedKFold
import gc
from pytabkit import TabM_D_Classifier
from category_encoders import TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.metrics import roc_auc_score
import torch



def objective(trial):
    """Optuna objective for TabM: mean validation ROC AUC over 3 stratified folds.

    The search space is the TabM learning rate (``lr``) and the ``smoothing``
    strength of the TargetEncoder; every other TabM setting is fixed below.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample ``lr`` and ``smoothing``.

    Returns
    -------
    float
        Average of the per-fold ROC AUC scores (the study maximises it).
    """
    # search space: network learning rate and target-encoder smoothing
    lr = trial.suggest_float("lr", 1e-5, 1e0, log=True)
    smoothing = trial.suggest_float("smoothing", 1e-5, 1e0, log=True)

    param_TabM_inputs = {
        'device': 'cuda',
        'val_metric_name': '1-auc_ovr',
        'random_state': 100,
        'verbosity': 2,
        'arch_type': 'tabm-mini',
        'tabm_k': 32,
        'num_emb_type': 'pwl',
        'd_embedding': 12,
        'batch_size': 256,
        'lr': lr,
        'n_epochs': 10,
        'dropout': 0.1,
        'd_block': 512,
        'n_blocks': 3
    }

    # Fixed seed: every trial is scored on the same three folds, so score
    # differences between trials come from the hyperparameters and not from a
    # freshly reshuffled split.
    kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_ROC = []
    feature_cols = CATS + NUMS

    for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
        # kf.split yields positional indices, so rows are selected with .iloc
        df_train_cross = df_train.iloc[train_idx][feature_cols]
        df_val_cross = df_train.iloc[val_idx][feature_cols]
        y_train, y_val = df_target.iloc[train_idx], df_target.iloc[val_idx]

        # Fit the preprocessing on the training fold only, then apply it to the
        # validation fold, so the target encoding never sees validation labels.
        col_tran = ColumnTransformer([("cat", TargetEncoder(cols=CATS, smoothing=smoothing), CATS),
                                      ("num", StandardScaler(), NUMS)])
        df_train_cross = col_tran.fit_transform(df_train_cross, y_train)
        df_val_cross = col_tran.transform(df_val_cross)

        # instantiate and fit the TabM model
        tab_m = TabM_D_Classifier(**param_TabM_inputs)
        tab_m.fit(df_train_cross, y_train)

        # probability of the positive class on the held-out fold
        oof = tab_m.predict_proba(df_val_cross)[:,1]
        fold_ROC.append(roc_auc_score(y_val, oof))

        # release model and fold data before the next fold
        del tab_m
        del df_train_cross, df_val_cross, y_train, y_val
        torch.cuda.empty_cache()
        gc.collect()

    return sum(fold_ROC)/len(fold_ROC)

In [None]:
# Maximise the value returned by objective() (mean cross-validated ROC AUC).
study = optuna.create_study(direction="maximize")
# NOTE(review): n_trials=1 evaluates a single sampled configuration, so
# best_params below is just that one sample -- raise n_trials for a real search.
study.optimize(objective, n_trials=1, n_jobs=1, show_progress_bar=True)
torch.cuda.empty_cache()  #clear reserved memory

In [None]:
study.best_value

In [None]:
study.best_params


## 2) Hyperparameter tuning with xgboost model

In [None]:
from xgboost import XGBClassifier
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_auc_score
from sklearn.compose import ColumnTransformer




def objective(trial):
    """Optuna objective for XGBoost: mean validation ROC AUC over 3 stratified folds.

    The search space is the booster learning rate (``lr``), the TargetEncoder
    ``smoothing``, the tree ``max_depth`` and the number of trees (``n_est``).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Average of the per-fold ROC AUC scores (the study maximises it).
    """
    # search space: learning rate, encoder smoothing, tree depth, number of trees
    lr = trial.suggest_float("lr", 1e-5, 1e0, log=True)
    smoothing = trial.suggest_float("smoothing", 1e-5, 1e0, log=True)
    max_depth = trial.suggest_int("max_depth", 2, 8)
    n_estimators = trial.suggest_int("n_est", 500, 1000)

    # Fixed seed: every trial is scored on the same three folds, so score
    # differences between trials come from the hyperparameters and not from a
    # freshly reshuffled split.
    kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_ROC = []
    feature_cols = CATS + NUMS

    for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
        # kf.split yields positional indices, so rows are selected with .iloc
        df_train_cross = df_train.iloc[train_idx][feature_cols]
        df_val_cross = df_train.iloc[val_idx][feature_cols]
        y_train, y_val = df_target.iloc[train_idx], df_target.iloc[val_idx]

        # Preprocessing lives inside the pipeline, so it is fitted on the
        # training fold only.
        preproc = ColumnTransformer([("cats", TargetEncoder(cols=CATS, smoothing = smoothing), CATS),
                                     ("nums", StandardScaler(), NUMS)])
        est = XGBClassifier(device = "cuda", learning_rate=lr, max_depth=max_depth, n_estimators=n_estimators)
        model = make_pipeline(preproc, est)

        model.fit(df_train_cross, y_train)

        # probability of the positive class on the held-out fold
        oof = model.predict_proba(df_val_cross)[:,1]
        fold_ROC.append(roc_auc_score(y_val, oof))

        # release model and fold data before the next fold
        del model
        del df_train_cross, df_val_cross, y_train, y_val
        torch.cuda.empty_cache()
        gc.collect()

    return sum(fold_ROC)/len(fold_ROC)

In [None]:
# NOTE: despite the name "study_forest", the objective defined in this section
# trains an XGBClassifier; the study maximises its mean cross-validated ROC AUC.
study_forest = optuna.create_study(direction="maximize")
study_forest.optimize(objective, n_trials=10, n_jobs=1, show_progress_bar=True)
torch.cuda.empty_cache()  #clear reserved memory

In [None]:
study_forest.best_params

## 3) Hyperparameter tuning with custom Transformer

In [None]:
import torch
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import ColumnTransformer
import numpy as np


class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(cat_codes, num_values, target)`` per row.

    Categorical columns are turned into integer codes. Columns that already
    have the pandas ``category`` dtype are encoded with their own category
    codes. Those codes are defined by the column's category list, which a row
    subset inherits from the parent frame, so a training fold and a validation
    fold cut from the same frame get the SAME code for the same category.
    (Fitting a fresh ``OrdinalEncoder`` on every subset, as before, re-numbers
    the categories whenever a subset lacks one of them, which silently
    scrambles the embedding lookups on the validation fold.)
    Columns without the ``category`` dtype still fall back to an
    ``OrdinalEncoder`` fitted on the data passed in.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Frame holding both the categorical and the numeric columns.
    cat_cols : list of str
        Names of the categorical columns.
    num_cols : list of str
        Names of the numeric columns.
    train_target : pandas.Series
        Target values, one per row of ``train_data``.

    Raises
    ------
    ValueError
        If a ``category`` column contains missing values (code -1), which
        cannot be used as an embedding index.
    """
    def __init__(self, train_data, cat_cols, num_cols, train_target):

        code_columns = []
        for col in cat_cols:
            column = train_data[col]
            if isinstance(column.dtype, pd.CategoricalDtype):
                # codes index into the (shared) category list of the column
                codes = column.cat.codes.to_numpy(dtype=np.int64)
                if (codes < 0).any():
                    raise ValueError(f"categorical column {col!r} contains missing values")
            else:
                # no shared vocabulary available: encode from the data at hand
                codes = OrdinalEncoder(dtype=np.int64).fit_transform(column.to_frame())[:, 0]
            code_columns.append(codes)

        if code_columns:
            self.train_cat = np.stack(code_columns, axis=1)
        else:
            self.train_cat = np.empty((len(train_data), 0), dtype=np.int64)

        # copy so the dataset never aliases the caller's frame
        self.train_num = train_data[num_cols].to_numpy(copy=True)

        self.target = train_target.values

    def __getitem__(self, idx):
        """Return ``(categorical codes, numeric values, target)`` for row ``idx``."""
        return self.train_cat[idx], self.train_num[idx], self.target[idx]

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.train_cat)


def collate_fn(batch):
    """Collate ``(cat_row, num_row, target)`` samples into three batch tensors.

    Parameters
    ----------
    batch : list of tuple
        Samples as returned by ``MyDataset.__getitem__``.

    Returns
    -------
    list of torch.Tensor
        ``[cat_features (int64), num_features (float32), targets (int64)]``.
    """
    if not batch:
        # keep the historical result for an empty batch: three empty tensors
        return [torch.tensor([], dtype=torch.int64),
                torch.tensor([], dtype=torch.float32),
                torch.tensor([], dtype=torch.int64)]

    cat_rows, num_rows, target_rows = zip(*batch)

    # Stack in NumPy first: building a tensor straight from a Python list of
    # ndarrays goes element by element and is very slow (PyTorch warns about it).
    cat_features = torch.as_tensor(np.stack(cat_rows), dtype=torch.int64)
    num_features = torch.as_tensor(np.stack(num_rows), dtype=torch.float32)
    targets = torch.as_tensor(np.asarray(target_rows), dtype=torch.int64)

    return [cat_features, num_features, targets]

In [None]:
# Smoke test: wrap the whole frame in a MyDataset, build a DataLoader with the
# custom collate function and fetch the first batch to inspect the tensors.
dataset  = MyDataset(df_train, CATS, NUMS, df_target)

# no batch_size is passed here, so the DataLoader default applies
dataloader = torch.utils.data.DataLoader(dataset, collate_fn=collate_fn)

next(iter(dataloader))

In [None]:
import torch.nn as nn
import math

#custom model
class TabTransformer(nn.Module):
    """Transformer over categorical embeddings followed by an MLP head.

    Every categorical column gets its own embedding table; the embedded
    columns form a sequence that runs through ``num_blks`` transformer encoder
    layers. The flattened encoder output is concatenated with the
    layer-normalised numeric features and fed to a two-layer MLP that emits
    two logits per row.

    Parameters
    ----------
    cat_cols : list of str
        Categorical column names; the vocabulary size of each one is read from
        the notebook-level ``df_train`` frame.
    num_cols : list of str
        Numeric column names (only their count is used).
    num_hiddens : int
        Embedding width / transformer model dimension.
    ffn_num_hiddens : int
        Hidden width of the feed-forward part of each encoder layer.
    num_heads : int
        Number of attention heads.
    num_blks : int
        Number of stacked encoder layers.
    dropout : float
        Dropout probability inside the encoder layers.
    """
    def __init__(self, cat_cols, num_cols, num_hiddens, ffn_num_hiddens, num_heads, num_blks, dropout):
        super().__init__()
        self.num_hiddens = num_hiddens
        self.cat_cols = cat_cols

        # one learnable embedding table per categorical column, registered as
        # the attribute "token_embedding_<column>"
        for col in cat_cols:
            vocab_size = len(df_train[col].value_counts())
            setattr(self, "token_embedding_" + col, nn.Embedding(vocab_size, num_hiddens))

        # encoder stack; Sequential numbers its children "0", "1", ...
        encoder_layers = [
            nn.TransformerEncoderLayer(d_model=num_hiddens,
                                       nhead=num_heads,
                                       dim_feedforward=ffn_num_hiddens,
                                       dropout=dropout,
                                       batch_first=True)
            for _ in range(num_blks)
        ]
        self.blks = nn.Sequential(*encoder_layers)

        self.cont_layer_norm = nn.LayerNorm(len(num_cols))

        # lazy layers infer their input width on the first forward pass
        self.mlp = nn.Sequential(nn.LazyLinear(num_hiddens), nn.ReLU(), nn.LayerNorm(num_hiddens), nn.LazyLinear(2))

    def forward(self, cat_input_ids, num_input):
        """Return logits of shape (batch, 2).

        ``cat_input_ids`` holds one integer code per categorical column
        (column ``i`` belongs to ``self.cat_cols[i]``); ``num_input`` holds the
        numeric features.
        """
        embedded = [
            getattr(self, "token_embedding_" + col)(cat_input_ids[:, i])
            for i, col in enumerate(self.cat_cols)
        ]

        # (n_cat, batch, hidden) -> (batch, n_cat, hidden)
        X = torch.stack(embedded, dim=0).permute(1, 0, 2)

        # run the encoder layers in order
        X = self.blks(X)

        # (batch, n_cat, hidden) -> (batch, n_cat * hidden), column-major per row
        X = X.flatten(start_dim=1)

        # layer-normalised numeric features: (batch, n_num)
        Y = self.cont_layer_norm(num_input)

        # joint representation: (batch, n_cat * hidden + n_num)
        X = torch.cat([X, Y], dim=1)

        return self.mlp(X)

    def apply_init(self, inputs, init=None):
        """Run one forward pass to materialise the lazy layers, then apply
        ``init`` (if given) to every submodule of the MLP head and the encoder."""
        self.forward(*inputs)
        if init is None:
            return
        for part in (self.mlp, self.blks):
            part.apply(init)

    def init_weights_xavier_uniform(self, m):
        """Xavier-uniform initialise ``m.weight`` when it has at least 2 dims."""
        if not hasattr(m, 'weight'):
            return
        if m.weight.dim() >= 2:
            nn.init.xavier_uniform_(m.weight)


In [None]:
import torch
from torcheval.metrics.functional import binary_auroc

# Sanity check of the metric pipeline on a toy example: six rows of two logits.
X = torch.tensor([[-3,2],[2.1,1],[-10,2], [2,3],[2.122,0.283892],[2.1234123,0.13412]])
# softmax over the class dimension turns the logits into probabilities
softmax_layer = nn.Softmax(dim=1)
X = softmax_layer(X)
# keep the probability of class 1 only
X = X[:,1]

# AUROC of those probabilities against hand-written binary labels
binary_auroc(X, torch.tensor([1,0,1,1,1,0])).item()

In [None]:
import optuna
from sklearn.model_selection import StratifiedKFold
import gc
from torcheval.metrics.functional import binary_auroc



def objective(trial):
    """Optuna objective for TabTransformer: mean validation ROC AUC over 3 folds.

    The search space is the Adam learning rate (``lr``) and the Adam weight
    decay (sampled under the trial name ``"Ridge"``).

    Fixes compared with the previous version:

    * a fresh model and optimizer are built for EVERY fold. Before, a single
      model was shared by all folds and only part of its weights was
      re-initialised, so embeddings and Adam state trained on fold 1 (whose
      training part contains the validation rows of folds 2 and 3) leaked into
      the later folds;
    * the fold score is the AUC over the WHOLE validation fold. Before, the
      score was overwritten per batch, so only the last batch counted;
    * the optimizer is created after the dry run that materialises the lazy
      layers, as recommended for lazy modules;
    * the fold split is seeded so all trials are compared on the same folds.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample ``lr`` and ``Ridge``.

    Returns
    -------
    float
        Average of the per-fold ROC AUC scores (the study maximises it).
    """
    # search space: Adam learning rate and weight decay
    lr = trial.suggest_float("lr", 1e-5, 1e0, log=True)
    weight_decay = trial.suggest_float("Ridge", 1e-5, 1e0, log=True)

    device = torch.device("cuda:0")
    criterion = nn.CrossEntropyLoss()

    kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_accuracies = []
    feature_cols = CATS + NUMS
    num_epochs = 5

    for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):

        # kf.split yields positional indices, so rows are selected with .iloc
        df_train_cross = df_train.iloc[train_idx][feature_cols]
        df_val_cross = df_train.iloc[val_idx][feature_cols]
        y_train, y_val = df_target.iloc[train_idx], df_target.iloc[val_idx]

        train_subset = MyDataset(df_train_cross, CATS, NUMS, y_train)
        val_subset = MyDataset(df_val_cross, CATS, NUMS, y_val)

        train_dataloader = torch.utils.data.DataLoader(train_subset, batch_size=1024, shuffle=True, pin_memory=True, num_workers=10,
                                                       collate_fn = collate_fn, prefetch_factor=2, multiprocessing_context='fork')

        val_dataloader = torch.utils.data.DataLoader(val_subset, batch_size=1024, shuffle=False, pin_memory=True, num_workers=10,
                                                     collate_fn = collate_fn, prefetch_factor=2, multiprocessing_context='fork')

        # brand-new network for this fold
        model = TabTransformer(cat_cols=CATS, num_cols=NUMS, num_hiddens=32, ffn_num_hiddens=128, num_heads=8, num_blks=6, dropout=0.2)
        model.to(device)

        # dry run on one batch materialises the lazy layers, then Xavier init
        init_inputs = [t.to(device, non_blocking=True) for t in next(iter(train_dataloader))[:2]]
        model.apply_init(init_inputs, model.init_weights_xavier_uniform)

        # optimizer is built only now, on fully materialised parameters
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        for epoch in range(num_epochs):
            model.train()
            for inputs in train_dataloader:
                # move the batch to the GPU
                cat_ids, num_feats, targets = (t.to(device, non_blocking=True) for t in inputs)

                outputs = model(cat_ids, num_feats)
                loss = criterion(outputs, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # ---- validation: collect every batch, score the fold once ----
        model.eval()
        val_scores, val_targets = [], []
        with torch.no_grad():
            for inputs in val_dataloader:
                cat_ids, num_feats, targets = (t.to(device, non_blocking=True) for t in inputs)

                # probability of the positive class
                probs = torch.softmax(model(cat_ids, num_feats), dim=1)[:, 1]
                val_scores.append(probs.cpu())
                val_targets.append(targets.cpu())

        roc_score = binary_auroc(torch.cat(val_scores), torch.cat(val_targets)).item()
        fold_accuracies.append(roc_score)

        # release the fold's model and optimizer before the next fold
        del model, optimizer
        torch.cuda.empty_cache()
        gc.collect()

    return sum(fold_accuracies)/len(fold_accuracies)

In [None]:
study_tabtrans = optuna.create_study(direction="maximize")
study_tabtrans.optimize(objective, n_trials=5, n_jobs=1, show_progress_bar=True)
torch.cuda.empty_cache()  #clear reserved memory

## 4) Hyperparameter Tuning with LightGBM model

In [None]:
import lightgbm as lgb
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
import numpy as np
import gc


def objective(trial):
    """Optuna objective for LightGBM: mean validation ROC AUC over 3 stratified folds.

    The search space is the learning rate (``lr``), the TargetEncoder
    ``smoothing``, the maximum number of boosting rounds (``n_est``) and the
    number of leaves (``n_leaves``).

    Fixes compared with the previous version:

    * early stopping now monitors ``auc``, the metric this study maximises.
      Before it monitored the unordered set ``{"l2", "l1"}``, so the kept
      iteration was chosen on a different criterion than the reported score;
    * the number of rounds is passed once, as ``num_boost_round``. Before,
      ``num_boost_round=20`` was given together with ``n_estimators`` in
      ``params``, and LightGBM uses the params alias and ignores the argument;
    * ``Booster.predict`` is called without the unknown ``type`` keyword
      (for the ``binary`` objective it already returns probabilities);
    * the fold split is seeded so all trials are compared on the same folds.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Average of the per-fold ROC AUC scores (the study maximises it).
    """
    # search space
    lr = trial.suggest_float("lr", 1e-5, 1e0, log=True)
    smoothing = trial.suggest_float("smoothing", 1e-5, 1e0, log=True)
    n_estimators = trial.suggest_int("n_est", 500, 1000)
    num_leaves = trial.suggest_int("n_leaves",10,30)

    params = {
        "boosting_type": "gbdt",
        "objective": "binary",
        "metric": "auc",
        "num_leaves": num_leaves,
        "learning_rate": lr,
        "feature_fraction": 0.9,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
        "verbose": -1,
        "device_type": "cuda",
    }

    kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_ROC = []
    feature_cols = CATS + NUMS

    for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
        # kf.split yields positional indices, so rows are selected with .iloc
        df_train_cross = df_train.iloc[train_idx][feature_cols]
        df_val_cross = df_train.iloc[val_idx][feature_cols]
        y_train, y_val = df_target.iloc[train_idx], df_target.iloc[val_idx]

        # preprocessing is fitted on the training fold only
        preproc = ColumnTransformer([("cats", TargetEncoder(cols=CATS, smoothing = smoothing), CATS),("nums", StandardScaler(), NUMS)])

        df_train_cross = preproc.fit_transform(df_train_cross, y_train)
        df_val_cross = preproc.transform(df_val_cross)

        lgb_train = lgb.Dataset(df_train_cross, y_train, categorical_feature=None)
        lgb_eval = lgb.Dataset(df_val_cross, y_val, reference=lgb_train, categorical_feature=None)

        # NOTE(review): the fold used for early stopping is also the fold that
        # is scored, so the reported AUC is slightly optimistic.
        gbm = lgb.train(params, lgb_train, num_boost_round=n_estimators, valid_sets=[lgb_eval],
                        callbacks=[lgb.early_stopping(stopping_rounds=5)])

        # positive-class probabilities on the held-out fold
        oof = gbm.predict(df_val_cross)
        fold_ROC.append(roc_auc_score(y_val, oof))

        # release model and fold data before the next fold
        del gbm
        del df_train_cross, df_val_cross, y_train, y_val
        gc.collect()

    return sum(fold_ROC)/len(fold_ROC)

In [None]:
import optuna

study_lgb = optuna.create_study(direction="maximize")
study_lgb.optimize(objective, n_trials=10, n_jobs=1, show_progress_bar=True)

In [None]:
study_lgb.best_params

## 5) Hyperparameter Tuning with RealMLP model from PyTabkit

In [None]:
from pytabkit import RealMLP_TD_Classifier
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
import numpy as np
import gc
import torch

# Allow reduced-precision float32 matrix multiplications for speed.
torch.set_float32_matmul_precision("medium")    #requires tensor cores. Trades precision for performance

# Work on a copy so df_train keeps its categorical dtypes for the other models.
df_train_realmlp = df_train.copy()

for col in CATS:
    # replace every categorical column by its integer category codes
    df_train_realmlp[col] = df_train_realmlp[col].cat.codes

def objective(trial):
    """Optuna objective for RealMLP: mean validation ROC AUC over 3 stratified folds.

    Only the learning rate (``lr``) is searched; the remaining RealMLP
    settings are fixed below. Rows come from ``df_train_realmlp``, where the
    categorical columns hold integer category codes.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample ``lr``.

    Returns
    -------
    float
        Average of the per-fold ROC AUC scores (the study maximises it).
    """
    # search space: learning rate only
    lr = trial.suggest_float("lr", 1e-5, 1e0, log=True)

    # Fixed seed: every trial is scored on the same three folds, so score
    # differences between trials come from the learning rate and not from a
    # freshly reshuffled split.
    kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_ROC = []
    feature_cols = CATS + NUMS

    for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
        # kf.split yields positional indices, so rows are selected with .iloc
        df_train_cross = df_train_realmlp.iloc[train_idx][feature_cols]
        df_val_cross = df_train_realmlp.iloc[val_idx][feature_cols]
        y_train, y_val = df_target.iloc[train_idx], df_target.iloc[val_idx]

        model = RealMLP_TD_Classifier(device='cuda:0', random_state=0, n_cv=1, n_refit=0,
                                      n_epochs=10, batch_size=1024, hidden_sizes=[256] * 3,
                                      val_metric_name='cross_entropy',
                                      use_ls=False,
                                      lr=lr,
                                      verbosity=-1, val_fraction=0)

        # cat_col_names tells the model which integer-coded columns are categorical
        model.fit(df_train_cross, y_train, cat_col_names=CATS)

        # probability of the positive class on the held-out fold
        oof = model.predict_proba(df_val_cross)[:,1]
        fold_ROC.append(roc_auc_score(y_val, oof))

        # release model and fold data before the next fold
        del model, oof
        del df_train_cross, df_val_cross, y_train, y_val
        gc.collect()

    return sum(fold_ROC)/len(fold_ROC)

In [None]:
import optuna

study_realmlp = optuna.create_study(direction="maximize")
study_realmlp.optimize(objective, n_trials=10, n_jobs=1, show_progress_bar=True)

In [None]:
study_realmlp.best_params

## Construct OOF data for metamodel

### 0) Set-up input for the meta model

In [None]:
import torch

# Out-of-fold probability store for the meta-model, shape (n_rows, 2, 4):
# axis 1 holds the two class probabilities, axis 2 has one slot per base model.
# The cells below write slot 0 (TabM), 1 (XGBoost), 2 (TabTransformer) and
# 3 (LightGBM). RealMLP predictions go into a separate tensor
# (input_tensor_from_realmlp) defined in its own cell.
input_tensor_for_meta_model = torch.zeros(len(df_train), 2, 4)

### 1) OOF_predictions with best found hyperparameters for TabM_D_Classifier

In [None]:
import optuna
import gc
from pytabkit import TabM_D_Classifier
from category_encoders import TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold





    #define hyperparameter search space. Keep it simple and just search "lr" and "weight_decay"
    
# Hyperparameters hard-coded from a previous Optuna run (TabM study).
lr = 0.0006036532064312392
smoothing = 4.1277849868115076e-05

param_TabM_inputs = {
    'device': 'cuda',
    'val_metric_name': '1-auc_ovr',
    'random_state': 100,
    'verbosity': -1,
    'arch_type': 'tabm-mini',
    'tabm_k': 32,
    'num_emb_type': 'pwl',
    'd_embedding': 12,
    'batch_size': 256,
    'lr': lr,
    'n_epochs': 10,
    'dropout': 0.1,
    'd_block': 512,
    'n_blocks': 3,
    "compile_model":True,
    "allow_amp":True,
    
}


# NOTE(review): the split is shuffled without random_state, so the folds (and
# therefore the stored out-of-fold predictions) change on every run.
kf = StratifiedKFold(n_splits=3, shuffle=True)

for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
    # positional fold indices are used as labels here; this relies on df_train
    # having a 0..n-1 RangeIndex
    df_train_cross, df_val_cross = df_train.loc[train_idx, CATS+NUMS], df_train.loc[val_idx, CATS+NUMS]
    y_train = df_target.loc[train_idx]

    #instantiate and fit column transformer (on the training fold only)
    col_tran = ColumnTransformer([("cat", TargetEncoder(cols=CATS, smoothing=smoothing), CATS), 
                              ("num", StandardScaler(), NUMS)])#, remainder="passthrough")
    df_train_cross = col_tran.fit_transform(df_train_cross, y_train)
    df_val_cross = col_tran.transform(df_val_cross)

    #instantiate and fit tabM model
    tab_m = TabM_D_Classifier(**param_TabM_inputs)

    tab_m.fit(df_train_cross, y_train)
    # both class probabilities for the held-out rows
    oof = tab_m.predict_proba(df_val_cross)
    # slot 0 of the meta-model input holds the TabM predictions
    input_tensor_for_meta_model[val_idx,:,0] = torch.tensor(oof).to(device="cpu")

    # release model and fold data before the next fold
    del tab_m
    del df_train_cross, df_val_cross, y_train, oof
    gc.collect()
        


### 2) OOF_predictions with best found hyperparameters for XGBClassifier

In [None]:
from xgboost import XGBClassifier
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Hyperparameters hard-coded from a previous Optuna run (XGBoost study).
lr = 0.18539072905254378
smoothing = 0.0001579817099466443
max_depth = 4
n_estimators=944

#model = make_pipeline(col_tran, tab_m)

# NOTE(review): the split is shuffled without random_state, so the folds (and
# therefore the stored out-of-fold predictions) change on every run.
kf = StratifiedKFold(n_splits=3, shuffle=True)

for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
    # positional fold indices are used as labels here; this relies on df_train
    # having a 0..n-1 RangeIndex
    df_train_cross, df_val_cross = df_train.loc[train_idx, CATS+NUMS], df_train.loc[val_idx, CATS+NUMS]
    y_train = df_target.loc[train_idx]

    # preprocessing is part of the pipeline, so it is fitted on the training fold only
    preproc = ColumnTransformer([("cats", TargetEncoder(cols=CATS, smoothing = smoothing), CATS),
                        ("nums", StandardScaler(), NUMS)])
    est = XGBClassifier(device = "cuda", learning_rate=lr, max_depth=max_depth, n_estimators=n_estimators)
    model = make_pipeline(preproc, est)

    model.fit(df_train_cross, y_train)
    # both class probabilities for the held-out rows
    oof = model.predict_proba(df_val_cross)

    # slot 1 of the meta-model input holds the XGBoost predictions
    input_tensor_for_meta_model[val_idx,:,1] = torch.tensor(oof).to(device="cpu")

    # release model and fold data before the next fold
    del model
    del df_train_cross, df_val_cross, y_train, oof
    gc.collect()

### 3) OOF_predictions with best found hyperparameters for TabTransformer

In [None]:
from sklearn.model_selection import StratifiedKFold
import gc


# Hyperparameters hard-coded from a previous Optuna run (TabTransformer study).

lr = 0.012835801665180483

weight_decay = 1.9128672660942447e-05

device = torch.device("cuda:0")

criterion = nn.CrossEntropyLoss()

# Validation batch size; also the stride used to map each prediction batch back
# to its rows, so it lives in one place.
VAL_BATCH_SIZE = 2048


#define data and cross-validation splitting values

kf = StratifiedKFold(n_splits=3, shuffle=True)
fold_accuracies=[]

for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):

    df_train_cross, df_val_cross = df_train.loc[train_idx, CATS+NUMS], df_train.loc[val_idx, CATS+NUMS]
    y_train, y_val = df_target.loc[train_idx], df_target.loc[val_idx]

    train_subset = MyDataset(df_train_cross, CATS, NUMS, y_train)
    val_subset = MyDataset(df_val_cross, CATS, NUMS, y_val)

    train_dataloader = torch.utils.data.DataLoader(train_subset, batch_size=1024, shuffle=True, pin_memory=True, num_workers=10,
                                                   collate_fn = collate_fn, prefetch_factor=2, multiprocessing_context='fork')

    # shuffle=False keeps the prediction order aligned with val_idx
    val_dataloader = torch.utils.data.DataLoader(val_subset, batch_size=VAL_BATCH_SIZE, shuffle=False, pin_memory=True, num_workers=10,
                                                 collate_fn = collate_fn, prefetch_factor=2, multiprocessing_context='fork')

    # A brand-new model and optimizer per fold. Reusing one model across folds
    # (with only part of its weights re-initialised) lets embeddings and Adam
    # state trained on earlier folds -- whose training data contains this
    # fold's validation rows -- leak into the "out-of-fold" predictions.
    model = TabTransformer(cat_cols=CATS, num_cols=NUMS, num_hiddens=32, ffn_num_hiddens=128, num_heads=8, num_blks=6, dropout=0.2)
    model.to(device)

    # dry run on one batch materialises the lazy layers, then Xavier init
    init_inputs = [t.to(device, non_blocking=True) for t in next(iter(train_dataloader))[:2]]
    model.apply_init(init_inputs, model.init_weights_xavier_uniform)

    # optimizer is built only now, on fully materialised parameters
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    num_epochs=1
    for epoch in range(num_epochs):
        model.train()
        for inputs in train_dataloader:
            #move data to GPU
            cat_ids, num_feats, targets = (t.to(device, non_blocking=True) for t in inputs)

            outputs = model(cat_ids, num_feats)
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # ---- out-of-fold predictions for this fold ----
    model.eval()
    with torch.no_grad():
        offset = 0
        for inputs in val_dataloader:
            cat_ids, num_feats = (t.to(device, non_blocking=True) for t in inputs[:2])

            # both class probabilities for the batch
            oof = torch.softmax(model(cat_ids, num_feats), dim=1)

            # rows of this batch, in loader order; slot 2 holds the
            # TabTransformer predictions
            batch_rows = val_idx[offset:offset + len(oof)]
            input_tensor_for_meta_model[batch_rows,:,2] = oof.to(device="cpu")
            offset += len(oof)

    # release the fold's model, optimizer and data before the next fold
    del model, optimizer
    del df_train_cross, df_val_cross, y_train, oof
    torch.cuda.empty_cache()
    gc.collect()

torch.cuda.empty_cache()
gc.collect()

### 4) OOF_predictions with best found hyperparameters for LightGBM model

In [None]:
import lightgbm as lgb
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
import numpy as np
import gc




# Hyperparameters hard-coded from a previous Optuna run (LightGBM study).

lr = 0.019033709394977512
smoothing = 0.011297091655658249
n_estimators= 867
num_leaves = 24

# "metric" is "auc" so early stopping monitors the same quantity the tuning
# study maximised (it used to be the unordered set {"l2", "l1"}).
# The number of rounds is passed to lgb.train as num_boost_round; previously it
# was given as "n_estimators" inside params, and LightGBM uses that alias and
# ignores the num_boost_round=20 argument.
params = {
"boosting_type": "gbdt",
"objective": "binary",
"metric": "auc",
"num_leaves": num_leaves,
"learning_rate": lr,
"feature_fraction": 0.9,
"bagging_fraction": 0.8,
"bagging_freq": 5,
"verbose": -1,
"device_type":"cuda",
}

kf = StratifiedKFold(n_splits=3, shuffle=True)

for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
    df_train_cross, df_val_cross = df_train.loc[train_idx, CATS+NUMS], df_train.loc[val_idx, CATS+NUMS]
    y_train, y_val = df_target.loc[train_idx], df_target.loc[val_idx]

    # preprocessing is fitted on the training fold only
    preproc = ColumnTransformer([("cats", TargetEncoder(cols=CATS, smoothing = smoothing), CATS),("nums", StandardScaler(), NUMS)])

    df_train_cross = preproc.fit_transform(df_train_cross, y_train)
    df_val_cross = preproc.transform(df_val_cross)


    lgb_train = lgb.Dataset(df_train_cross, y_train, categorical_feature=None)

    lgb_eval = lgb.Dataset(df_val_cross, y_val, reference=lgb_train, categorical_feature=None)

    # NOTE(review): the held-out fold also drives early stopping, so these
    # "out-of-fold" predictions are mildly optimistic.
    gbm = lgb.train(params, lgb_train, num_boost_round=n_estimators, valid_sets=[lgb_eval],
                    callbacks=[lgb.early_stopping(stopping_rounds=5)])

    # positive-class probabilities (the unknown `type` keyword was dropped)
    oof = gbm.predict(df_val_cross)

    prob_pos_class = torch.tensor(oof).to(device="cpu", dtype=torch.float32)

    prob_neg_class = 1-prob_pos_class

    # same [negative, positive] column layout as predict_proba of the other models
    X = torch.stack([prob_neg_class, prob_pos_class], dim=1)

    # slot 3 of the meta-model input holds the LightGBM predictions
    input_tensor_for_meta_model[val_idx,:,3] = X

    del gbm
    del df_train_cross, df_val_cross, y_train, y_val, X
    torch.cuda.empty_cache()
    gc.collect()

### 5) OOF_predictions with best found hyperparameters for RealMLP model

In [None]:
import torch

# Separate out-of-fold store for RealMLP, shape (n_rows, 2, 1).
# NOTE(review): within the visible cells this tensor is never merged into
# input_tensor_for_meta_model nor saved, so the RealMLP predictions do not
# reach the meta-model -- confirm whether that is intended.
input_tensor_from_realmlp = torch.zeros(len(df_train), 2, 1)

from pytabkit import RealMLP_TD_Classifier
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
import numpy as np
import gc
import torch

torch.set_float32_matmul_precision("medium")    #requires tensor cores. Trades precision for performance

# copy of the features with categorical columns replaced by integer codes
df_train_realmlp = df_train.copy()

for col in CATS:
    df_train_realmlp[col] = df_train_realmlp[col].cat.codes

    
# learning rate hard-coded from a previous Optuna run (RealMLP study)
lr = 0.8301728407699202


# NOTE(review): shuffled without random_state, so folds differ between runs
kf = StratifiedKFold(n_splits=3, shuffle=True)


for train_idx, val_idx in kf.split(np.zeros(len(df_train)), df_target):
    df_train_cross, df_val_cross = df_train_realmlp.loc[train_idx, CATS+NUMS], df_train_realmlp.loc[val_idx, CATS+NUMS]
    y_train = df_target.loc[train_idx] 

    model = RealMLP_TD_Classifier(device='cuda:0', random_state=0, n_cv=1, n_refit=0,
                              n_epochs=10, batch_size=1024, hidden_sizes=[256] * 3,
                              val_metric_name='cross_entropy',
                              use_ls=False,
                              lr=lr, 
                              verbosity=-1, val_fraction=0)
    
    # cat_col_names tells the model which integer-coded columns are categorical
    model.fit(df_train_cross, y_train, cat_col_names=CATS)
    # both class probabilities for the held-out rows
    oof = model.predict_proba(df_val_cross)

    input_tensor_from_realmlp[val_idx,:,0] = torch.tensor(oof).to(device="cpu")

    # release model and fold data before the next fold
    del model, oof
    del df_train_cross, df_val_cross, y_train
    gc.collect()

# Save input_tensor_for_meta_model

In [None]:
import torch
from safetensors.torch import save_file

# Persist the out-of-fold tensor under the key "my_first_tensor" so the
# meta-model section can be re-run without recomputing the base models.
# NOTE(review): only input_tensor_for_meta_model is saved here; the separate
# input_tensor_from_realmlp tensor is not written to the file.
tensor_to_save = {"my_first_tensor":input_tensor_for_meta_model}

save_file(tensor_to_save, "input_to_meta_model.safetensors")

## Train meta_model: We choose CatBoost meta-model

### 0.0) load input_to_meta_model

In [None]:
from safetensors.torch import load_file

loaded_tensor = load_file("input_to_meta_model.safetensors")

input_tensor_for_meta_model = loaded_tensor["my_first_tensor"]

### 0) Convert the input torch tensor to a NumPy array and reshape it from (len(df_train), 2, 4) to (len(df_train), 8)

In [None]:
# Flatten each row's (2 class probabilities x n_models) block into one feature
# vector, keeping exactly one row per training example. The width is inferred
# from the tensor: the previous hard-coded reshape(-1, 10) does not match the
# (n_rows, 2, 4) tensor built above (8 values per row), so it either raised or
# produced a matrix whose row count no longer matched meta_target.
meta_input = input_tensor_for_meta_model.flatten(start_dim=1).numpy()
meta_target = df_target.values

### 1) Train CatBoost meta-model

In [None]:
from catboost import CatBoostClassifier

# Meta-learner: a CatBoost classifier with fixed settings, trained on the GPU
catboost_model = CatBoostClassifier(
    task_type='GPU',
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    random_state=42,
    verbose=0,
)

# Fit the meta-learner on the flattened base-model outputs
catboost_model.fit(meta_input, meta_target)

### 1.1) Let's save the catboost (meta) model

In [None]:
# Persist the fitted meta-model; the inference section reloads it from this file
catboost_model.save_model("dirty_data_ensemble.cbm")

## Train each base model on the entire dataset

### 1) Train TabM_D_Classifier 

In [None]:
import optuna
import gc
from pytabkit import TabM_D_Classifier
from category_encoders import TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.metrics import roc_auc_score
import torch

# Fixed hyperparameters for the final fit (no search is run in this cell)
lr = 0.0006036532064312392
smoothing = 4.1277849868115076e-05

# Keyword arguments forwarded verbatim to TabM_D_Classifier
param_TabM_inputs = dict(
    device='cuda:0',
    val_metric_name='1-auc_ovr',
    random_state=100,
    verbosity=2,
    arch_type='tabm-mini',
    tabm_k=32,
    num_emb_type='pwl',
    d_embedding=12,
    batch_size=512,
    lr=lr,
    n_epochs=10,
    dropout=0.1,
    d_block=512,      # smaller for shorter training
    n_blocks=3,       # smaller for shorter training
    compile_model=True,
    allow_amp=True,
    val_fraction=0.4,
)

# Target-encode the categorical columns and standardize the numeric ones
col_tran = ColumnTransformer(
    [
        ("cat", TargetEncoder(cols=CATS, smoothing=smoothing), CATS),
        ("num", StandardScaler(), NUMS),
    ]
)
df_train_tabm = col_tran.fit_transform(df_train, df_target)

# Instantiate the TabM model and fit it on the full, transformed training set
tab_m = TabM_D_Classifier(**param_TabM_inputs)
tab_m.fit(df_train_tabm, df_target)

### 1.1) Save the tab_m model

In [None]:
import pickle
# Serialize the fitted TabM classifier so the inference section can reload it
filename = 'my_tabm_model.pkl'
with open(filename, mode='wb') as fh:
    fh.write(pickle.dumps(tab_m))

### 2) Train XGBClassifier

In [None]:
from xgboost import XGBClassifier
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Fixed hyperparameters for the final XGBoost fit
lr = 0.18539072905254378
smoothing = 0.0001579817099466443
max_depth = 4
n_estimators = 944

# Preprocessing: target-encode categoricals, standardize numerics
preproc = ColumnTransformer(
    [
        ("cats", TargetEncoder(cols=CATS, smoothing=smoothing), CATS),
        ("nums", StandardScaler(), NUMS),
    ]
)
est = XGBClassifier(
    n_estimators=n_estimators,
    max_depth=max_depth,
    learning_rate=lr,
    device="cuda",
)

# Chain preprocessing and estimator so the saved object carries both
xgb_model = make_pipeline(preproc, est)
xgb_model.fit(df_train, df_target)


### 2.1) Save XGBClassifier

In [None]:
import pickle
# Serialize the fitted preprocessing + XGBoost pipeline for the inference section
filename = 'my_xgb_model.pkl'
with open(filename, mode='wb') as fh:
    fh.write(pickle.dumps(xgb_model))

### 3) Train TabTransformer model

In [None]:
from sklearn.model_selection import StratifiedKFold
import gc


# Fixed hyperparameters for the final fit: only "lr" and "weight_decay" are set here
# (no search is run in this cell).

lr = 0.012835801665180483

weight_decay = 1.9128672660942447e-05

# Define model, optimizer and criterion; the model is moved to the first CUDA device
model = TabTransformer(cat_cols=CATS, num_cols=NUMS, num_hiddens=32, ffn_num_hiddens=128, num_heads=8, num_blks=6, dropout=0.2)
device = torch.device("cuda:0")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

criterion = nn.CrossEntropyLoss()



# Dataset over the full training frame (no validation split in this cell)
train_subset = MyDataset(df_train, CATS, NUMS, df_target)



# Shuffled loader with 10 worker processes; batches are assembled by the notebook's collate_fn
train_dataloader = torch.utils.data.DataLoader(train_subset, batch_size=512, shuffle=True, pin_memory=True, num_workers=10, 
                                               collate_fn = collate_fn, prefetch_factor=2, multiprocessing_context='fork')


# Re-initialize weights: take the first two elements of one batch, move them to the
# device and pass them to model.apply_init together with the Xavier-uniform initializer.
# NOTE(review): presumably apply_init needs a sample batch to run a forward pass before
# applying the initializer — confirm against the TabTransformer definition.
init_inputs = (next(iter(train_dataloader)))[:2]
for i, v in enumerate(init_inputs):
    v = v.to(device, non_blocking=True)
    init_inputs[i]=v
    
model.apply_init(init_inputs, model.init_weights_xavier_uniform)

# Single pass over the training data
num_epochs=1
for epoch in range(num_epochs):
    model.train()
    for inputs in train_dataloader:
        # Move every element of the batch to the GPU (the batch is mutated in place)
        for k,v in enumerate(inputs):
            v = v.to(device, non_blocking=True)
            inputs[k] = v
        
        # The first two batch elements are the model inputs; the third is the target
        outputs = model(*inputs[:2])
        loss = criterion(outputs, inputs[2])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Drop references to this batch's tensors before the next iteration
        del inputs
        del loss
        del outputs

#################################################################################
# Release cached GPU memory and run the Python garbage collector
torch.cuda.empty_cache()
gc.collect()

### 3.1) Save TabTransformer model

In [None]:
from safetensors.torch import save_file

# Only the weights are saved; the inference section rebuilds the TabTransformer
# with the same constructor arguments before loading them.
state_dict = model.state_dict()

save_file(state_dict, "tab_transformer_model.safetensors")

### 4) Train LightGBM model

In [None]:
import lightgbm as lgb
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
import numpy as np


# Fixed hyperparameters for the final LightGBM fit
lr = 0.019033709394977512
smoothing = 0.011297091655658249
n_estimators = 867
num_leaves = 24

params = {
    "boosting_type": "gbdt",
    "objective": "binary",
    # Classification metric matching the binary objective (was the regression
    # metrics l1/l2 given as an unordered set).
    "metric": "binary_logloss",
    "num_leaves": num_leaves,
    "learning_rate": lr,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    "verbose": -1,
    "device_type": "cuda",
}

# Target-encode categoricals and standardize numerics; the inference section must
# re-create this transformer with the same `smoothing`.
preproc = ColumnTransformer([("cats", TargetEncoder(cols=CATS, smoothing = smoothing), CATS),
                    ("nums", StandardScaler(), NUMS)])

df_train_cross = preproc.fit_transform(df_train, df_target)

lgb_train = lgb.Dataset(df_train_cross, df_target, categorical_feature=None)

# The number of boosting rounds is given once, here. Previously "n_estimators" also
# sat in `params`, where it silently overrode num_boost_round=20 (with a warning),
# so the effective number of rounds is unchanged.
gbm = lgb.train(params, lgb_train, num_boost_round=n_estimators)

### 4.1) Save LightGBM model

In [None]:
# Write the trained booster to a text file; the inference section loads it via lgb.Booster(model_file=...)
gbm.save_model("lightgbm.txt")

### 5) Train RealMLP model 

In [None]:
from pytabkit import RealMLP_TD_Classifier
from category_encoders import TargetEncoder
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
import numpy as np
import gc
import torch

# Requires tensor cores; trades matmul precision for speed
torch.set_float32_matmul_precision("medium")

# Copy of the training frame with every categorical column replaced by its integer codes
category_codes = {col: df_train[col].cat.codes for col in CATS}
df_train_realmlp = df_train.assign(**category_codes)

lr = 0.8301728407699202

# Fixed RealMLP configuration for the final fit on the full training set
model = RealMLP_TD_Classifier(
    device='cuda:0',
    random_state=0,
    n_cv=1,
    n_refit=0,
    n_epochs=10,
    batch_size=1024,
    hidden_sizes=[256] * 3,
    val_metric_name='cross_entropy',
    use_ls=False,
    lr=lr,
    verbosity=-1,
    val_fraction=0,
)

model.fit(df_train_realmlp, df_target, cat_col_names=CATS)

### 5.1) Save RealMLP model 

In [None]:
import pickle
# Serialize the fitted RealMLP classifier (`model`) for the inference section
filename = 'my_realmlp_model.pkl'
with open(filename, mode='wb') as fh:
    fh.write(pickle.dumps(model))

## 4) Build the stacked-generalization model from the base learners and the meta-model for inference

In [None]:
# NOTE(review): absolute, machine-specific path into the local kagglehub cache —
# the notebook will not run elsewhere without editing it.
pathtest = "/home/ildar/.cache/kagglehub/datasets/amruthayenikonda/dirty-dataset-to-practice-data-cleaning/versions/1/test.csv"

# The "id" column becomes the index; it is restored with reset_index() when the submission is built
test = pd.read_csv(pathtest, index_col="id")

In [None]:
# Column dtypes and non-null counts of the raw test frame
test.info()

### 4.1) Perform the same preprocessing operations on test as was done for df_train

In [None]:
#cyclical features

import numpy as np

# Work on a copy so the raw `test` frame stays untouched
df_test = test.copy()

# Map `pdays` onto a circle with period 365 and keep its sine / cosine as float32 features
pdays_angle = 2*np.pi * df_test['pdays'] / 365
df_test['_pdays_sin'] = np.sin(pdays_angle).astype('float32')
df_test['_pdays_cos'] = np.cos(pdays_angle).astype('float32')

In [None]:
# Cast the balance and duration columns to float32
for float_col in ("balance", "duration"):
    df_test[float_col] = df_test[float_col].astype("float32")

In [None]:
# Month abbreviation -> month number, listed in calendar order
replacement_values = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4,
    'may': 5, 'jun': 6, 'jul': 7, 'aug': 8,
    'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}

# Combine month and day into a single number.
# NOTE(review): the month stride is 12, so different dates collide (e.g. Jan 13 and
# Feb 1 both give 13). Kept as-is — presumably it mirrors the training preprocessing;
# confirm before changing, and change both sides together.
month_number = df_test["month"].replace(replacement_values)
df_test["data"] = (month_number - 1) * 12 + df_test["day"]

In [None]:
# Cosine / sine of the combined date number on a 365-unit circle, stored as float32
date_angle = 2*np.pi*df_test["data"]/365
df_test["_date_cos"] = np.cos(date_angle).astype("float32")
df_test["_date_sin"] = np.sin(date_angle).astype("float32")

In [None]:
# Categorical True/False flag marking rows whose pdays equals -1
df_test["was_not_contacted"] = (df_test["pdays"]==-1).astype("category")

In [None]:
# Log-transformed copies of balance and duration.
# balance gets a signed log: log1p of the magnitude, multiplied by the original sign.
balance = df_test["balance"]
df_test["balance_log"] = np.sign(balance) * np.log1p(balance.abs())
df_test["duration_log"] = np.log1p(df_test["duration"])

In [None]:
# Convert every int64 and object column to the category dtype.
# Note that this includes the integer-valued columns (not only the string ones),
# so after this cell only the float columns remain numeric.
cols = df_test.select_dtypes(["int64", "object"]).columns.to_list()

for col in cols:
    df_test[col] = df_test[col].astype("category")

In [None]:
from itertools import combinations

# Column lists at this stage: all categorical columns and all float32 columns
CATS = df_test.select_dtypes("category").columns.to_list()
NUMS = df_test.select_dtypes("float32").columns.to_list()

# One new categorical column per unordered pair of categorical columns:
# the two values are joined with "_" and so are the two column names.
for first, second in combinations(CATS, 2):
    joined = df_test[first].astype("str") + "_" + df_test[second].astype("str")
    df_test[f"{first}_{second}"] = joined.astype("category")

In [None]:
# One product column per unordered pair of float32 columns, named "<left>x<right>"
for left, right in combinations(NUMS, 2):
    df_test[left + "x" + right] = df_test[left] * df_test[right]

In [None]:
# Inspect the dtypes and non-null counts after feature engineering
df_test.info()

In [None]:
# Final column lists, now including the interaction columns added above.
# These overwrite the notebook-level CATS / NUMS used by every cell that follows.
CATS = df_test.select_dtypes("category").columns.to_list()
NUMS = df_test.select_dtypes("number").columns.to_list()

### 4.2) Build the input for meta-model

### 4.2.0) Set-up input for the meta model

In [None]:
# Zero-filled buffer of shape (n_test_rows, 2, 4): the cells below write each
# base model's two-column predict_proba output into one slot of the last axis.
input_tensor_for_meta_model_preds = torch.zeros(len(df_test), 2, 4)

### 4.2.1.0) Load TabM_D_Classifier

In [None]:
import pickle
# Reload the TabM classifier pickled in the training section.
# NOTE: unpickling can execute arbitrary code — only load files this notebook produced.
filename = 'my_tabm_model.pkl'

with open(filename, mode='rb') as fh:
    tabm_model = pickle.loads(fh.read())

### 4.2.1.1) Outputs of TabM_D_Classifier

In [None]:
import optuna
import gc
from pytabkit import TabM_D_Classifier
from category_encoders import TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.metrics import roc_auc_score
import torch
from sklearn.model_selection import StratifiedKFold




# Same smoothing value as in the TabM training cell
smoothing = 4.1277849868115076e-05

# Re-create the TabM preprocessing and fit it on the training data before transforming the test set
col_tran = ColumnTransformer(
    [
        ("cat", TargetEncoder(cols=CATS, smoothing=smoothing), CATS),
        ("num", StandardScaler(), NUMS),
    ]
)
col_tran.fit(df_train, y=df_target)

df_test_tabm = col_tran.transform(df_test)

# Class probabilities from TabM go into slot 0 of the meta-model input
tabm_proba = tabm_model.predict_proba(df_test_tabm)
input_tensor_for_meta_model_preds[:, :, 0] = torch.tensor(tabm_proba).to(device="cpu")

# Free the model and its outputs before the next base model is loaded
del tabm_model, tabm_proba
torch.cuda.empty_cache()
gc.collect()
        

### 4.2.2.0) Load XGBClassifier

In [None]:
import pickle

# Reload the pickled preprocessing + XGBoost pipeline.
# NOTE: unpickling can execute arbitrary code — only load files this notebook produced.
filename = 'my_xgb_model.pkl'
with open(filename, mode='rb') as fh:
    xgb_model = pickle.loads(fh.read())

### 4.2.2.1) Outputs of XGBClassifier

In [None]:
# The pipeline applies its own preprocessing, so it takes the engineered test frame directly;
# its class probabilities go into slot 1 of the meta-model input.
xgb_proba = xgb_model.predict_proba(df_test)
input_tensor_for_meta_model_preds[:, :, 1] = torch.tensor(xgb_proba).to(device="cpu")

del xgb_model, xgb_proba
gc.collect()


### 4.2.3.0) Load TabTransformer model

In [None]:
from safetensors.torch import load_file

# Rebuild the network with the same constructor arguments as the training cell
tabt_model = TabTransformer(
    cat_cols=CATS,
    num_cols=NUMS,
    num_hiddens=32,
    ffn_num_hiddens=128,
    num_heads=8,
    num_blks=6,
    dropout=0.2,
)

device = torch.device("cuda:0")
tabt_model.to(device)

# Read the saved weights from disk and copy them into the freshly built model
state_dict = load_file("tab_transformer_model.safetensors")
tabt_model.load_state_dict(state_dict)

### 4.2.3.1) Outputs of TabTransformer model

In [None]:
# Batch size of the test loader
TEST_BATCH_SIZE = 2048

# MyDataset is given a target argument here as well; test labels are unknown, so a
# slice of the training target with one entry per test row is passed (presumably only
# a placeholder — it is never read below). The length follows the test frame instead
# of the previously hard-coded 250000.
test_subset = MyDataset(df_test, CATS, NUMS, df_target.iloc[:len(df_test)])


# shuffle=False keeps batches in row order, which the running offset below relies on
test_dataloader = torch.utils.data.DataLoader(test_subset, batch_size=TEST_BATCH_SIZE, shuffle=False, pin_memory=True, num_workers=10,
                                               collate_fn = collate_fn, prefetch_factor=2, multiprocessing_context='fork')


#################################################################################

tabt_model.eval()
with torch.no_grad():
    row_start = 0
    for inputs in test_dataloader:
        # Only the first two batch elements are model inputs; move just those to the GPU
        features = [t.to(device, non_blocking=True) for t in inputs[:2]]

        # Logits -> class probabilities
        probs = torch.softmax(tabt_model(*features), dim=1).to(device="cpu")

        # Write into slot 2 by running offset, so the (possibly shorter) last batch
        # lands correctly without depending on the batch size
        row_end = row_start + probs.shape[0]
        input_tensor_for_meta_model_preds[row_start:row_end, :, 2] = probs
        row_start = row_end

        del probs, features
del tabt_model
torch.cuda.empty_cache()
gc.collect()

### 4.2.4.0) Load LightGBM model

In [None]:
import lightgbm as lgb

# Reload the booster from the text file written by gbm.save_model in the training section
bst_loaded = lgb.Booster(model_file="lightgbm.txt")

### 4.2.4.1) Outputs of LightGBM model

In [None]:
# Must equal the smoothing used when the LightGBM model was trained. Without this
# assignment `smoothing` still holds the TabM value set in an earlier inference cell,
# so the test features would be encoded differently from the training features.
smoothing = 0.011297091655658249

col_tran = ColumnTransformer([("cat", TargetEncoder(cols=CATS, smoothing=smoothing), CATS),
                          ("num", StandardScaler(), NUMS)])
col_tran.fit(df_train, y=df_target)


df_test_lightgbm = col_tran.transform(df_test)


# For a binary objective Booster.predict returns the positive-class probability by
# default; the R-style `type="response"` argument is not part of the Python API and
# was dropped.
oof = bst_loaded.predict(df_test_lightgbm)

prob_pos_class = torch.tensor(oof).to(device="cpu", dtype=torch.float32)

prob_neg_class = 1-prob_pos_class

# Stack into the same [P(class 0), P(class 1)] layout the other base models produce
X = torch.stack([prob_neg_class, prob_pos_class], dim=1)

input_tensor_for_meta_model_preds[:,:,3] = X

del bst_loaded
del oof, X
torch.cuda.empty_cache()
gc.collect()

### 4.2.5.0) Load RealMLP model

In [None]:
import pickle

# Reload the pickled RealMLP classifier.
# NOTE: unpickling can execute arbitrary code — only load files this notebook produced.
filename = 'my_realmlp_model.pkl'
with open(filename, mode='rb') as fh:
    realmlp_model = pickle.loads(fh.read())

### 4.2.5.1) Outputs of RealMLP model

In [None]:
# Separate single-slot buffer for the RealMLP predictions: shape (n_test_rows, 2, 1)
input_tensor_from_realmlp_preds = torch.zeros(len(df_test), 2, 1)

torch.set_float32_matmul_precision("medium")    # requires tensor cores; trades precision for performance

df_test_realmlp = df_test.copy()

for col in CATS:
    # Encode with the TRAINING categories. Codes are positions in the category list,
    # so taking them from the test frame's own categories would shift the codes
    # whenever the two frames do not contain exactly the same set of values.
    # Values never seen in training get code -1.
    train_categories = df_train[col].cat.categories
    df_test_realmlp[col] = df_test[col].cat.set_categories(train_categories).cat.codes

oof = realmlp_model.predict_proba(df_test_realmlp)
input_tensor_from_realmlp_preds[:,:,0] = torch.tensor(oof).to(device="cpu")

del realmlp_model
del oof
gc.collect()

In [None]:
import os

import torch
from safetensors.torch import load_file

# Base predictions of the first four models. A file saved by an earlier run is used
# when present (resume workflow); otherwise fall back to the tensor still in memory,
# so a fresh Restart & Run All does not fail on a file that is only written later.
# NOTE(review): a file left over from an older run takes precedence over freshly
# computed in-memory predictions — delete it when the base models change.
preds_file = "input_to_meta_model_preds.safetensors"

if os.path.exists(preds_file):
    loaded_tensor = load_file(preds_file)
    input_to_meta_model_preds = loaded_tensor["my_first_tensor"]
else:
    input_to_meta_model_preds = input_tensor_for_meta_model_preds

# Keep only the four original slots. The saved/in-memory tensor already carries the
# RealMLP slot after one complete run; without this slice a re-run would append it a
# second time and break the later reshape(-1, 10).
input_to_meta_model_preds = input_to_meta_model_preds[:, :, :4]

In [None]:
# Append the RealMLP predictions as one more slot along the last axis (dim=2).
# NOTE(review): not idempotent — running this cell twice appends the slot twice.
input_to_meta_model_preds = torch.cat([input_to_meta_model_preds, input_tensor_from_realmlp_preds], dim=2)

In [None]:
# Rebind the name used by the save and inference cells below to the extended tensor
input_tensor_for_meta_model_preds = input_to_meta_model_preds

### 4.2.6) Save input for meta-model

In [None]:
import torch
from safetensors.torch import save_file

# save_file expects a name -> tensor mapping; store the test-time meta-model input under one key
tensor_to_save = dict(my_first_tensor=input_tensor_for_meta_model_preds)

save_file(tensor_to_save, "input_to_meta_model_preds.safetensors")

### 4.3) Inference of meta-model

### 4.3.1) Prepare input

In [None]:
# Flatten each test row's trailing dimensions into 10 feature columns, the same
# reshape that was applied to the meta-model's training input
meta_input_preds = input_tensor_for_meta_model_preds.reshape(-1,10).numpy()

### 4.3.2) load meta model

In [None]:
# Create a classifier object and load the saved meta-model from disk into it
# NOTE(review): the constructor arguments repeat the training settings; presumably
# load_model replaces them with the saved model's own — confirm in the CatBoost docs.
ensembler = CatBoostClassifier(iterations=1000, learning_rate=0.1, depth=6, random_state=42, verbose=0, task_type='GPU')
ensembler.load_model("dirty_data_ensemble.cbm")

In [None]:
# Keep only column 1 of predict_proba (probability of the second class) as the final score
Y = ensembler.predict_proba(meta_input_preds)[:,1]

In [None]:
# `test` was read with index_col="id"; reset_index turns the id back into a regular column
submission = test.reset_index()

In [None]:
# Keep only the id column (double brackets keep the result a DataFrame)
submission = submission[["id"]]

In [None]:
# Attach the meta-model scores; Y is positional, so it must be in the same row order as `test`
submission["Y"] = Y

In [None]:
# Write the submission without the DataFrame index.
# NOTE(review): absolute, machine-specific output path.
submission.to_csv("/home/ildar/out.csv", index=False)

In [None]:
# Leftover scratch cell: the bare name `Hi` was removed because evaluating it raised
# NameError and broke Restart & Run All.