In [1]:
local = True

# About this notebook
- Deberta-v3-large starter code
- pip wheels is [here](https://www.kaggle.com/code/yasufuminakama/pppm-pip-wheels)
- Training notebook is [here](https://www.kaggle.com/code/yasufuminakama/pppm-deberta-v3-large-baseline-w-w-b-train)

If this notebook is helpful, feel free to upvote :)

In [2]:
# ====================================================
# Directory settings
# ====================================================
exp_names = ["funnel-transformer-large512",
             "deberta-v3-base",
             "microsoft-deberta-large"
            ]

import os
if local:
    INPUT_DIR = '../../data/us-patent-phrase-to-phrase-matching/'
    
    OUTPUT_DIR = f"./output/stacking1dcnn/"
else:
    INPUT_DIR = '../input/us-patent-phrase-to-phrase-matching/'
    OUTPUT_DIR = './'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

In [3]:
def get_logger(filename=OUTPUT_DIR+'train'):
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

# CFG

In [4]:
# ====================================================
# CFG
# ====================================================
class CFG:
    
    EXP_NAMES = exp_names
    models=["funnel-transformer-large512",
            "microsoft/deberta-v3-base",
            "microsoft/deberta-v3-large"
           ]
    folder_names = ["funnellarge512fold4",
                   "debertabase",
                   "debertalarge"
                  ]
    paths = []
    config_paths = []
    model_paths = []
    for EXP_NAME in EXP_NAMES:
        if local:
            path=f"../exp4/output/{EXP_NAME}/"
            config_path=path+'config.pth'
            model_path=f'../exp4/output/{EXP_NAME}/'
            cpc_path = f"{INPUT_DIR}/cpc_texts.pth"
        else:
            path=f"../input/{folder_name}/{EXP_NAME}/"
            config_path=path+'config.pth'
            model_path=f'../input/{folder_name}/'
            cpc_path = "../input/pppm-deberta-v3-large-baseline-w-w-b-train/cpc_texts.pth"
        paths.append(path)
        config_paths.append(config_path)
        model_paths.append(model_path)
        
    num_workers=4
    hidden_states = []
    for model in models:
        if "small" in model:
            hidden_state  = 512
        elif "base" in model:
            hidden_state  = 768
        elif "large" in model:
            hidden_state  = 1024
        elif "xlarge" in model:
            hidden_state =  1536
        hidden_states.append(hidden_state)
    
    batch_size=32
    fc_dropout=0.2
    target_size=1
    max_len=133
    seed=42
    n_fold=4
    trn_fold=[i for i in range(n_fold)]
    pass_fold = []
#     torch.load(CFG.model_path+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth",
    

# Library

In [5]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import shutil
import string
import pickle
import random
import joblib
import itertools
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

import torch
print(f"torch.__version__: {torch.__version__}")
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

if not local:
    os.system('pip uninstall -y transformers')
    os.system('pip uninstall -y tokenizers')
    os.system('python -m pip install --no-index --find-links=../input/pppm-pip-wheels-dataset transformers')
    os.system('python -m pip install --no-index --find-links=../input/pppm-pip-wheels-dataset tokenizers')
import tokenizers
import transformers
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
%env TOKENIZERS_PARALLELISM=true

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device("cpu")

torch.__version__: 1.9.0+cu111
tokenizers.__version__: 0.11.0
transformers.__version__: 4.16.2
env: TOKENIZERS_PARALLELISM=true


In [6]:
import numpy as np
import random
import pandas as pd
from copy import deepcopy as dp

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.modules.loss import _WeightedLoss


# Utils

In [7]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    score = sp.stats.pearsonr(y_true, y_pred)[0]
    return score




def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)

# OOF

In [8]:
# oof_df = pd.read_pickle(CFG.path+'oof_df.pkl')
# labels = oof_df['score'].values
# preds = oof_df['pred'].values
# score = get_score(labels, preds)
# LOGGER.info(f'CV Score: {score:<.4f}')
oof_dfs = pd.DataFrame()
for path in CFG.paths:
    oof_file = [f for f in os.listdir(path) if "oof_df.pkl" in f][0]
    oof_df = pd.read_pickle(path+oof_file)
    labels = oof_df['score'].values
    preds = oof_df[["id",'pred']]
    score = get_score(labels, preds["pred"])
    LOGGER.info(f'CV Score: {score:<.4f}')
    oof_dfs = pd.concat([oof_dfs, oof_df[["pred"]]],axis=1)
oof_dfs.columns = [c+str(i) for i, c in enumerate(oof_dfs.columns)]
oof_dfs["id"] = preds["id"]


CV Score: 0.8485
CV Score: 0.8422
CV Score: 0.8574


In [9]:
oof_dfs

Unnamed: 0,pred0,pred1,pred2,id
0,0.000880,0.003149,0.000215,54c1e3b9184cb5b6
1,0.236394,0.247720,0.241680,ef2d4c2e6bbb208d
2,0.428095,0.501632,0.503771,4c3f2750e7540ab7
3,0.482324,0.536995,0.479786,bfd7270f57530991
4,0.273144,0.073328,0.005082,cc96541d4987b399
...,...,...,...,...
36468,0.265766,0.257354,0.249057,ede41dd2a61bb0a9
36469,0.999820,0.794225,0.819764,4f366b6369dbfbf2
36470,0.523277,0.581828,0.504144,51421420985d5c93
36471,0.999939,0.998544,0.999254,8ceaa2b5c2d56250


In [10]:
# ../input/d/datasets/trevenanter/robertalarge/roberta-large/2022-04-23-20-31-47oof_df.pkl

# Data Loading

In [11]:
# # ====================================================
# # Data Loading
# # ====================================================
# test = pd.read_csv(INPUT_DIR+'test.csv')
# submission = pd.read_csv(INPUT_DIR+'sample_submission.csv')
# print(f"test.shape: {test.shape}")
# print(f"submission.shape: {submission.shape}")
# display(test.head())
# display(submission.head())

In [12]:
# # ====================================================
# # CPC Data
# # ====================================================
# cpc_texts = torch.load(CFG.cpc_path)
# test['context_text'] = test['context'].map(cpc_texts)
# display(test.head())

In [13]:
# test['text'] = test['anchor'] + '[SEP]' + test['target'] + '[SEP]'  + test['context_text']
# display(test.head())

# tokenizer

In [14]:
# # ====================================================
# # tokenizer
# # ====================================================
# # CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path+'tokenizer/')
# # CFG.tokenizer = AutoTokenizer.from_pretrained("../input/roberta-large-tokenizer/tokenizer/", use_fast=False)

# CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path+"tokenizer/",use_fast=False)
# # CFG.path+'tokenizer/'
# # AutoTokenizer.from_pretrained('../input/robertalarge/roberta-large/2022-04-23-20-31-47tokenizer')

In [15]:
# AutoTokenizer.from_pretrained('../input/robertalarge/roberta-large/2022-04-23-20-31-47tokenizer', use_fast=True)

# Dataset

In [16]:
# # ====================================================
# # Dataset
# # ====================================================
# def prepare_input(cfg, text):
#     inputs = cfg.tokenizer(text,
#                            add_special_tokens=True,
#                            max_length=cfg.max_len,
#                            padding="max_length",
#                            return_offsets_mapping=False)
#     for k, v in inputs.items():
#         inputs[k] = torch.tensor(v, dtype=torch.long)
#     return inputs


# class TestDataset(Dataset):
#     def __init__(self, cfg, df):
#         self.cfg = cfg
#         self.texts = df['text'].values

#     def __len__(self):
#         return len(self.texts)

#     def __getitem__(self, item):
#         inputs = prepare_input(self.cfg, self.texts[item])
#         return inputs

# Model

In [17]:
# # ====================================================
# # Model
# # ====================================================
# class CustomModel(nn.Module):
#     def __init__(self, cfg, config_path=None, pretrained=False):
#         super().__init__()
#         self.cfg = cfg
#         if config_path is None:
#             self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
#         else:
#             self.config = torch.load(config_path)
#         if pretrained:
#             self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
#         else:
#             self.model = AutoModel.from_config(self.config)
#         self.fc_dropout = nn.Dropout(cfg.fc_dropout)
#         self.fc = nn.Linear(self.config.hidden_size, self.cfg.target_size)
#         self._init_weights(self.fc)
#         self.attention = nn.Sequential(
#             nn.Linear(self.config.hidden_size, cfg.hidden_state),
#             nn.Tanh(),
#             nn.Linear(cfg.hidden_state, 1),
#             nn.Softmax(dim=1)
#         )
#         self._init_weights(self.attention)
        
#     def _init_weights(self, module):
#         if isinstance(module, nn.Linear):
#             module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
#             if module.bias is not None:
#                 module.bias.data.zero_()
#         elif isinstance(module, nn.Embedding):
#             module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
#             if module.padding_idx is not None:
#                 module.weight.data[module.padding_idx].zero_()
#         elif isinstance(module, nn.LayerNorm):
#             module.bias.data.zero_()
#             module.weight.data.fill_(1.0)
        
#     def feature(self, inputs):
#         outputs = self.model(**inputs)
#         last_hidden_states = outputs[0]
#         # feature = torch.mean(last_hidden_states, 1)
#         weights = self.attention(last_hidden_states)
#         feature = torch.sum(weights * last_hidden_states, dim=1)
#         return feature

#     def forward(self, inputs):
#         feature = self.feature(inputs)
#         output = self.fc(self.fc_dropout(feature))
#         return output

# inference

In [18]:
# # ====================================================
# # inference
# # ====================================================
# def inference_fn(test_loader, model, device):
#     preds = []
#     model.eval()
#     model.to(device)
#     tk0 = tqdm(test_loader, total=len(test_loader))
#     for inputs in tk0:
#         for k, v in inputs.items():
#             inputs[k] = v.to(device)
#         with torch.no_grad():
#             y_preds = model(inputs)
#         preds.append(y_preds.sigmoid().to('cpu').numpy())
#     predictions = np.concatenate(preds)
#     return predictions

In [19]:
# all_predictions = pd.DataFrame()
# for exp_num in range(len(CFG.EXP_NAMES)):
#     test_dataset = TestDataset(CFG, test)
#     test_loader = DataLoader(test_dataset,
#                              batch_size=CFG.batch_size,
#                              shuffle=False,
#                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
#     predictions = []
#     for fold in CFG.trn_fold:
#         if fold in CFG.pass_fold:
#             continue
#         model = CustomModel(CFG, config_path=CFG.config_paths[exp_num], pretrained=False)
#         if local:
#             state = torch.load(CFG.model_paths[exp_num]+f"{CFG.models[exp_num].replace('/', '-')}_fold{fold}_best.pth",
#                            map_location=torch.device('cpu'))
#         else:
#             state = torch.load(CFG.model_paths[exp_num]+f"{CFG.EXP_NAMES[exp_name]}/{CFG.models[exp_name].replace('/', '-')}_fold{fold}_best.pth",
#                            map_location=torch.device('cpu'))
#         model.load_state_dict(state['model'])
#         prediction = inference_fn(test_loader, model, device)
#         predictions.append(prediction)
#         del model, state, prediction; gc.collect()
#         torch.cuda.empty_cache()
#     predictions = np.mean(predictions, axis=0)
#     all_predictions = pd.concat([all_predictions,pd.DataFrame(predictions)], axis=1)

# # pd.concat([pd.DataFrame(predictions),pd.DataFrame(predictions)], axis=1)

# 1DCNN data

In [20]:
class oneDCNN(nn.Module):

        def __init__(self, num_features, num_targets, hidden_size):
            super(oneDCNN, self).__init__()
            cha_1 = 256
            cha_2 = 512
            cha_3 = 512

            cha_1_reshape = int(hidden_size/cha_1)
            cha_po_1 = int(hidden_size/cha_1/2)
            cha_po_2 = int(hidden_size/cha_1/2/2) * cha_3

            self.cha_1 = cha_1
            self.cha_2 = cha_2
            self.cha_3 = cha_3
            self.cha_1_reshape = cha_1_reshape
            self.cha_po_1 = cha_po_1
            self.cha_po_2 = cha_po_2

            self.batch_norm1 = nn.BatchNorm1d(num_features)
            self.dropout1 = nn.Dropout(0.1)
            self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))

            self.batch_norm_c1 = nn.BatchNorm1d(cha_1)
            self.dropout_c1 = nn.Dropout(0.1)
            self.conv1 = nn.utils.weight_norm(nn.Conv1d(cha_1,cha_2, kernel_size = 5, stride = 1, padding=2,  bias=False),dim=None)

            self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size = cha_po_1)

            self.batch_norm_c2 = nn.BatchNorm1d(cha_2)
            self.dropout_c2 = nn.Dropout(0.1)
            self.conv2 = nn.utils.weight_norm(nn.Conv1d(cha_2,cha_2, kernel_size = 3, stride = 1, padding=1, bias=True),dim=None)

            self.batch_norm_c2_1 = nn.BatchNorm1d(cha_2)
            self.dropout_c2_1 = nn.Dropout(0.3)
            self.conv2_1 = nn.utils.weight_norm(nn.Conv1d(cha_2,cha_2, kernel_size = 3, stride = 1, padding=1, bias=True),dim=None)

            self.batch_norm_c2_2 = nn.BatchNorm1d(cha_2)
            self.dropout_c2_2 = nn.Dropout(0.2)
            self.conv2_2 = nn.utils.weight_norm(nn.Conv1d(cha_2,cha_3, kernel_size = 5, stride = 1, padding=2, bias=True),dim=None)

            self.max_po_c2 = nn.MaxPool1d(kernel_size=4, stride=2, padding=1)

            self.flt = nn.Flatten()

            self.batch_norm3 = nn.BatchNorm1d(cha_po_2)
            self.dropout3 = nn.Dropout(0.2)
            self.dense3 = nn.utils.weight_norm(nn.Linear(cha_po_2, num_targets))

        def forward(self, x):

            x = self.batch_norm1(x)
            x = self.dropout1(x)
            x = F.celu(self.dense1(x), alpha=0.06)

            x = x.reshape(x.shape[0],self.cha_1,
                          self.cha_1_reshape)

            x = self.batch_norm_c1(x)
            x = self.dropout_c1(x)
            x = F.relu(self.conv1(x))

            x = self.ave_po_c1(x)

            x = self.batch_norm_c2(x)
            x = self.dropout_c2(x)
            x = F.relu(self.conv2(x))
            x_s = x

            x = self.batch_norm_c2_1(x)
            x = self.dropout_c2_1(x)
            x = F.relu(self.conv2_1(x))

            x = self.batch_norm_c2_2(x)
            x = self.dropout_c2_2(x)
            x = F.relu(self.conv2_2(x))
            x =  x * x_s

            x = self.max_po_c2(x)

            x = self.flt(x)

            x = self.batch_norm3(x)
            x = self.dropout3(x)
            x = self.dense3(x)

            return x

   

In [21]:
class StackingDataset(Dataset):
    def __init__(self, df, features, labels=None):
        self.df = df[features].values
        self.labels = labels.values
        
    def __len__(self, ):
        return len(self.df)
    
    
    def __getitem__(self, item):
        inputs =  torch.FloatTensor(self.df[item]).float()
        
        if self.labels is None:
            return inputs
        labels = torch.tensor(self.labels[item]).float()
        return inputs, labels
    

In [22]:
# fold_num = 0

# train_dataset = StackingDataset(oof_dfs, ["pred0"], folds[["score"]])

# train_loader = DataLoader(train_dataset, batch_size=oneDCNNCFG.batch_size, shuffle=True, 
#                           num_workers=4, pin_memory=True, drop_last=True)

# inputs, label = train_dataset[0]
# inputs

In [23]:
class oneDCNNCFG:
    max_grad_norm=1000
    gradient_accumulation_steps=1
    hidden_size=1024
    dropout=0.3
    lr=1e-4
    batch_size=128
    epochs=50
    weight_decay=1e-5
    n_fold = 5
    n_channels_list = [206, 512, 1024]
    use_bias = True
    features = oof_dfs.columns[:-1]
    kwargs_head= {
        "n_features_list": [1024, 2048, 206],
        "use_tail_as_out": True,
        "drop_rate": 0.8,
        "use_bn": False,
        "use_wn": True,
        "block_name": "LABD",
    }

    
train =  pd.read_csv(INPUT_DIR+'train.csv')
train['score_map'] = train['score'].map({0.00: 0, 0.25: 1, 0.50: 2, 0.75: 3, 1.00: 4})
Fold = StratifiedKFold(n_splits=oneDCNNCFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(train, train['score_map'])):
    train.loc[val_index, 'fold'] = int(n)
train['fold'] = train['fold'].astype(int)
folds = train
display(train.groupby('fold').size())

train = train.merge(oof_dfs ,on="id")#
target = train["score"]
train = train[oof_dfs.columns[:-1]]


fold
0    7295
1    7295
2    7295
3    7294
4    7294
dtype: int64

In [24]:
from tqdm.notebook import tqdm

In [25]:
def train_stack(train_loader, model, optimizer, epoch, scheduler, device,CFG):
    
    losses = AverageMeter()

    model.train()

    for step, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
        batch_size = x.size(0)
        pred = model(x)
        loss = nn.BCEWithLogitsLoss()(pred, y.unsqueeze(1))
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps

        loss.backward()

        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scheduler.step()
            optimizer.step()
            optimizer.zero_grad()
        
    return losses.avg


def valid_stack(valid_loader, model, device, CFG):
    
    losses = AverageMeter()

    model.eval()
    val_preds = []
    y_true =[]
    for step, (x, y) in enumerate(valid_loader):
        
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
        batch_size = x.size(0)

        with torch.no_grad():
            pred = model(x)
            
        loss = nn.BCEWithLogitsLoss()(pred, y.unsqueeze(1))
        losses.update(loss.item(), batch_size)


        val_preds.append(pred.sigmoid().detach().cpu().numpy())
        y_true.append(y.detach().cpu().numpy())
#         val_preds.append(pred.tanh().detach().cpu().numpy())

        if CFG.gradient_accumulation_steps> 1:
            loss = loss / CFG.gradient_accumulation_steps
    val_preds = np.concatenate(val_preds)
    y_true= np.concatenate(y_true)
    score = get_score(y_true, val_preds)
    return losses.avg, val_preds, score[0]



class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


In [28]:
def run_single_nn(cfg, train, folds, features, target, device, fold_num, seed, MODEL,save_dir, model_name):
    seed_everything(seed=seed)
    trn_idx = folds[folds["fold"]!=fold_num].index
    val_idx = folds[folds["fold"]==fold_num].index
    
    train_target = target[trn_idx]
    valid_target = target[val_idx]
    
    train_folds = train.loc[trn_idx].reset_index(drop=True)
    valid_folds = train.loc[val_idx].reset_index(drop=True)
    
    train_dataset = StackingDataset(train_folds, features, train_target)
    valid_dataset = StackingDataset(valid_folds, features, valid_target)
    
    train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, 
                              num_workers=4, pin_memory=False, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=cfg.batch_size, shuffle=False, 
                              num_workers=4, pin_memory=False, drop_last=False)
    
    model = MODEL(
            num_features=len(cfg.features), num_targets=1, hidden_size=cfg.hidden_size
    )
    model.to(device, non_blocking=True)
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                              max_lr=1e-2, epochs=cfg.epochs, steps_per_epoch=len(train_loader))

    # train & validate
    best_score = -np.inf
    improve_count = 0
    for epoch in range(cfg.epochs):
        train_loss = train_stack(train_loader, model, optimizer, epoch, scheduler, device, cfg)
        valid_loss, val_pred, score = valid_stack(valid_loader, model, device, cfg)
        if score>best_score:
            LOGGER.info(f"epoch [{epoch}] update {best_score} to {score}")
            best_score = score
            oof = np.zeros((len(train), 1))
            oof[val_idx] = val_pred
            torch.save(model.state_dict(),f"{save_dir}{model_name}_fold{fold_num}_seed{seed}.pth")
            improve_count=0
        else:
            improve_count+=1
            
        if improve_count>5:
            break
    return oof
def run_kfold_nn(cfg, train, folds, features, target, device, n_fold, seed,MODEL, save_dir, model_name):
    print("run kfold nn")
    oof = np.zeros((len(train), 1))
    for fold_num in range(n_fold):
        LOGGER.info(f"fold {fold_num}")
        _oof = run_single_nn(cfg, train, folds, features, target, device, fold_num, seed, MODEL,save_dir, model_name)
        oof += _oof
    score = get_score(target.values, oof)
    LOGGER.info("="*10)
    LOGGER.info(f"oof score {score}")
    return oof

In [29]:

oof = np.zeros((len(train), 1))
SEED = [42, 1999, 2022]
for i, seed in enumerate(SEED):
    LOGGER.info(f"set seed {seed}")
    _oof = run_kfold_nn(oneDCNNCFG, train, folds, oneDCNNCFG.features, target, device,
                        n_fold=5, seed=seed,MODEL=oneDCNN, save_dir=OUTPUT_DIR,
                        model_name="1dcnn")
    oof += _oof/len(SEED)
    

set seed 42
fold 0


run kfold nn


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8666650107338124


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8666650107338124 to 0.869084994599305


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [3] update 0.869084994599305 to 0.8720510976166995


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [7] update 0.8720510976166995 to 0.8730562199229542


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [9] update 0.8730562199229542 to 0.8731451929912086


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 1


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8662696031490927


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8662696031490927 to 0.8675130230874404


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [5] update 0.8675130230874404 to 0.8677440678307992


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [6] update 0.8677440678307992 to 0.868894638413759


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 2


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8621559502150844


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8621559502150844 to 0.8677800486887642


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 3


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8566501444271641


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8566501444271641 to 0.8629554000075702


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8629554000075702 to 0.8647735332932187


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [5] update 0.8647735332932187 to 0.865196520343028


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 4


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8615256700316267


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8615256700316267 to 0.8670142377160409


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [4] update 0.8670142377160409 to 0.8678736674773462


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [5] update 0.8678736674773462 to 0.8688367453529037


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

oof score [0.8650659172713305]
set seed 1999
fold 0


run kfold nn


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8680712740894491


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8680712740894491 to 0.8710175326529075


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [4] update 0.8710175326529075 to 0.8728635668522634


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [6] update 0.8728635668522634 to 0.8734797875768655


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 1


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.855124937679014


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.855124937679014 to 0.8663804590270143


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [3] update 0.8663804590270143 to 0.8681260078675722


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [8] update 0.8681260078675722 to 0.8691418099046089


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 2


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8554814745174912


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8554814745174912 to 0.8673175838170252


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [6] update 0.8673175838170252 to 0.8686537759046952


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 3


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8608067569071546


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8608067569071546 to 0.8637619541909259


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8637619541909259 to 0.8639230508876518


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [5] update 0.8639230508876518 to 0.864110843841486


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 4


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8643716246228444


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8643716246228444 to 0.8659327508553554


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8659327508553554 to 0.8676499940821676


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [3] update 0.8676499940821676 to 0.8678370962900779


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [5] update 0.8678370962900779 to 0.8683833924693601


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

oof score [0.8673149331172924]
set seed 2022
fold 0


run kfold nn


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8720478159312703


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [3] update 0.8720478159312703 to 0.8721601361798605


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [7] update 0.8721601361798605 to 0.8723616747376488


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [10] update 0.8723616747376488 to 0.8726284968737653


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [12] update 0.8726284968737653 to 0.8739088604572764


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 1


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.86173118428932


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.86173118428932 to 0.8675000960852981


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8675000960852981 to 0.8689698804560333


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [8] update 0.8689698804560333 to 0.8699036291626042


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 2


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8640499582154345


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8640499582154345 to 0.8667449900960114


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8667449900960114 to 0.8681243527037595


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [7] update 0.8681243527037595 to 0.8684465654959506


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [10] update 0.8684465654959506 to 0.8686333837729442


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [16] update 0.8686333837729442 to 0.8690539763057146


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 3


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8492873384314916


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [1] update 0.8492873384314916 to 0.8640031025158786


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [3] update 0.8640031025158786 to 0.8641733639452706


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [7] update 0.8641733639452706 to 0.8650784845878492


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

fold 4


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [0] update -inf to 0.8639058181120602


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [2] update 0.8639058181120602 to 0.864394955479717


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [3] update 0.864394955479717 to 0.8680619991502908


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

epoch [5] update 0.8680619991502908 to 0.868384613319161


  0%|          | 0/227 [00:00<?, ?it/s]

epoch [6] update 0.868384613319161 to 0.8688929299974136


  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

oof score [0.8663053158849]


In [None]:
train["score"] = oof

train.to_csv(f"{OUTPUT_DIR}oof_df.csv",index=False)

In [None]:
# if not local:
#     submission['score'] = predictions
#     display(submission.head())
#     submission[['id', 'score']].to_csv('submission.csv', index=False)

In [None]:
# all_predictions

In [None]:
torch.tensor(np.arange(128)).unsqueeze().shape