# Start

In [None]:
# Print the GPU status report for this runtime before anything is installed.
!nvidia-smi

In [None]:
## Install transformers and the other pinned dependencies of this experiment.
## transformers/tokenizers are uninstalled first so that the pinned versions
## below are the ones that end up installed.
!pip uninstall -y transformers
!pip uninstall -y tokenizers
!pip install --quiet torch==1.9.1
!pip install --quiet transformers==4.16.2
!pip install --quiet tokenizers==0.11.6
!pip install --quiet sentencepiece
!pip install bitsandbytes-cuda112==0.26.0

# Config

In [None]:
# ====================================================
# CFG
# ====================================================

class CFG:
  # Experiment-wide settings. Everything is a plain class attribute and is
  # read as CFG.<name> by the cells below.
  name="bigpatent_pl"  # experiment name; used for the output folder and the uploaded dataset name
  max_len=512 # ★★★ NOTE: overwritten by the "Define max_len" cell, which recomputes it from the data
  wandb=False  # enable Weights & Biases logging
  competition="PPPM"
  _wandb_kernel="kunishou"
  debug=False  # quick run: fewer epochs, one fold, 1000 sampled rows
  apex=True  # turns torch.cuda.amp autocast / GradScaler on in train_fn
  print_freq=100  # print progress every N batches
  num_workers=4  # DataLoader workers
  model="google/bigbird-pegasus-large-bigpatent"
  #model="funnel-transformer/xlarge"
  scheduler="CosineAnnealingLR"  # name matched by get_scheduler in train_loop
  batch_scheduler=True  # step the scheduler after every optimizer step
  num_cycles=0.5  # only used by the 'cosine' scheduler
  num_warmup_steps=50 # change (only used by the 'linear' / 'cosine' schedulers)
  epochs=5
  encoder_lr=2e-5 # change (learning rate of the pretrained backbone)
  decoder_lr=2e-5 # change (learning rate of the layers added on top of the backbone)
  min_lr=1e-7  # eta_min of the cosine-annealing schedulers
  eps=5e-6  # AdamW epsilon
  betas=(0.9, 0.999)  # AdamW betas
  # NOTE: get_scheduler reads CFG.factor / CFG.patience / CFG.T_0 for the
  # corresponding schedulers; they must be uncommented before selecting those.
  #factor=0.2 # ReduceLROnPlateau
  #patience=4 # ReduceLROnPlateau
  #eps=1e-6 # ReduceLROnPlateau
  T_max=50 # CosineAnnealingLR
  #T_0=50 # CosineAnnealingWarmRestarts
  batch_size=16   # ★★★ https://www.kaggle.com/c/nbme-score-clinical-patient-notes/discussion/308298
  fc_dropout=0.2  # NOTE: not read by CustomModel below, which uses fixed dropout rates
  weight_decay=0.01
  target_size=1  # output width of the regression head
  gradient_accumulation_steps=4  # micro-batches per optimizer step
  max_grad_norm=1000  # threshold passed to clip_grad_norm_
  seed=44
  n_fold=5
  trn_fold=[0, 1, 2, 3, 4]  # folds that are actually trained
  train=True
  wandb_key = "" # not good: do not commit a real API key here

  # Colab Env
  upload_from_colab = True  # upload the experiment folder as a Kaggle dataset at the end
  api_path = "/content/drive/MyDrive/kaggle/kaggle.json"  # Kaggle API token file
  drive_path = "/content/drive/My Drive/uspppm/"  # project root on Google Drive

  # Kaggle Env
  kaggle_dataset_path = None  # optional dataset holding model/ and preds/ folders to copy in

# Debug mode shortens the run to two epochs on the first fold only.
if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0]

# Import Module

In [None]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import string
import pickle
import random
import joblib
import logging
import itertools
import datetime
import warnings
import shutil
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold, StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

#!pip uninstall -y transformers
#!pip uninstall -y tokenizers
#pip install transformers==4.16.2
#!pip install tokenizers==0.11.0

import tokenizers
import transformers
print(f"torch.__version__: {torch.__version__}")
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
%env TOKENIZERS_PARALLELISM=true

import bitsandbytes as bnb

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# ====================================================
# Utils
# ====================================================

def get_score(y_true, y_pred):
  """Return the evaluation metric: the Pearson correlation of labels and predictions.

  Args:
    y_true: 1-D array-like of ground-truth scores.
    y_pred: 1-D array-like of predicted scores, same length as ``y_true``.

  Returns:
    The Pearson correlation coefficient.
  """
  # Import the submodule explicitly: ``import scipy as sp`` on its own does not
  # guarantee that ``sp.stats`` has been loaded.
  from scipy import stats

  # pearsonr returns (correlation coefficient, p-value); keep the coefficient.
  # https://www.st-hakky-blog.com/entry/2018/01/30/004659
  return stats.pearsonr(y_true, y_pred)[0]

def seed_everything(seed=CFG.seed):
  """Seed every random number generator this notebook relies on.

  Args:
    seed: Integer seed applied to ``random``, NumPy and PyTorch (CPU and all
      CUDA devices).
  """
  random.seed(seed)
  # Exported so that Python processes started afterwards inherit it; the hash
  # seed of the interpreter that is already running is not affected.
  os.environ["PYTHONHASHSEED"] = str(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  # Seed every visible GPU, not only the current one (a no-op without CUDA).
  torch.cuda.manual_seed_all(seed)
  # Ask cuDNN for deterministic kernels and switch off its autotuner, which
  # may otherwise pick different algorithms from run to run.
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

seed_everything(CFG.seed)

In [None]:
# True when the google.colab package has already been loaded into this
# interpreter; used below to choose between the Colab and the Kaggle setup.
COLAB = "google.colab" in sys.modules

In [None]:
# Logger
class Logger:
  """Write timestamped messages to the console and to ``<path>/Experiment.log``."""

  def __init__(self, path):
    """Create (or reuse) the logger bound to ``path``.

    Args:
      path: Directory that receives ``Experiment.log``; it is also used as the
        name of the underlying ``logging`` logger.
    """
    self.general_logger = logging.getLogger(path)
    # logging.getLogger returns the same object for the same name, so when this
    # cell is re-run the handlers are already attached. Build them only when
    # they are needed; otherwise every instantiation would open (and leak) a
    # new handle on the log file.
    if not self.general_logger.handlers:
      os.makedirs(path, exist_ok=True)
      self.general_logger.addHandler(logging.StreamHandler())
      self.general_logger.addHandler(
          logging.FileHandler(os.path.join(path, "Experiment.log")))
      self.general_logger.setLevel(logging.INFO)

  def info(self, message):
    """Log ``message`` at INFO level, prefixed with the current time."""
    self.general_logger.info('[{}] - {}'.format(self.now_string(), message))

  @staticmethod
  def now_string():
    """Return the current local time as ``YYYY-MM-DD HH:MM:SS``."""
    return str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

In [None]:
# colab環境での設定
if COLAB:
    #print("This environment is Google Colab")
    
    # mount
    from google.colab import drive
    if not os.path.isdir("/content/drive"):
        drive.mount('/content/drive') 
        
    #os.chdir('/content/drive/My Drive/uspppm/notebook')

    # import library
    ! pip install --quiet wandb

    # use kaggle api (need kaggle token)
    f = open(CFG.api_path, 'r')
    json_data = json.load(f) 
    os.environ["KAGGLE_USERNAME"] = json_data["username"]
    os.environ["KAGGLE_KEY"] = json_data["key"]
    
    # set dirs
    DRIVE = CFG.drive_path
    EXP = (CFG.name if CFG.name is not None 
           else get("http://172.28.0.2:9000/api/sessions").json()[0]["name"][:-6])
    INPUT = os.path.join(DRIVE, "Input")
    OUTPUT = os.path.join(DRIVE, "Output")
    # SUBMISSION = os.path.join(DRIVE, "Submission")
    OUTPUT_EXP = os.path.join(OUTPUT, EXP) 
    EXP_MODEL = os.path.join(OUTPUT_EXP, "model")
    # EXP_FIG = os.path.join(OUTPUT_EXP, "fig")
    EXP_PREDS = os.path.join(OUTPUT_EXP, "preds")
    EXP_TOKENIZER = os.path.join(OUTPUT_EXP, "tokenizer") # change

    # make dirs
    for d in [INPUT, EXP_MODEL, EXP_PREDS]:
        os.makedirs(d, exist_ok=True)

    if not os.path.isfile(os.path.join(INPUT, "train.csv")):
        # load dataset
        ! kaggle competitions download -c us-patent-phrase-to-phrase-matching -p $INPUT
        # unzip need
    
    logger = Logger(OUTPUT_EXP)
    print("This environment is Google Colab")

else:
    print("This environment is Kaggle Kernel")
    
    # set dirs
    INPUT = "../input/us-patent-phrase-to-phrase-matching/"
    EXP, OUTPUT, SUBMISSION = "./", "./", "./"
    EXP_MODEL = os.path.join(EXP, "model")
    # EXP_FIG = os.path.join(EXP, "fig")
    EXP_PREDS = os.path.join(EXP, "preds")
    EXP_TOKENIZER = os.path.join(OUTPUT_EXP, "tokenizer") # change
    
    # copy dirs
    if CFG.kaggle_dataset_path is not None:
        KD_MODEL = os.path.join(CFG.kaggle_dataset_path, "model")
        KD_EXP_PREDS = os.path.join(CFG.kaggle_dataset_path, "preds")
        shutil.copytree(KD_MODEL, EXP_MODEL)
        shutil.copytree(KD_EXP_PREDS, EXP_PREDS)

    # make dirs
    for d in [EXP_MODEL, EXP_PREDS]:
        os.makedirs(d, exist_ok=True)
        
    # utils
    logger = Logger(OUTPUT_EXP)

In [None]:
# ====================================================
# wandb
# ====================================================
if CFG.wandb:
  import wandb

  # Log in with the configured key; if that fails, fall back to an anonymous
  # run. Only ordinary errors are caught, so Ctrl-C / kernel interrupts still
  # propagate instead of being swallowed by a bare `except:`.
  try:
    wandb.login(key=CFG.wandb_key)
    anory = None
  except Exception:
    anory = "must"
    print("please check wandb key")

  def class2dict(f):
    """Return the non-dunder attributes of ``f`` as a ``{name: value}`` dict."""
    return {name: getattr(f, name) for name in dir(f) if not name.startswith('__')}

  run = wandb.init(project="PPPM",
                   name=CFG.model,
                   config=class2dict(CFG),
                   group=CFG.model,
                   job_type="train",
                   anonymous=anory)

# Data Loading

In [None]:
# ====================================================
# Data Loading
# ====================================================
# Load the training table. Rows whose `id` is null are separated out later
# (the "CV split" cell) and handled as a second set.
# NOTE(review): this path is hard-coded and does not go through INPUT, unlike
# the CPC files read below via os.path.join(INPUT, ...) -- confirm that it
# resolves in the environment this notebook is run in.
train = pd.read_csv('../input/us-patent-phrase-to-phrase-matching/train_pl2.csv') # ★★★
print(f"train.shape: {train.shape}")
display(train.head())

In [None]:
# ====================================================
# CPC Data
# 追加情報を記載する
# ====================================================
def get_cpc_texts(input_dir=None):
  """Build a mapping from CPC context code (e.g. ``"H01"``) to its title text.

  The codes are collected from the file names in ``CPCSchemeXML202105``; the
  titles come from the per-section files in ``CPCTitleList202202``. Each value
  is ``"<section title>. <context title>"``.

  Args:
    input_dir: Directory containing both CPC folders. Defaults to the
      notebook-level ``INPUT``.

  Returns:
    dict mapping context code to title string.

  Raises:
    IndexError: if a section or context has no title line in its file.
  """
  if input_dir is None:
    input_dir = INPUT

  # One capital letter followed by digits. Written as a raw string so that
  # ``\d`` is not parsed as an (invalid) string escape.
  code_pattern = r'[A-Z]\d+'
  # Unique codes over all file names, sorted; a set comprehension replaces the
  # quadratic ``sum(list_of_lists, [])`` flattening.
  contexts = sorted({
      code
      for file_name in os.listdir(os.path.join(input_dir, 'CPCSchemeXML202105'))
      for code in re.findall(code_pattern, file_name)
  })

  results = {}
  for cpc in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'Y']:
    with open(os.path.join(input_dir, f'CPCTitleList202202/cpc-section-{cpc}_20220201.txt')) as f:
      s = f.read()
    pattern = f'{cpc}\t\t.+'
    result = re.findall(pattern, s)
    # NOTE: str.lstrip treats its argument as a *set of characters*, not as a
    # prefix, so it can also eat leading letters of the title itself (e.g. an
    # initial "A" or "C"). This is intentionally left unchanged: the
    # preprocessing cell patches the resulting strings ("griculture",
    # "hemistry"), and the saved cpc_texts.pth has to stay consistent with it.
    cpc_result = result[0].lstrip(pattern)
    for context in [c for c in contexts if c[0] == cpc]:
      pattern = f'{context}\t\t.+'
      result = re.findall(pattern, s)
      results[context] = cpc_result + ". " + result[0].lstrip(pattern)

  return results

# Build the context-code -> title mapping and store it with the other
# experiment outputs.
cpc_texts = get_cpc_texts()    
torch.save(cpc_texts, os.path.join(OUTPUT_EXP, "cpc_texts.pth"))
# Look up the title text of every row through its 'context' code.
train['context_text'] = train['context'].map(cpc_texts)
display(train.head())

In [None]:
# ====================================================
# preprocess
# https://www.kaggle.com/competitions/us-patent-phrase-to-phrase-matching/discussion/315827
# ====================================================

def omit_char(x):
  """Return ``x`` lower-cased, with every ';', '[' and ']' character removed."""
  unwanted = str.maketrans("", "", ";[]")
  return x.translate(unwanted).lower()

# Remove the stray trailing digit from this anchor value.
# The result is assigned back instead of calling replace(..., inplace=True) on
# `train['anchor']`: the chained in-place form works on an intermediate object
# and is not guaranteed to update `train` itself.
train['anchor'] = train['anchor'].replace("dry coating composition1", "dry coating composition")

train['context_text'] = train['context_text'].map(omit_char)

# Whole-value replacement: restores the "a" that was stripped from this title
# when the CPC texts were built.
train['context_text'] = train['context_text'].replace("human necessities. griculture forestry animal husbandry hunting trapping fishing", 
                                                      "human necessities. agriculture forestry animal husbandry hunting trapping fishing")

# Substring replacement: restores the "c" that was stripped from "chemistry".
# NOTE(review): this matches every occurrence of "hemistry", including ones
# inside longer words that already have their "c" -- confirm that is acceptable
# for the titles in use before changing it, since other notebooks may repeat
# this exact preprocessing.
train['context_text'] = train['context_text'].str.replace("hemistry", "chemistry")

display(train.head())

In [None]:
# ====================================================
# Create text
# ====================================================
# Model input: anchor, target and context title joined by the literal text '[SEP]'.
# NOTE(review): '[SEP]' is inserted as plain text; whether the tokenizer loaded
# from CFG.model treats it as a special token is not established in this
# notebook -- confirm.
train['text'] = train['anchor'] + '[SEP]' + train['target'] + '[SEP]'  + train['context_text']

display(train.head())

In [None]:
# Integer class per score value; used as the stratification target of the CV split.
train['score_map'] = train['score'].map({0.00: 0, 0.25: 1, 0.50: 2, 0.75: 3, 1.00: 4})
# Integer id per distinct anchor; used as the group key of the CV split.
encoder = LabelEncoder()
train['anchor_map'] = encoder.fit_transform(train['anchor'])

In [None]:
# ====================================================
# CV split
# StratifiedGroupKFoldに変更
# ====================================================

# Rows without an id go to train_pl; the remaining rows stay in train.
train_pl = train[train["id"].isnull()].copy().reset_index()
# Renumber the kept rows 0..n-1. split() yields *positional* indices while
# .loc selects by *label*; the two only agree on a fresh RangeIndex. Without
# the reset, folds are written to the wrong rows whenever a removed row
# precedes a kept one.
train = train[~train["id"].isnull()].copy().reset_index(drop=True)

# Stratified on the score class, grouped by anchor so that an anchor never
# appears in two folds of the same frame. The two frames are split separately.
Fold = StratifiedGroupKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(train, train['score_map'], groups=train["anchor_map"])):
  train.loc[val_index, 'fold'] = int(n)

for n, (train_index, val_index) in enumerate(Fold.split(train_pl, train_pl['score_map'], groups=train_pl["anchor_map"])):
  train_pl.loc[val_index, 'fold'] = int(n)

# The column is created as float by the assignments above.
train['fold'] = train['fold'].astype(int)
train_pl['fold'] = train_pl['fold'].astype(int)

display(train.groupby('fold').size())

In [None]:
# Fold sizes of the rows without an id, which were split separately.
display(train_pl.groupby('fold').size())

In [None]:
# Concatenate train and train_pl, which were split into 5 folds separately.
# train0 keeps the rows that have an id; train_loop draws its validation rows
# from train0 and its training rows from the combined frame.

train0 = train.copy() 
train = pd.concat([train,train_pl],axis=0,ignore_index=True)
train

In [None]:
# Size of the combined training frame.
train.shape

In [None]:
# Debug mode: shrink the combined frame to 1000 sampled rows and show the fold
# sizes before and after. train0 (the validation source) is not subsampled here.
if CFG.debug:
    display(train.groupby('fold').size())
    train = train.sample(n=1000, random_state=0).reset_index(drop=True)
    display(train.groupby('fold').size())

In [None]:
# ====================================================
# tokenizer
# ====================================================

# Abbreviations that are added to the tokenizer as extra (non-special) tokens.
abbreviations = ['h2o', 'conh2', 'vegfr2', 'her2']

tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.add_tokens(abbreviations, special_tokens=False)
# Saved with the experiment outputs; CustomModel resizes its embeddings to
# len(tokenizer) to account for the added tokens.
tokenizer.save_pretrained(EXP_TOKENIZER)
# Exposed on CFG so that prepare_input and CustomModel can reach it.
CFG.tokenizer = tokenizer

In [None]:
# ====================================================
# Define max_len
# ====================================================
def _token_lengths(texts, total):
    """Return the token count of every text, without special tokens."""
    return [len(tokenizer(text, add_special_tokens=False)['input_ids'])
            for text in tqdm(texts, total=total)]


# Token counts per component: every CPC title, then every anchor and target.
lengths_dict = {'context_text': _token_lengths(cpc_texts.values(), len(cpc_texts))}
for text_col in ['anchor', 'target']:
    lengths_dict[text_col] = _token_lengths(train[text_col].fillna("").values, len(train))

# Longest anchor + longest target + longest context title, plus room for four
# special tokens (CLS + SEP + SEP + SEP). This replaces the max_len from CFG.
CFG.max_len = sum(max(lengths_dict[col])
                  for col in ('anchor', 'target', 'context_text')) + 4
logger.info(f"model: {CFG.model}")
logger.info(f"max_len: {CFG.max_len}")

In [None]:
# ====================================================
# Dataset
# ====================================================

def prepare_input(cfg, text):
  """Tokenize one text into fixed-length ``torch.long`` tensors.

  Args:
    cfg: Object providing ``tokenizer`` (callable) and ``max_len`` (int).
    text: The string to encode.

  Returns:
    The tokenizer output with every value converted to a ``torch.long``
    tensor (which keys are present depends on the tokenizer).
  """
  inputs = cfg.tokenizer(text,
                         add_special_tokens=True,
                         max_length=cfg.max_len,
                         padding="max_length",
                         # Without this, max_length only pads: a longer text
                         # would come back longer than max_len and could not
                         # be stacked into a batch with the others.
                         truncation=True,
                         return_offsets_mapping=False)
  for k, v in inputs.items():
    inputs[k] = torch.tensor(v, dtype=torch.long)

  return inputs


class TrainDataset(Dataset):
  """Dataset yielding ``(tokenized inputs, score)`` pairs from a DataFrame."""

  def __init__(self, cfg, df):
    """Keep ``cfg`` and the ``text`` / ``score`` columns of ``df`` as arrays."""
    self.cfg = cfg
    self.texts, self.labels = df["text"].values, df["score"].values

  def __len__(self):
    """Number of samples."""
    return len(self.labels)

  def __getitem__(self, item):
    """Return the encoded text and the float label at position ``item``."""
    encoded = prepare_input(self.cfg, self.texts[item])
    target = torch.tensor(self.labels[item], dtype=torch.float)
    return encoded, target

# Model

In [None]:
# ====================================================
# Model
# ====================================================
class CustomModel(nn.Module):
    """Pretrained backbone + attention pooling + a linear regression head.

    Args:
        cfg: Settings object; ``model``, ``tokenizer`` and ``target_size`` are read.
        config_path: Optional path to a config saved with ``torch.save``. When
            ``None`` the config is fetched from ``cfg.model``.
        pretrained: Load pretrained backbone weights and resize the token
            embeddings to the tokenizer size (``True``), or only build the
            architecture from the config (``False``).
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        else:
            # NOTE: torch.load unpickles the file; only load configs you saved yourself.
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
            # Use the tokenizer of the cfg that was passed in (previously the
            # global CFG was read here, ignoring the argument). The resize
            # accounts for tokens added to the tokenizer.
            token_embeddings_size = len(cfg.tokenizer)
            self.model.resize_token_embeddings(token_embeddings_size)
        else:
            self.model = AutoModel.from_config(self.config)

        # Five dropout rates applied to the same pooled feature and averaged in forward().
        self.fc_dropout1 = nn.Dropout(0.1)
        self.fc_dropout2 = nn.Dropout(0.2)
        self.fc_dropout3 = nn.Dropout(0.3)
        self.fc_dropout4 = nn.Dropout(0.4)
        self.fc_dropout5 = nn.Dropout(0.5)

        self.fc = nn.Linear(self.config.hidden_size, self.cfg.target_size)
        self._init_weights(self.fc)
        # Attention pooling: one softmax-normalised weight per sequence position.
        # https://www.kaggle.com/competitions/us-patent-phrase-to-phrase-matching/discussion/324330
        self.attention = nn.Sequential(
            nn.Linear(self.config.hidden_size, self.config.hidden_size),
            nn.LayerNorm(self.config.hidden_size),
            nn.GELU(),
            nn.Linear(self.config.hidden_size, 1),
            nn.Softmax(dim=1)
        )
        # NOTE(review): _init_weights only handles Linear / Embedding /
        # LayerNorm instances, so passing the nn.Sequential container leaves
        # its layers at their default initialisation. Kept as is because
        # changing it would change training; use
        # `self.attention.apply(self._init_weights)` if re-initialisation is
        # actually intended.
        self._init_weights(self.attention)

    def _init_weights(self, module):
        """Re-initialise a single Linear, Embedding or LayerNorm module in place.

        The normal-init std is ``config.initializer_range`` when the config has
        it, otherwise ``config.init_std``, otherwise 0.02. Not every config
        exposes ``initializer_range``, which previously raised AttributeError
        here.
        """
        std = getattr(self.config, "initializer_range", None)
        if std is None:
            std = getattr(self.config, "init_std", 0.02)

        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Return the attention-pooled representation of the backbone output."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        # feature = torch.mean(last_hidden_states, 1)
        weights = self.attention(last_hidden_states)
        # Weighted sum over dim 1 of the hidden states.
        feature = torch.sum(weights * last_hidden_states, dim=1)
        return feature

    def forward(self, inputs):
        """Return the head output (raw logits, no sigmoid) for a batch of inputs."""
        feature = self.feature(inputs)

        feature1 = self.fc_dropout1(feature)
        feature2 = self.fc_dropout2(feature)
        feature3 = self.fc_dropout3(feature)
        feature4 = self.fc_dropout4(feature)
        feature5 = self.fc_dropout5(feature)

        # Average of the five dropout variants of the pooled feature.
        feature_all = (feature1+feature2+feature3+feature4+feature5)/5
        output = self.fc(feature_all)

        return output

# Helper

In [None]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Keeps the latest value of a metric together with its weighted running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (remain <estimate>)'`` for a job started at ``since``.

    ``since`` is a ``time.time()`` timestamp and ``percent`` the completed
    fraction of the work (must be non-zero); the remaining time is
    extrapolated linearly from the elapsed time.
    """
    elapsed = time.time() - since
    remaining = elapsed / percent - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [None]:
def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one epoch and return the average logged loss.

    Uses ``torch.cuda.amp`` (autocast + GradScaler) when ``CFG.apex`` is set
    and accumulates gradients over ``CFG.gradient_accumulation_steps`` batches
    before each optimizer step.

    Args:
        fold: Fold number, used only to label the wandb metrics.
        train_loader: Iterable of ``(inputs, labels)`` batches; ``inputs`` is a
            dict of tensors.
        model: The model being trained.
        criterion: Loss function applied to ``(N, 1)`` predictions and labels.
        optimizer: Optimizer to step.
        epoch: Zero-based epoch index, used for progress output.
        scheduler: Learning-rate scheduler; stepped after every optimizer step
            when ``CFG.batch_scheduler`` is set.
        device: Device the batches are moved to.

    Returns:
        Average of the per-batch loss values as logged. Note that each value
        has already been divided by ``CFG.gradient_accumulation_steps``.
    """
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = time.time()
    for step, (inputs, labels) in enumerate(train_loader):
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        if CFG.gradient_accumulation_steps > 1:
            # Scale so that the accumulated gradient is the mean over the micro-batches.
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        # NOTE(review): no scaler.unscale_(optimizer) precedes this call, so with
        # CFG.apex enabled the norm is measured (and clipped) on gradients that
        # are still multiplied by the loss scale, and this happens on every
        # micro-batch. Left unchanged on purpose -- altering it would change
        # the training dynamics the hyper-parameters were chosen for.
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if CFG.batch_scheduler:
                scheduler.step()
        # get_last_lr() reports the rate the scheduler last set; get_lr() is
        # meant to be called from inside step() only.
        current_lr = scheduler.get_last_lr()[0]
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=current_lr))
        if CFG.wandb:
            wandb.log({f"[fold{fold}] loss": losses.val,
                       f"[fold{fold}] lr": current_lr})
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on ``valid_loader``.

    Returns:
        ``(average loss, predictions)`` where predictions are the
        sigmoid-activated model outputs flattened to one dimension.
    """
    model.eval()
    losses = AverageMeter()
    batch_preds = []
    started = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = {key: tensor.to(device) for key, tensor in inputs.items()}
        labels = labels.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        # The validation loss is divided by the accumulation factor as well, so
        # it is reported on the same scale as the training loss from train_fn.
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), labels.size(0))
        batch_preds.append(y_preds.sigmoid().to('cpu').numpy())
        is_last = step == (len(valid_loader)-1)
        if step % CFG.print_freq == 0 or is_last:
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(started, float(step+1)/len(valid_loader))))
    # First concatenate joins the batches, the second flattens the rows.
    return losses.avg, np.concatenate(np.concatenate(batch_preds))


def inference_fn(test_loader, model, device):
    """Return the sigmoid-activated predictions of ``model`` over ``test_loader``.

    ``test_loader`` yields dicts of input tensors (no labels). The per-batch
    outputs are concatenated along the first axis.
    """
    model.eval()
    model.to(device)
    collected = []
    for inputs in tqdm(test_loader, total=len(test_loader)):
        inputs = {key: tensor.to(device) for key, tensor in inputs.items()}
        with torch.no_grad():
            y_preds = model(inputs)
        collected.append(y_preds.sigmoid().to('cpu').numpy())
    return np.concatenate(collected)

# Train

In [None]:
# ====================================================
# train loop
# ====================================================

def train_loop(folds,folds0, fold):
  """Train one fold and return its validation rows with predictions attached.

  Args:
    folds: DataFrame the training rows are taken from (all rows whose
      ``fold`` differs from ``fold``).
    folds0: DataFrame the validation rows are taken from (rows whose ``fold``
      equals ``fold``).
    fold: Fold number to hold out.

  Returns:
    The validation rows of ``folds0`` with a ``pred`` column holding the
    predictions of the best-scoring epoch.

  Raises:
    ValueError: if ``CFG.scheduler`` names an unknown scheduler.
    RuntimeError: if no epoch produced a score that could be saved.
  """
  logger.info(f"========== fold: {fold} training ==========")

  # ====================================================
  # loader
  # ====================================================
  train_folds = folds[folds['fold']!=fold].reset_index(drop=True)
  valid_folds = folds0[folds0['fold']==fold].reset_index(drop=True)
  valid_labels = valid_folds["score"].values

  train_dataset = TrainDataset(CFG, train_folds)
  valid_dataset = TrainDataset(CFG, valid_folds)

  train_loader = DataLoader(train_dataset,
                            batch_size = CFG.batch_size,
                            shuffle=True,
                            num_workers=CFG.num_workers,
                            pin_memory=True,
                            drop_last=True
                            )
  valid_loader = DataLoader(valid_dataset,
                            batch_size=CFG.batch_size,
                            shuffle=False,
                            num_workers=CFG.num_workers, 
                            pin_memory=True, 
                            drop_last=False
                            )
  # ====================================================
  # model
  # ====================================================
  model = CustomModel(CFG, config_path=None, pretrained=True)
  # Saved so the architecture can be rebuilt later via config_path.
  torch.save(model.config, os.path.join(OUTPUT_EXP, 'config.pth'))
  model.to(device)

  # ====================================================
  # optimizer
  # ====================================================

  def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
    """Parameter groups: backbone with decay, backbone without decay, head."""
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
         'lr': encoder_lr, 'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
         'lr': encoder_lr, 'weight_decay': 0.0},
        # Everything outside the backbone attribute `model` (pooling + head).
        {'params': [p for n, p in model.named_parameters() if "model" not in n],
         'lr': decoder_lr, 'weight_decay': 0.0}
    ]
    return optimizer_parameters

  optimizer_parameters = get_optimizer_params(model,
                                              encoder_lr=CFG.encoder_lr, 
                                              decoder_lr=CFG.decoder_lr,
                                              weight_decay=CFG.weight_decay)
  optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
  #optimizer = bnb.optim.Adam8bit(optimizer_parameters, lr=CFG.encoder_lr, betas=CFG.betas)  # ★★★

  # ====================================================
  # scheduler
  # ====================================================
  def get_scheduler(cfg, optimizer, num_train_steps):
    """Build the scheduler named by ``cfg.scheduler``."""
    if cfg.scheduler == 'linear':
      scheduler = get_linear_schedule_with_warmup(
          optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
      )
    elif cfg.scheduler == 'cosine':
      scheduler = get_cosine_schedule_with_warmup(
          optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
      )
    elif cfg.scheduler=='ReduceLROnPlateau':
      scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
    elif cfg.scheduler=='CosineAnnealingLR':
      scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
    elif cfg.scheduler=='CosineAnnealingWarmRestarts':
      scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
    else:
      # Previously fell through to `return scheduler` and raised UnboundLocalError.
      raise ValueError(f"unknown scheduler: {cfg.scheduler!r}")
    return scheduler

  # Only consumed by the 'linear' and 'cosine' schedulers.
  num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
  scheduler = get_scheduler(CFG, optimizer, num_train_steps)

  # ====================================================
  # loop
  # ====================================================
  criterion = nn.BCEWithLogitsLoss(reduction="mean")
  # Pearson correlation lies in [-1, 1]. Starting from -inf (instead of 0)
  # guarantees that the first finite score is checkpointed even when it is
  # not positive.
  best_score = -np.inf
  best_path = os.path.join(EXP_MODEL,f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth")
  saved = False

  for epoch in range(CFG.epochs):
    start_time = time.time()
    # train
    avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)
    # eval
    avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
    # scoring
    score = get_score(valid_labels, predictions)
    elapsed = time.time() - start_time

    logger.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
    logger.info(f'Epoch {epoch+1} - Score: {score:.4f}')

    if CFG.wandb:
      wandb.log({f"[fold{fold}] epoch": epoch+1, 
                 f"[fold{fold}] avg_train_loss": avg_loss, 
                 f"[fold{fold}] avg_val_loss": avg_val_loss,
                 f"[fold{fold}] score": score})

    if best_score < score:
      best_score = score
      logger.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
      torch.save({'model': model.state_dict(),
                  'predictions': predictions},
                 best_path)
      saved = True

  if not saved:
    # Without this check the load below would either fail or silently pick up
    # a checkpoint left behind by an earlier run.
    raise RuntimeError(f"fold {fold}: no epoch produced a score that could be saved")

  # Attach the predictions of the best epoch, read back from its checkpoint.
  predictions = torch.load(best_path, map_location=torch.device('cpu'))['predictions']
  valid_folds['pred'] = predictions

  torch.cuda.empty_cache()
  gc.collect()

  return valid_folds

# Main

In [None]:
if __name__ == '__main__':

    def get_result(oof_df):
        """Log the score of the ``pred`` column of ``oof_df`` against ``score``."""
        labels = oof_df['score'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        logger.info(f'Score: {score:<.4f}')

    if CFG.train:
        # Collect the per-fold frames and concatenate once at the end, instead
        # of growing a DataFrame (starting from an empty one) inside the loop.
        fold_frames = []
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                # ★★★ changed: train on the combined frame, validate on train0
                _oof_df = train_loop(train,train0, fold)
                fold_frames.append(_oof_df)
                logger.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        if fold_frames:
            oof_df = pd.concat(fold_frames).reset_index(drop=True)
            logger.info(f"========== CV ==========")
            get_result(oof_df)
            oof_df.to_pickle(os.path.join(EXP_PREDS, 'oof_df.pkl'))
        else:
            # Nothing selected in CFG.trn_fold: there is no out-of-fold frame to
            # score or save (this used to end in a KeyError on 'score').
            oof_df = pd.DataFrame()
            logger.info("no fold was trained; skipping CV score and oof_df.pkl")

    if CFG.wandb:
        wandb.finish()

In [None]:
# Release cached GPU memory and run the garbage collector after training.
torch.cuda.empty_cache()
gc.collect()

In [None]:
# Upload the experiment output folder as a new Kaggle dataset.
# Only done on Colab: the Kaggle credentials (KAGGLE_USERNAME / KAGGLE_KEY) and
# the Drive output folder are set up in the Colab branch of the environment
# cell, so outside Colab this step cannot work.
if COLAB and CFG.upload_from_colab:
    from kaggle.api.kaggle_api_extended import KaggleApi

    def dataset_create_new(dataset_name, upload_dir):
        """Write ``dataset-metadata.json`` into ``upload_dir`` and upload the folder.

        Args:
            dataset_name: Used as both the dataset slug (under the current
                Kaggle user) and its title.
            upload_dir: Folder to upload.
        """
        dataset_metadata = {
            'id': f'{os.environ["KAGGLE_USERNAME"]}/{dataset_name}',
            'licenses': [{'name': 'CC0-1.0'}],
            'title': dataset_name,
        }
        with open(os.path.join(upload_dir, 'dataset-metadata.json'), 'w') as f:
            json.dump(dataset_metadata, f, indent=4)
        api = KaggleApi()
        api.authenticate()
        # Sub-directories are uploaded as tar archives.
        api.dataset_create_new(folder=upload_dir, convert_to_csv=False, dir_mode='tar')

    dataset_create_new(dataset_name=CFG.competition + "-" + CFG.name, upload_dir=OUTPUT_EXP)