<a href="https://colab.research.google.com/github/SUPER-TADORY/-/blob/main/feedback3baseline_sweep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 初期操作

In [None]:
# Print the NVIDIA GPU status for this runtime.
!nvidia-smi

In [None]:
# Mount Google Drive so later cells can read the Kaggle key and persist results.
from google.colab import drive
drive.mount('/content/drive/')
import os
# Report the CPU count; the DataLoaders written further down use os.cpu_count() workers.
print(os.cpu_count())
# Provide the Kaggle API key: copy kaggle.json from Drive when the expected
# folder exists, otherwise ask the user to upload it manually.
if os.path.exists("/content/drive/MyDrive/Kaggle/competitions/"):
    !cp /content/drive/MyDrive/Kaggle/utils/kaggle.json ./
else:
    from google.colab import files
    files.upload()
# Authenticate the Colab user (the Drive API client is built in the next cell).
from google.colab import auth
auth.authenticate_user()

In [None]:
# Install the third-party packages used by the notebook; most installs are silenced.
!pip install --upgrade --force-reinstall --no-deps kaggle > /dev/null 2>&1
!pip install --target ./python requests google-api-python-client google-auth requests-oauthlib > /dev/null 2>&1
!pip install madgrad > /dev/null 2>&1
!pip install transformers > /dev/null 2>&1
!pip install sentencepiece > /dev/null 2>&1
!pip install wandb > /dev/null 2>&1
!pip install iterative-stratification
!python3 -m textblob.download_corpora
# Move the Kaggle credentials (placed in the working directory by the previous cell)
# to the location the Kaggle CLI reads them from.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/

from googleapiclient.discovery import build
import io, os
from googleapiclient.http import MediaIoBaseDownload

# Look up files named 'kaggle.json' through the Drive API.
# NOTE(review): `kaggle_api_key` is not referenced again in the visible cells.
drive_service = build('drive', 'v3')
results = drive_service.files().list(
        q="name = 'kaggle.json'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])

# Make sure the credentials directory exists before tightening permissions.
filename = "/root/.kaggle/kaggle.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Restrict the key file to owner read/write.
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Download the competition archive into /kaggle, extract it there, and delete the zip.
!mkdir /kaggle
!kaggle competitions download -c feedback-prize-english-language-learning -p /kaggle/
!unzip -q /kaggle/feedback-prize-english-language-learning.zip -d /kaggle
!rm /kaggle/feedback-prize-english-language-learning.zip

# ディレクトリの作成

In [None]:
# Disabled alternative that derived ROOT_PATH from a Drive project folder:
# import os
# if os.path.exists("/content/drive/MyDrive/Kaggle/competitions/Feedback_Prize_Predicting_Effective_Arguments/code"):
#   ROOT_PATH = "/content/drive/MyDrive/Kaggle/competitions/Feedback_Prize_Predicting_Effective_Arguments/code/feedbackprize"
# else:

# Google Drive root under which per-run result folders are created.
ROOT_DRIVEPATH = "/content/drive/MyDrive/results_for_furu"

# Local (runtime disk) root; the competition data is unpacked here and run folders are created below it.
ROOT_PATH = "/kaggle"


In [None]:
import os
import datetime

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Run identifier: today's date plus a fixed experiment suffix.
dt_now = datetime.datetime.now()
savename = dt_now.strftime('%Y-%m-%d')
version = f"{savename}_opt_fold[2,3]"

# Local run folder and its generated-source subfolder.
savedir = f"{ROOT_PATH}/{version}"
srcdir = savedir + "/src"

# Mirror of the same layout on Google Drive.
savedir_drive = f"{ROOT_DRIVEPATH}/{version}"
srcdir_drive = savedir_drive + "/src"

# os.makedirs creates missing parents, so creating each "src" folder also
# creates its run folder.
os.makedirs(srcdir, exist_ok=True)
os.makedirs(srcdir_drive, exist_ok=True)


## srcの定義

### 編集するときは writefileをコメントアウト(保存するときにコメントアウト外す)

## `src/__init__.py`

In [None]:
%%writefile {srcdir}/__init__.py

from .models import FeedBackModel
from .helper_func import helper
from .trainer import Trainers
from .train_datasets import FeedBackDataset, Collate
#from .eval_datasets import essay_ds, discourse_ds, disc_collate, essay_collate

## src/models.py

In [None]:
%%writefile {srcdir}/models.py
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel, AutoConfig

def freeze(module):
    """Turn off gradient tracking for every parameter yielded by ``module.parameters()``.

    Args:
        module: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
    """
    for weight in module.parameters():
        weight.requires_grad_(False)

class MeanPooling(nn.Module):
    """Mask-weighted average of ``last_hidden_state`` along dimension 1."""

    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        """Average the hidden states whose mask entry is non-zero.

        Args:
            last_hidden_state: Tensor reduced along dim 1.
            attention_mask: Mask broadcast (via ``unsqueeze(-1).expand``) to the
                shape of ``last_hidden_state``.

        Returns:
            ``sum(hidden * mask, dim=1) / clamp(sum(mask, dim=1), min=1e-9)``.
        """
        weights = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        masked_total = (last_hidden_state * weights).sum(dim=1)
        # The clamp keeps the division finite when a row is fully masked.
        token_count = weights.sum(dim=1).clamp(min=1e-9)
        return masked_total / token_count

class FeedBackModel(nn.Module):
    """Pretrained backbone followed by masked mean pooling and a linear head.

    Args:
        model_name: Name or path handed to ``AutoConfig`` / ``AutoModel``.
        opt_params: Optional mapping of tuning options. When given it must
            contain ``backbone_dropout_prob``, ``layer_norm_eps``,
            ``head_dropout_prob`` and ``stable_prob1`` .. ``stable_prob5``.
            The keys ``freeze_emb``, ``freeze_layernum``, ``reinit_layernum``
            and ``init_head`` are optional and default to "off".
        num_labels: Width of the linear head.
    """

    def __init__(self, model_name, opt_params=None, num_labels=6):
        super(FeedBackModel, self).__init__()
        self.config = AutoConfig.from_pretrained(model_name)
        self.config.update(
            {
                "hidden_dropout_prob": 0.1,
                "layer_norm_eps": 1e-5,
                "add_pooling_layer": False,
                "num_labels": num_labels,
            }
        )
        if opt_params is not None:
            self.config.update(
                {
                    "hidden_dropout_prob": opt_params["backbone_dropout_prob"],
                    "layer_norm_eps": opt_params["layer_norm_eps"],
                    "add_pooling_layer": False,
                    "attention_probs_dropout_prob": 0,
                    "num_labels": num_labels,
                }
            )

        self.model = AutoModel.from_pretrained(model_name, config=self.config)
        if opt_params is not None:
            self.dropout = nn.Dropout(p=opt_params["head_dropout_prob"])
            self.drop1 = nn.Dropout(p=opt_params["stable_prob1"])
            self.drop2 = nn.Dropout(p=opt_params["stable_prob2"])
            self.drop3 = nn.Dropout(p=opt_params["stable_prob3"])
            self.drop4 = nn.Dropout(p=opt_params["stable_prob4"])
            self.drop5 = nn.Dropout(p=opt_params["stable_prob5"])
        else:
            self.dropout = nn.Dropout(p=0.1)
            self.drop1 = nn.Dropout(p=0.1)
            self.drop2 = nn.Dropout(p=0.2)
            self.drop3 = nn.Dropout(p=0.3)
            self.drop4 = nn.Dropout(p=0.4)
            self.drop5 = nn.Dropout(p=0.5)
        self.pooler = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, self.config.num_labels)
        self.loss = nn.MSELoss()
        #self.loss = nn.SmoothL1Loss()

        # `opt_params` is optional (the default is None), so the fine-tuning
        # switches below must not index it unconditionally.
        tuning = opt_params if opt_params is not None else {}

        if tuning.get("freeze_emb", False):
            freeze(self.model.embeddings)

        freeze_num = tuning.get("freeze_layernum", 0)
        if freeze_num > 0:
            freeze(self.model.encoder.layer[:freeze_num])

        reinit_num = tuning.get("reinit_layernum", 0)
        if reinit_num > 0:
            # `_init_weights` only acts on Linear/Embedding/LayerNorm modules, so
            # it has to be applied to every submodule of the selected layers;
            # calling it on the ModuleList slice itself would change nothing.
            for layer in self.model.encoder.layer[-reinit_num:]:
                layer.apply(self._init_weights)

        if tuning.get("init_head", False):
            self._init_weights(self.fc)

        self.model.gradient_checkpointing_enable()

    def _init_weights(self, module):
        """Re-initialize a single Linear, Embedding or LayerNorm module in place.

        Linear/Embedding weights are drawn from N(0, ``config.initializer_range``),
        biases and the padding row are zeroed; LayerNorm is reset to weight 1 and
        bias 0. Any other module type is left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def mcrmse_fn(self, outputs, targets):
        """Return the mean over columns of the per-column root mean squared error."""
        colwise_mse = torch.mean(torch.square(targets - outputs), dim=0)
        loss = torch.mean(torch.sqrt(colwise_mse), dim=0)
        return loss

    def get_emb(self, ids, mask, target=None, text_id=None):
        """Return ``(head_output, pooled_embedding)`` without applying head dropout.

        ``target`` and ``text_id`` are accepted but not used.
        """
        out = self.model(input_ids=ids, attention_mask=mask,
                         output_hidden_states=False)
        out = self.pooler(out.last_hidden_state, mask)
        outputs = self.fc(out)
        return outputs, out

    def forward(self, ids, mask, target=None, text_id=None):
        """Return the training loss when ``target`` is given, else the head output.

        The loss is the square root of the average of five MSE losses, each
        computed on the pooled embedding passed through one of ``drop1`` ..
        ``drop5`` before the head. ``text_id`` is accepted but not used.
        """
        out = self.model(input_ids=ids, attention_mask=mask,
                         output_hidden_states=False)
        out = self.pooler(out.last_hidden_state, mask)
        out = self.dropout(out)

        if target is None:
            return self.fc(out)

        # Dropout layers are applied in a fixed order so the random stream is
        # consumed deterministically.
        drops = (self.drop1, self.drop2, self.drop3, self.drop4, self.drop5)
        losses = [self.loss(self.fc(drop(out)), target) for drop in drops]
        loss = sum(losses) / len(losses)
        return torch.sqrt(loss)

## src/train_datasets.py

In [None]:
%%writefile {srcdir}/train_datasets.py

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
import random

from text_unidecode import unidecode
from typing import Dict, List, Tuple
import codecs

class FeedBackDataset(Dataset):
    """Dataset yielding un-padded token ids, attention mask, text id and the six targets.

    Args:
        df: Frame with ``full_text``, ``text_id``, ``text_length`` and the six
            score columns. ``full_text`` is overwritten in place when
            ``config["text_encode"]`` is truthy.
        config: Mapping providing ``text_encode``, ``tokenizer`` and ``max_length``.
    """

    def __init__(self, df, config):
        if config["text_encode"]:
            df['full_text'] = df['full_text'].apply(self.resolve_encodings_and_normalize)
        self.df = df
        self.text = df['full_text'].values
        self.text_id = df['text_id'].values
        self.targets = df[['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar','conventions']].values
        self.textlength = df["text_length"].values
        self.config = config
        self.tokenizer = config["tokenizer"]

    def resolve_encodings_and_normalize(self, text: str) -> str:
        """Resolve the encoding problems and normalize the abnormal characters."""

        def _encode_fallback(error: UnicodeError) -> Tuple[bytes, int]:
            # Encode the offending slice as UTF-8 instead of failing.
            return error.object[error.start : error.end].encode("utf-8"), error.end

        def _decode_fallback(error: UnicodeError) -> Tuple[str, int]:
            # Decode the offending slice as cp1252 instead of failing.
            return error.object[error.start : error.end].decode("cp1252"), error.end

        # Register the encoding and decoding error handlers for `utf-8` and `cp1252`.
        codecs.register_error("replace_encoding_with_utf8", _encode_fallback)
        codecs.register_error("replace_decoding_with_cp1252", _decode_fallback)

        raw_bytes = text.encode("raw_unicode_escape")
        first_pass = raw_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
        cp1252_bytes = first_pass.encode("cp1252", errors="replace_encoding_with_utf8")
        second_pass = cp1252_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
        return unidecode(second_pass)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Tokenize row ``index`` (truncated to ``max_length``, no padding)."""
        text = self.text[index]
        text_id = self.text_id[index]
        target = self.targets[index]
        encoded = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.config["max_length"],
            padding=False,
        )
        return {
            'text_id': text_id,
            'ids': encoded['input_ids'],
            'mask': encoded['attention_mask'],
            'target': target,
        }

class Collate:
    """Batch collator: pads to the longest sample and optionally corrupts tokens.

    Args:
        config: Mapping providing ``tokenizer``, ``token_dropout_prob`` (chance
            that a batch is corrupted at all) and ``token_dropout_ratio``
            (per-token selection probability inside a corrupted batch).
    """

    def __init__(self, config):
        self.tokenizer = config["tokenizer"]
        self.dropout_prob = config["token_dropout_prob"]
        self.dropout_ratio = config["token_dropout_ratio"]

    def __call__(self, batch):
        """Collate a list of sample dicts with keys ``ids``, ``mask``, ``target``, ``text_id``.

        Returns:
            Dict with ``ids``/``mask`` as long tensors padded to the batch
            maximum, ``target`` as a float tensor and ``text_id`` as a list.
        """
        output = dict()
        for name in ["ids", "mask", "target", "text_id"]:
            output[name] = [sample[name] for sample in batch]

        # calculate max token length of this batch
        batch_max = max(len(ids) for ids in output["ids"])

        # add padding
        pad_id = self.tokenizer.pad_token_id
        output["ids"] = [s + (batch_max - len(s)) * [pad_id] for s in output["ids"]]
        output["mask"] = [s + (batch_max - len(s)) * [0] for s in output["mask"]]

        # convert to tensors
        for name in ["ids", "mask"]:
            output[name] = torch.tensor(output[name], dtype=torch.long)
        # Convert each target separately and stack: torch.tensor() on a list of
        # numpy arrays is slow and makes PyTorch emit a UserWarning.
        output["target"] = torch.stack(
            [torch.as_tensor(t, dtype=torch.float) for t in output["target"]]
        )

        if self.dropout_prob > 0 and random.uniform(0, 1) < self.dropout_prob:
            output["ids"] = self.torch_mask_tokens(output["ids"])

        return output

    def torch_mask_tokens(self, inputs, special_tokens_mask=None):
        """Corrupt ``inputs`` in place and return it.

        Non-special tokens are selected with probability ``dropout_ratio``; of
        those, 80% become the mask token, 10% a random token id, and 10% stay
        unchanged. The ``special_tokens_mask`` argument is ignored: the mask is
        always recomputed from the tokenizer.
        """
        probability_matrix = torch.full(inputs.shape, self.dropout_ratio)
        special_tokens_mask = [
            self.tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True)
            for val in inputs.tolist()
        ]
        special_tokens_mask = torch.tensor(special_tokens_mask, dtype=torch.bool)
        probability_matrix.masked_fill_(special_tokens_mask, value=0.0)
        masked_indices = torch.bernoulli(probability_matrix).bool()

        # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
        indices_replaced = torch.bernoulli(torch.full(inputs.shape, 0.8)).bool() & masked_indices
        inputs[indices_replaced] = self.tokenizer.convert_tokens_to_ids(self.tokenizer.mask_token)

        # 10% of the time, we replace masked input tokens with random word
        # (half of the remaining 20%).
        indices_random = torch.bernoulli(torch.full(inputs.shape, 0.5)).bool() & masked_indices & ~indices_replaced
        random_words = torch.randint(len(self.tokenizer), inputs.shape, dtype=torch.long)
        inputs[indices_random] = random_words[indices_random]

        # The rest of the time (10% of the time) we keep the masked input tokens unchanged
        return inputs

## src/test_datasets.py

In [None]:
%%writefile {srcdir}/test_datasets.py

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

class feedbacktestDataset(Dataset):
    """Inference dataset: tokenizes ``full_text`` per row and carries no targets.

    Args:
        df: Frame with ``text_id`` and ``full_text`` columns.
        config: Mapping providing ``tokenizer`` and ``max_length``.
    """

    def __init__(self, df, config):
        self.df = df
        self.text_id = df['text_id'].values
        self.text = df['full_text'].values
        self.config = config
        self.tokenizer = config["tokenizer"]

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Tokenize row ``index`` (truncated to ``max_length``, no padding)."""
        text = self.text[index]
        text_id = self.text_id[index]
        encoded = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.config["max_length"],
            padding=False,
        )
        return {
            'text_id': text_id,
            'ids': encoded['input_ids'],
            'mask': encoded['attention_mask'],
        }

class testCollate:
    """Inference collator: pads ``ids``/``mask`` to the longest sample in the batch."""

    def __init__(self, config):
        self.tokenizer = config["tokenizer"]

    def __call__(self, batch):
        """Collate sample dicts with keys ``text_id``, ``ids`` and ``mask``.

        Returns:
            Dict with ``text_id`` as a list and ``ids``/``mask`` as padded long tensors.
        """
        fields = {key: [sample[key] for sample in batch] for key in ("text_id", "ids", "mask")}

        # Longest token sequence in this batch decides the padded width.
        width = max(map(len, fields["ids"]))

        def _pad(rows, fill):
            # Right-pad every row with `fill` and convert to a long tensor.
            return torch.tensor(
                [row + (width - len(row)) * [fill] for row in rows],
                dtype=torch.long,
            )

        fields["ids"] = _pad(fields["ids"], self.tokenizer.pad_token_id)
        fields["mask"] = _pad(fields["mask"], 0)
        return fields

## src/helper_func.py

In [None]:
%%writefile {srcdir}/helper_func.py

import os, gc, copy, time, random, string, joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from text_unidecode import unidecode
from typing import Dict, List, Tuple
import codecs
from textblob import TextBlob

from sklearn.model_selection import GroupKFold, KFold, StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder
try:
    from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
except:
    print("not installed iterstrat")

from .train_datasets import FeedBackDataset, Collate

class helper:
    """Config-driven utilities: seeding, text normalization, fold assignment, loaders.

    Args:
        config: Mapping; ``seed`` is read at construction, the other keys
            (``train_df``, ``test_df``, ``n_fold``, ``fold_seed``, ``text_encode``,
            batch sizes, ...) are read by the individual methods.
    """

    def __init__(self, config):
        self.config = config
        self.set_seed(config['seed'])

    def resolve_encodings_and_normalize(self, text: str) -> str:
        """Resolve the encoding problems and normalize the abnormal characters."""
        def replace_encoding_with_utf8(error: UnicodeError) -> Tuple[bytes, int]:
            return error.object[error.start : error.end].encode("utf-8"), error.end

        def replace_decoding_with_cp1252(error: UnicodeError) -> Tuple[str, int]:
            return error.object[error.start : error.end].decode("cp1252"), error.end

        # Register the encoding and decoding error handlers for `utf-8` and `cp1252`.
        codecs.register_error("replace_encoding_with_utf8", replace_encoding_with_utf8)
        codecs.register_error("replace_decoding_with_cp1252", replace_decoding_with_cp1252)
        text = (
            text.encode("raw_unicode_escape")
            .decode("utf-8", errors="replace_decoding_with_cp1252")
            .encode("cp1252", errors="replace_encoding_with_utf8")
            .decode("utf-8", errors="replace_decoding_with_cp1252")
        )
        text = unidecode(text)
        return text

    def set_seed(self, seed=42):
        '''Sets the seed of the entire notebook so results are the same every time we run.
        This is for REPRODUCIBILITY.'''
        # Python's own RNG must be seeded too: the training collator draws from
        # `random` to decide whether a batch gets token dropout.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        # When running on the CuDNN backend, two further options must be set
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        # Set a fixed value for the hash seed
        os.environ['PYTHONHASHSEED'] = str(seed)

    def prepare_loaders_splite_trainstep(self, df, fold):
        """Build ``(train_loader, valid_loader)`` for the given fold.

        Rows with ``kfold == fold`` form the validation set (sorted by
        ``text_length``); all other rows form the shuffled training set.
        """
        # Validation gets its own collator with token dropout disabled so the
        # validation inputs are never randomly corrupted.
        valid_collate_fn = Collate(self.config)
        valid_collate_fn.dropout_prob = 0
        df_valid = df[df.kfold == fold].sort_values("text_length").reset_index(drop=True)
        valid_dataset = FeedBackDataset(df_valid, config=self.config)
        valid_loader = DataLoader(
            valid_dataset,
            batch_size=self.config['valid_batch_size'],
            collate_fn=valid_collate_fn,
            num_workers=os.cpu_count(),
            pin_memory=True,
            shuffle=False,
            drop_last=False,
        )

        train_collate_fn = Collate(self.config)
        df_train = df[df.kfold != fold].reset_index(drop=True)
        train_dataset = FeedBackDataset(df_train, config=self.config)
        train_loader = DataLoader(
            train_dataset,
            batch_size=self.config['train_batch_size'],
            collate_fn=train_collate_fn,
            num_workers=os.cpu_count(),
            pin_memory=True,
            shuffle=True,
            drop_last=True,
        )

        return train_loader, valid_loader

    def get_df(self):
        """Read ``config["train_df"]``, add ``text_length`` (word count) and an integer ``kfold`` column.

        Folds come from ``MultilabelStratifiedKFold`` over the six score columns.
        """
        df = pd.read_csv(self.config["train_df"])
        #df['full_text'] = df['full_text'].apply(lambda x : self.resolve_encodings_and_normalize(x).strip().lower())
        df["text_length"] = df.full_text.apply(lambda x: len(x.split()))

        mskf = MultilabelStratifiedKFold(n_splits=self.config['n_fold'], shuffle=True, random_state=self.config["fold_seed"])
        labels = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
        for fold, ( _, val_) in enumerate(mskf.split(df, df[labels].values)):
            df.loc[val_ , "kfold"] = int(fold)

        df["kfold"] = df["kfold"].astype(int)
        return df

    def get_test_df(self):
        """Read ``config["test_df"]``, optionally normalize ``full_text``, and add ``text_length``."""
        df = pd.read_csv(self.config["test_df"])
        if self.config["text_encode"]:
            df['full_text'] = df['full_text'].apply(lambda x : self.resolve_encodings_and_normalize(x))
        df["text_length"] = df.full_text.apply(lambda x: len(x.split()))
        return df

## src/awp.py

In [None]:
%%writefile {srcdir}/awp.py
import gc
import time
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch.nn.functional as F

#loss backwardの後に入れて調整する
#パラメータ調整に関しては adv_param adv_lr adv_eps adv_step eval_thなどで調整する
class AWP:
    """Adversarial weight perturbation helper; call ``attack_backward`` after ``loss.backward()``.

    Args:
        model: Module whose call ``model(**data)`` returns a loss tensor.
        optimizer: Optimizer whose gradients are cleared before each adversarial backward.
        adv_param (str): Substring a parameter name must contain to be perturbed.
        adv_lr (float): Perturbation step size; ``0`` turns the attack off.
        adv_eps (float): Maximum perturbation, as a fraction of each weight's magnitude.
        start_epoch (int): Stored on the instance; the methods of this class do not read it.
        adv_step (int): Number of perturb-then-backward iterations per call.
    """

    def __init__(
        self,
        model,
        optimizer,
        adv_param="weight",
        adv_lr=0.0005,
        adv_eps=0.001,
        start_epoch=0,
        adv_step=1,
    ):
        self.model = model
        self.optimizer = optimizer
        self.adv_param = adv_param
        self.adv_lr = adv_lr
        self.adv_eps = adv_eps
        self.start_epoch = start_epoch
        self.adv_step = adv_step
        self.backup = {}
        self.backup_eps = {}

    def attack_backward(self, data):
        """Perturb the weights, backpropagate the resulting loss, then restore the weights."""
        # A zero step size disables the attack entirely.
        if self.adv_lr == 0:
            return None

        # Snapshot the weights that are about to be attacked.
        self._save()
        for _ in range(self.adv_step):
            self._attack_step()
            adv_loss = self.model(**data)
            self.optimizer.zero_grad()
            adv_loss.backward()

        # Put the original weights back.
        self._restore()

    def _attack_step(self):
        """Move each eligible weight along its gradient and clip it to the saved bounds."""
        e = 1e-6  # small constant guarding against division by zero
        for name, param in self.model.named_parameters():
            eligible = param.requires_grad and param.grad is not None and self.adv_param in name
            if not eligible:
                continue
            grad_norm = torch.norm(param.grad)
            weight_norm = torch.norm(param.data.detach())
            # Skip parameters whose gradient norm is zero or NaN.
            if grad_norm == 0 or torch.isnan(grad_norm):
                continue
            step = self.adv_lr * param.grad / (grad_norm + e) * (weight_norm + e)
            param.data.add_(step)
            lower, upper = self.backup_eps[name]
            param.data = torch.min(torch.max(param.data, lower), upper)

    def _save(self):
        """Back up each eligible weight together with its allowed (lower, upper) range."""
        for name, param in self.model.named_parameters():
            eligible = param.requires_grad and param.grad is not None and self.adv_param in name
            if not eligible or name in self.backup:
                continue
            # Keep a copy of the original weight.
            original = param.data.clone()
            radius = self.adv_eps * param.abs().detach()
            self.backup[name] = original
            self.backup_eps[name] = (original - radius, original + radius)

    def _restore(self,):
        """Write the backed-up weights back and clear both backup dictionaries."""
        for name, param in self.model.named_parameters():
            saved = self.backup.get(name)
            if saved is not None:
                param.data = saved
        self.backup = {}
        self.backup_eps = {}

## src/trainer.py

In [None]:
%%writefile {srcdir}/trainer.py
import torch
from torch.optim import lr_scheduler
from transformers import AdamW
import os, gc, copy, time, random, string, joblib, json
import numpy as np
import pandas as pd
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast
import wandb
from .train_datasets import FeedBackDataset, Collate
from torch.utils.data import Dataset, DataLoader
from .models import FeedBackModel
from transformers import AutoTokenizer

def set_seed(seed=42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    # Seed Python's own RNG as well: the training collator uses `random` to
    # decide whether a batch receives token dropout.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)

class Trainers:
    """Single-fold fine-tuning driver.

    Builds the model, optimizer, scheduler and data loaders from ``config``,
    trains with mixed precision and gradient accumulation, validates every
    ``eval_step`` samples, checkpoints the best model/OOF frame and logs to wandb.

    Args:
        df: Training frame containing a ``kfold`` column.
        config: Mutable mapping of settings (``device``, ``fold``, ``seed``, ...).
        sweep: When True, ``wandb.init`` is deferred to ``run_training`` and
            ``opt_params`` is kept as the sweep configuration.
        opt_params: wandb run config (normal mode) or sweep config (sweep mode).
    """

    def __init__(self, df, config, sweep=False, opt_params=None):
        self.config = config
        set_seed(config["seed"])
        self.df = df
        self.device = self.config["device"]
        self.fold = self.config["fold"]
        self.sweep = sweep
        if sweep:
            self.sweep_config = opt_params
            print("start sweep mode")
        else:
            wandb.init(
                    project="feedback3", 
                    group="baseline",
                    config = opt_params
            )
        self.opt_params = opt_params

    def fetch_scheduler(self):
        """Return the LR scheduler named by ``config['scheduler']`` (or None).

        Raises:
            ValueError: If the configured name is not one of the supported schedulers.
        """
        name = self.config['scheduler']
        if name is None:
            return None
        if name == 'CosineAnnealingLR':
            return lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=self.config['T_max'],
                                                  eta_min=self.config['min_lr'])
        if name == 'CosineAnnealingWarmRestarts':
            return lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer, T_0=self.config['T_0'],
                                                            eta_min=self.config['min_lr'])
        # An unknown name used to surface as an UnboundLocalError; fail explicitly.
        raise ValueError(f"Unsupported scheduler: {name!r}")

    def metric_fn(self, outputs, targets):
        """Return the mean over columns of the per-column RMSE (numpy inputs)."""
        colwise_mse = np.mean(np.square(targets - outputs), axis=0)
        loss = np.mean(np.sqrt(colwise_mse), axis=0)
        return loss

    def predict_fn(self, model, test_loader):
        """Run ``model.get_emb`` over ``test_loader`` and collect predictions.

        Returns:
            DataFrame with columns ``text_id`` and ``pred``; each ``pred`` cell
            holds that row's prediction vector as a numpy array.
        """
        model.eval()

        preds = []
        text_ids = []
        with torch.no_grad():
            # Iterate the loader that was passed in, not the validation loader.
            for step, data in enumerate(test_loader):
                text_id = data['text_id']
                ids = data['ids'].to(self.device, dtype = torch.long)
                mask = data['mask'].to(self.device, dtype = torch.long)

                with autocast(enabled=True):
                    outputs, emb = model.get_emb(ids, mask)
                preds.append(outputs.cpu().detach().numpy())
                text_ids.append(text_id)

        preds = np.concatenate(preds)
        text_ids = np.concatenate(text_ids)
        gc.collect()
        # `index_col` is not a DataFrame argument; build the two columns directly.
        pred_df = pd.DataFrame({"text_id": text_ids, "pred": list(preds)})
        return pred_df

    @torch.no_grad()
    def valid_fn(self):
        """Evaluate ``self.model`` on ``self.valid_loader``.

        Returns:
            ``(score, oof_df)`` where ``score`` comes from ``metric_fn`` and
            ``oof_df`` holds ``text_id``, ``<label>_pred``/``<label>`` pairs and
            one ``emb_<i>`` column per pooled-embedding dimension.
        """
        self.model.eval()

        TEXT_IDS = []
        PREDS = []
        EMBS = []
        TARGETS = []

        for step, data in enumerate(self.valid_loader):
            text_id = data['text_id']
            ids = data['ids'].to(self.device, dtype = torch.long)
            mask = data['mask'].to(self.device, dtype = torch.long)
            targets = data['target'].to(self.device, dtype = torch.float)

            with autocast(enabled=False):
                outputs, emb = self.model.get_emb(ids, mask)
            TEXT_IDS.append(text_id)
            PREDS.append(outputs.cpu().detach().numpy())
            EMBS.append(emb.cpu().detach().numpy())
            TARGETS.append(targets.cpu().detach().numpy())

        TEXT_IDS = np.concatenate(TEXT_IDS)
        PREDS = np.concatenate(PREDS)
        EMBS = np.concatenate(EMBS)
        TARGETS = np.concatenate(TARGETS)
        valid_loss = self.metric_fn(PREDS, TARGETS)
        print("mcrmse score:",valid_loss)
        gc.collect()
        labels = ['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar','conventions']
        oof_df = pd.DataFrame([TEXT_IDS],index=["text_id"]).T
        for index,label in enumerate(labels):
            oof_df[f"{label}_pred"] = PREDS[:,index]
            oof_df[label] = TARGETS[:,index]
        # Attach all embedding columns in one concat instead of inserting them
        # one at a time, which fragments the frame.
        emb_df = pd.DataFrame(EMBS, columns=[f"emb_{emb_index}" for emb_index in range(EMBS.shape[1])])
        oof_df = pd.concat([oof_df, emb_df], axis=1)

        return valid_loss, oof_df

    def train_one_epoch(self, epoch, best_epoch_loss):
        """Train for one epoch with periodic validation and checkpointing.

        Args:
            epoch: 1-based epoch number (used for the global step and the progress bar).
            best_epoch_loss: Best validation score seen so far.

        Returns:
            ``(epoch_loss, best_epoch_loss)``: running training loss and the
            possibly improved best validation score.
        """
        self.model.train()

        dataset_size = 0
        running_loss = 0.0
        epoch_loss = 0.0

        # Evaluation cadence and warm-up, converted from samples to steps.
        # max(1, ...) avoids a modulo-by-zero when eval_step < train_batch_size.
        eval_interval = max(1, int(self.config["eval_step"] // self.config["train_batch_size"]))
        eval_start = int(self.config["eval_start"] // self.config["train_batch_size"])

        bar = tqdm(enumerate(self.train_loader), total=len(self.train_loader))
        self.scaler = GradScaler()
        for step, data in bar:
            data['ids'] = data['ids'].to(self.device, dtype = torch.long)
            data['mask'] = data['mask'].to(self.device, dtype = torch.long)
            data['target'] = data['target'].to(self.device, dtype = torch.float)

            with autocast():
                loss = self.model(**data)
                loss = loss / self.config['n_accumulate']
            self.scaler.scale(loss).backward()

            if (step + 1) % self.config['n_accumulate'] == 0:
                if self.config["max_norm"] > 0:
                    # Gradients must be unscaled before clipping.
                    self.scaler.unscale_(self.optimizer)
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config["max_norm"])
                self.scaler.step(self.optimizer)
                self.scaler.update()
                if self.scheduler is not None:
                    self.scheduler.step()

                # zero the parameter gradients
                self.optimizer.zero_grad()

            running_loss += (loss.item() * self.config["train_batch_size"]) * self.config["n_accumulate"]
            dataset_size += self.config["train_batch_size"]

            epoch_loss = running_loss / dataset_size

            bar.set_postfix(Epoch=epoch, Train_Loss=epoch_loss,
                            bb_LR=self.optimizer.param_groups[0]['lr'])

            global_step = (epoch - 1) * len(self.train_loader) + step
            if step % eval_interval == 0 and global_step > eval_start:
                val_epoch_loss, oof_df = self.valid_fn()
                self.model.train()

                # deep copy the model
                if val_epoch_loss <= best_epoch_loss:
                    print(f"Validation Loss Improved ({best_epoch_loss} ---> {val_epoch_loss})")
                    best_epoch_loss = val_epoch_loss
                    best_model = copy.deepcopy(self.model)
                    best_model.model.half()
                    self.best_model_wts = best_model.state_dict()
                    # Keep only the OOF frame that belongs to the best checkpoint.
                    self.oof_df = oof_df
                    PATH = f"{self.config['savedir']}/Loss-Fold{self.fold}.bin"
                    torch.save(self.best_model_wts, PATH)
                    oof_path = f"{self.config['savedir']}/oof-Fold{self.fold}.csv"
                    self.oof_df.to_csv(oof_path,index=False)
                    # Save a model file from the current directory
                    print(f"Model and oof dataframe Saved")
                    print()

                wandb.log({
                    "valid_loss":val_epoch_loss,
                    "best_valid_loss":best_epoch_loss,
                    "train_loss":epoch_loss
                })

        gc.collect()

        return epoch_loss, best_epoch_loss

    def run_training(self):
        """Build everything from ``config``, train for ``config['epochs']`` epochs
        and save the best weights and OOF frame to ``config['savedir_drive']``."""
        if self.sweep:
            wandb.init()
            self.config.update(wandb.config)

        #import model and tokenzier
        self.model = FeedBackModel(self.config['model_name'], self.config)
        self.model.to(self.config['device'])
        self.config["tokenizer"] = AutoTokenizer.from_pretrained(self.config['model_name'], use_fast=True)
        if self.config["add_bntoken"]:
            # NOTE(review): the backbone's embedding matrix is not resized after
            # adding a token here -- confirm the model's vocabulary has room for it.
            self.config["tokenizer"].add_tokens(["\n"], special_tokens=True)
        self.config["tokenizer"].save_pretrained(f"{self.config['savedir']}/tokenizer_fold{self.config['fold']}")

        self.num_epochs = self.config["epochs"]
        #optimizer setting
        param_optimizer = list(self.model.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': self.config['weight_decay']},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
            # The per-group key must be 'lr'; any other key is ignored by the
            # optimizer and the head would silently train at the backbone rate.
            {'params': [p for n, p in self.model.fc.named_parameters()], 'lr': self.config['head_lr'], 'weight_decay': 0.0}
        ]

        self.optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.config['learning_rate']
        )
        self.scheduler = self.fetch_scheduler()

        train_collate_fn = Collate(self.config)
        df_train = self.df[self.df.kfold != self.fold].reset_index(drop=True)
        train_dataset = FeedBackDataset(df_train, config=self.config)
        self.train_loader = DataLoader(
            train_dataset, 
            batch_size=self.config['train_batch_size'], 
            collate_fn = train_collate_fn, 
            num_workers=os.cpu_count(), 
            pin_memory=True, 
            shuffle=True,
            drop_last=True,
        )

        #validate set
        # Separate collator with token dropout switched off, so validation
        # inputs are never corrupted and the training collator is unaffected.
        valid_collate_fn = Collate(self.config)
        valid_collate_fn.dropout_prob = 0
        df_valid = self.df[self.df.kfold == self.fold].sort_values("text_length").reset_index(drop=True)
        valid_dataset = FeedBackDataset(df_valid, config=self.config)
        self.valid_loader = DataLoader(
            valid_dataset, 
            batch_size=self.config['valid_batch_size'], 
            collate_fn = valid_collate_fn, 
            num_workers=os.cpu_count(), 
            pin_memory=True, 
            shuffle=False,
            drop_last=False,
        )

        if torch.cuda.is_available():
            print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))

        start = time.time()
        # In seed-average mode the best score carries over between runs (it is
        # reset by run_seed_average); otherwise every run starts from +inf.
        if not self.config["seed_average"] or not hasattr(self, "best_epoch_loss"):
            self.best_epoch_loss = np.inf
        _, initial_oof_df = self.valid_fn()
        if not np.isfinite(self.best_epoch_loss):
            # Nothing has been checkpointed yet: keep the starting state as a
            # fallback so the final save below always has something to write.
            self.best_model_wts = copy.deepcopy(self.model.state_dict())
            self.oof_df = initial_oof_df
        for epoch in range(1, self.num_epochs + 1): 
            gc.collect()
            # train_one_epoch returns exactly two values.
            train_epoch_loss, self.best_epoch_loss = self.train_one_epoch(epoch=epoch, best_epoch_loss=self.best_epoch_loss)

        end = time.time()
        time_elapsed = end - start
        print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
            time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
        # save only best model for drive
        PATH = f"{self.config['savedir_drive']}/Loss-Fold{self.fold}_{self.best_epoch_loss}.bin"
        torch.save(self.best_model_wts, PATH)
        oof_path = f"{self.config['savedir_drive']}/oof-Fold{self.fold}_{self.best_epoch_loss}.csv"
        self.oof_df.to_csv(oof_path,index=False)

        print("Best Loss: {:.4f}".format(self.best_epoch_loss))

    def run_seed_average(self):
        """Run ``run_training`` once per seed in [42, 441, 3031], sharing one best score."""
        #reset best loss
        self.best_epoch_loss = np.inf
        for seed in [42,441,3031]:
            set_seed(seed)
            self.run_training()

    def run_sweep(self,sweep_id=None):
        """Start a wandb agent, creating the sweep from ``self.sweep_config`` if no id is given."""
        if sweep_id is None:
            sweep_id = wandb.sweep(self.sweep_config)
        if self.config["seed_average"]:
            wandb.agent(sweep_id, self.run_seed_average)
        else:
            wandb.agent(sweep_id, self.run_training)

## predict.py

In [None]:
%%writefile {savedir}/predict.py

import torch
from torch.optim import lr_scheduler
from transformers import AdamW
import os, gc, copy, time, random, string, joblib, sys
import numpy as np
import pandas as pd
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast
import json
from torch.utils.data import Dataset, DataLoader


import argparse

# Command-line interface: every option is a plain, optional string flag.
parser = argparse.ArgumentParser()
for flag in (
    '--dataset_dir',
    '--config_path',
    '--testdf_path',
    '--model_name_or_path',
    '--model_weight_path',
    '--batch_size',
    '--output_path',
):
    parser.add_argument(flag)
args = parser.parse_args()

dataset_dir, config_path, testdf_path = args.dataset_dir, args.config_path, args.testdf_path
model_path, weight_path = args.model_name_or_path, args.model_weight_path
batch_size = int(args.batch_size)
output_path = args.output_path

# Only the "fold0" entry of the JSON config is used for prediction.
with open(config_path) as config_file:
    config = json.load(config_file)["fold0"]

# Values given on the command line replace the corresponding config entries.
config.update({
    "test_df": testdf_path,
    "model_name": model_path,
    "weight_path": weight_path,
    "test_batch_size": batch_size,
})


# Make the project package `src` importable from the directory given by --dataset_dir.
# This must run before the `src` imports below.
sys.path.append(dataset_dir)
#sys.path.append("/kaggle/input/iterative-stratification/iterative-stratification-master")
import src
from src.models import FeedBackModel
from src.test_datasets import testCollate, feedbacktestDataset
from src.train_datasets import FeedBackDataset, Collate

# Inference is hard-wired to CUDA; there is no CPU fallback in this script.
device = "cuda"
from transformers import AutoTokenizer, AutoModel, AutoConfig, AdamW, AutoModelForMaskedLM

# The tokenizer is stored in the config dict itself.
# NOTE(review): presumably the dataset/collate classes read it from there - confirm in src.
config["tokenizer"] = AutoTokenizer.from_pretrained(config['model_name'], use_fast=True)

# Load the test dataframe through the project helper.
hlc = src.helper(config=config)
test = hlc.get_test_df()

# Build the test loader: order is kept (shuffle=False) and no sample is dropped (drop_last=False).
collate_fn = testCollate(config)
test_dataset = feedbacktestDataset(test, config=config)
test_loader = DataLoader(
    test_dataset, 
    batch_size=config["test_batch_size"], 
    collate_fn = collate_fn, 
    num_workers=os.cpu_count(), 
    pin_memory=True, 
    shuffle=False,
    drop_last=False,
)

# Instantiate the model on the GPU and restore the trained weights.
model = FeedBackModel(config["model_name"]).to(device)
model.load_state_dict(torch.load(config["weight_path"]))
# Only the `model.model` submodule is cast to fp16 (presumably the transformer backbone - confirm in src.models).
model.model.half()
# Switch to evaluation mode before predicting.
model.eval()

# Run batched inference and collect, per batch, the predictions, the
# embeddings returned by model.get_emb, and the matching text ids.
preds = []
text_ids = []
embs = []
bar = tqdm(test_loader, total=len(test_loader))
for data in bar:
    text_id = data['text_id']
    ids = data['ids'].to(device, dtype = torch.long)
    mask = data['mask'].to(device, dtype = torch.long)
    with torch.no_grad():
        outputs, emb = model.get_emb(ids, mask)
    preds.append(outputs.cpu().detach().numpy())
    embs.append(emb.cpu().detach().numpy())
    text_ids.append(text_id)

# Stack the per-batch pieces into single arrays (rows follow loader order).
preds = np.concatenate(preds)
embs = np.concatenate(embs)
text_ids = np.concatenate(text_ids)
gc.collect()

# One prediction column per target, in the order of the model's output columns.
labels = ['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar','conventions']
pred_df = pd.DataFrame([text_ids],index=["text_id"]).T
for index,label in enumerate(labels):
    pred_df[label] = preds[:,index]

# FIX: the original referenced an undefined name `EMBS` here, which raised
# NameError after inference had finished; the array is called `embs`.
# The embedding columns are attached in one concat instead of one insert per
# column, which avoids building a heavily fragmented DataFrame.
emb_df = pd.DataFrame(
    embs,
    columns=[f"emb_{emb_index}" for emb_index in range(embs.shape[1])],
)
pred_df = pd.concat([pred_df, emb_df], axis=1)

pred_df.to_csv(output_path,index=False)

## train.py

In [None]:
%%writefile {savedir}/train.py

import sys, os, json

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

import os, gc, copy, time, random, string, joblib
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
tqdm.pandas()
from textblob import TextBlob

# Utils
import torch
from tqdm import tqdm

# For Transformer Models
from transformers import AutoTokenizer, AutoModel, AutoConfig, AdamW, AutoModelForMaskedLM

import wandb
# Credentials come from the WANDB_API_KEY environment variable instead of
# being committed to the notebook. When the variable is unset, key=None lets
# wandb fall back to its own credential lookup.
# NOTE(review): the API key that was hard-coded on this line has been exposed
# and should be revoked in the W&B account settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

import argparse

# CLI: three optional path flags plus a (possibly empty) list of fold numbers.
parser = argparse.ArgumentParser()
for path_flag in ('--savedir', '--savedir_drive', '--root_dir'):
    parser.add_argument(path_flag)
parser.add_argument('--train_fold', nargs='*')
args = parser.parse_args()

savedir, savedir_drive, rootdir = args.savedir, args.savedir_drive, args.root_dir
# Fold numbers arrive as strings and are converted to an int64 array.
train_fold = np.array(args.train_fold, dtype=np.int64)
print(train_fold)

# Per-fold hyper-parameters are read from trainparam.json inside savedir.
config_file_path = f'{savedir}/trainparam.json'
with open(config_file_path) as config_file:
    CONFIG = json.load(config_file)

# `src` is imported from savedir, so the path has to be extended first.
sys.path.append(savedir)
import src

# Train each requested fold in turn, using its own "fold<N>" entry of CONFIG.
for fold in train_fold:
    print(f"====== Fold: {fold} ======")
    # `fold` is an element of the int64 array built from --train_fold.
    config = CONFIG[f"fold{fold}"]
    config["savedir_drive"] = savedir_drive 
    hlc = src.helper(config=config)
    df = hlc.get_df()
    # Runtime-only settings are written onto the helper's config, which is the object handed to the trainer.
    hlc.config["savedir"] = savedir
    hlc.config["device"] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Record which fold this trainer is responsible for.
    hlc.config["fold"] = fold 
    trainer = src.Trainers(
        df = df,
        config = hlc.config,
    )
    # The per-fold "seed_average" flag selects multi-seed training or a single run.
    if hlc.config["seed_average"]:
        trainer.run_seed_average()
    else:
        trainer.run_training()
    
    # Drop the trainer and force a garbage-collection pass before the next fold.
    del trainer
    _ = gc.collect()
    print()

## sweep.py

In [None]:
%%writefile {savedir}/sweep.py

import sys, os, json

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

import os, gc, copy, time, random, string, joblib
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
tqdm.pandas()
from textblob import TextBlob

import wandb
# Credentials come from the WANDB_API_KEY environment variable instead of
# being committed to the notebook. When the variable is unset, key=None lets
# wandb fall back to its own credential lookup.
# NOTE(review): the API key that was hard-coded on this line has been exposed
# and should be revoked in the W&B account settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Utils
import torch
from tqdm import tqdm

# For Transformer Models
from transformers import AutoTokenizer, AutoModel, AutoConfig, AdamW, AutoModelForMaskedLM

import argparse

# CLI: five optional string flags and one boolean switch (--resume).
parser = argparse.ArgumentParser()
for string_flag in ('--version', '--savedir', '--savedir_drive', '--root_dir', '--fold'):
    parser.add_argument(string_flag)
parser.add_argument('--resume', action='store_true')
args = parser.parse_args()

version, savedir, savedir_drive = args.version, args.savedir, args.savedir_drive
rootdir = args.root_dir
fold = int(args.fold)
resume = args.resume

# Training parameters (all folds) and the sweep definition both live in savedir.
with open(f'{savedir}/trainparam.json') as train_param_file:
    CONFIG = json.load(train_param_file)

with open(f'{savedir}/opt_parameters.json') as sweep_param_file:
    opt_params = json.load(sweep_param_file)

# From here on CONFIG is only the entry of the selected fold.
CONFIG = CONFIG[f"fold{fold}"]
opt_params["name"] = f"{version}-fold{fold}"

CONFIG.update({
    "savedir": savedir,
    "savedir_drive": savedir_drive,
    "version": version,
})
sys.path.append(CONFIG["savedir"])
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
CONFIG["device"] = torch.device(device_name)

# `src` is importable because savedir was appended to sys.path above.
import src
hlc = src.helper(config=CONFIG)
df = hlc.get_df()

# Tag the helper's config with the fold and the experiment version before it is handed to the trainer.
hlc.config["fold"] = fold
hlc.config["exp"] = f"{version}"
print(f"====== sweep mode Fold: {fold} ======")
    
# Build the trainer in sweep mode, passing the sweep definition loaded from opt_parameters.json.
trainer = src.Trainers(
    df = df,
    config = hlc.config,
    opt_params = opt_params,
    sweep = True
)
# --resume attaches to the sweep id stored in the fold's config; otherwise run_sweep is called without an id.
if resume:
    trainer.run_sweep(sweep_id=CONFIG["sweep_id"])
else:
    trainer.run_sweep()
# Release the trainer and force a garbage-collection pass.
del trainer
_ = gc.collect()
print()

## trainparam.json
#### best fold seed:539(best loss), 846, 669, 518
#### mlm data /content/drive/MyDrive/results_for_furu/mlm/{model_name} で使用可能(日付管理で更新する？？)

In [None]:
%%writefile {savedir}/trainparam.json

{
    "fold0":{
        "seed": 431,
        "fold_seed":518,
        "T_max": 278,
        "add_bntoken":1,
        "backbone_dropout_prob":0,
        "epochs": 6,
        "freeze_emb":1,
        "freeze_layernum":1,
        "head_dropout_prob":0.1,
        "head_lr":0.0049046,
        "init_head":1,
        "layer_norm_eps":1e-7,
        "learning_rate": 0.00005558104,
        "max_length": 817,
        "max_norm":1,
        "min_lr": 1.6066978e-7,
        "model_name": "microsoft/deberta-v3-large",
        "n_accumulate": 8,
        "reinit_layernum":1,
        "stable_prob1":0,
        "stable_prob2":0,
        "stable_prob3":0.1,
        "stable_prob4":0.1,
        "stable_prob5":0.1,
        "text_encode":1,
        "token_dropout_prob":0.15,
        "token_dropout_ratio":0.2,
        "train_batch_size": 4,
        "weight_decay": 0,
        "exp_name":"baseline",
        "exp":0,
        "valid_batch_size": 2,
        "eval_step":600,
        "eval_start":3000,
        "scheduler": "CosineAnnealingLR",
        "n_fold": 4,
        "competition": "FeedBack3",
        "train_df": "/kaggle/train.csv",
        "test_df":"/kaggle/test.csv",
        "sweep_id":"furufuru/feedback3/ksm2elr7",
        "seed_average":false
    },
    "fold1":{
        "seed": 431,
        "fold_seed":518,
        "T_max": 347,
        "add_bntoken":1,
        "backbone_dropout_prob":0,
        "epochs": 6,
        "freeze_emb":0,
        "freeze_layernum":1,
        "head_dropout_prob":0,
        "head_lr":0.004640476,
        "init_head":1,
        "layer_norm_eps":1e-7,
        "learning_rate": 0.0000235214,
        "max_length": 1254,
        "max_norm":40,
        "min_lr": 8.521333e-7,
        "model_name": "microsoft/deberta-v3-large",
        "n_accumulate": 8,
        "reinit_layernum":1,
        "stable_prob1":0.05,
        "stable_prob2":0,
        "stable_prob3":0.1,
        "stable_prob4":0.05,
        "stable_prob5":0,
        "text_encode":1,
        "token_dropout_prob":0.2,
        "token_dropout_ratio":0.2,
        "train_batch_size": 4,
        "weight_decay": 0,
        "exp_name":"baseline",
        "exp":0,
        "valid_batch_size": 2,
        "eval_step":600,
        "eval_start":3000,
        "scheduler": "CosineAnnealingLR",
        "n_fold": 4,
        "competition": "FeedBack3",
        "train_df": "/kaggle/train.csv",
        "test_df":"/kaggle/test.csv",
        "sweep_id":"furufuru/feedback3/zcu7lh4t",
        "seed_average": false    
    },
    "fold2":{
        "seed": 431,
        "fold_seed":518,
        "T_max": 350,
        "add_bntoken":1,
        "backbone_dropout_prob":0,
        "epochs": 6,
        "freeze_emb":1,
        "freeze_layernum":1,
        "head_dropout_prob":0,
        "head_lr":0.003607202,
        "init_head":1,
        "layer_norm_eps":1e-7,
        "learning_rate": 0.00001366467,
        "max_length": 1157,
        "max_norm":100,
        "min_lr": 4.0966938e-7,
        "model_name": "microsoft/deberta-v3-large",
        "n_accumulate": 11,
        "reinit_layernum":1,
        "stable_prob1":0.05,
        "stable_prob2":0.1,
        "stable_prob3":0.1,
        "stable_prob4":0.1,
        "stable_prob5":0.1,
        "text_encode":1,
        "token_dropout_prob":0.05,
        "token_dropout_ratio":0.05,
        "train_batch_size": 4,
        "weight_decay": 0,
        "exp_name":"baseline",
        "exp":0,
        "valid_batch_size": 2,
        "eval_step":600,
        "eval_start":3000,
        "scheduler": "CosineAnnealingLR",
        "n_fold": 4,
        "competition": "FeedBack3",
        "train_df": "/kaggle/train.csv",
        "test_df":"/kaggle/test.csv",
        "sweep_id":"furufuru/feedback3/1jfytjdi",
        "seed_average": true    
    },
    "fold3":{
        "seed": 431,
        "fold_seed":518,
        "T_max": 301,
        "add_bntoken":1,
        "backbone_dropout_prob":0,
        "epochs": 5,
        "freeze_emb":1,
        "freeze_layernum":6,
        "head_dropout_prob":0,
        "head_lr":0.00424512,
        "init_head":1,
        "layer_norm_eps":1e-7,
        "learning_rate": 0.00003627028,
        "max_length": 942,
        "max_norm":1,
        "min_lr": 7.43945e-7,
        "model_name": "microsoft/deberta-v3-large",
        "n_accumulate": 5,
        "reinit_layernum":2,
        "stable_prob1":0.05,
        "stable_prob2":0.1,
        "stable_prob3":0.1,
        "stable_prob4":0.05,
        "stable_prob5":0.1,
        "text_encode":0,
        "token_dropout_prob":0.2,
        "token_dropout_ratio":0.05,
        "train_batch_size": 4,
        "weight_decay": 0,
        "exp_name":"baseline",
        "exp":0,
        "valid_batch_size": 2,
        "eval_step":600,
        "eval_start":3000,
        "scheduler": "CosineAnnealingLR",
        "n_fold": 4,
        "competition": "FeedBack3",
        "train_df": "/kaggle/train.csv",
        "test_df":"/kaggle/test.csv",
        "sweep_id":"furufuru/feedback3/3t36pwbp",
        "seed_average": true
    }
}

## opt_parameters.json

In [None]:
%%writefile {savedir}/opt_parameters.json
{
    "project":"feedback3",
    "method": "bayes",
    "metric": {
        "name": "best_valid_loss",
        "goal": "minimize"
    },
    "early_terminate":{
        "type": "hyperband",
        "min_iter": 5
    },
    "parameters":{
        "reinit_layernum":{
            "values":[0,1,2,3]
        },
        "init_head":{
            "value": 1
        },
        "text_encode":{
            "values":[0,1]
        },
        "add_bntoken":{
            "values":[0,1]
        },
        "freeze_emb":{
            "values":[0,1]
        },
        "freeze_layernum":{
            "distribution":"int_uniform",
            "min":0,
            "max":6
        },
        "max_norm":{
            "values":[0,1,10,20,30,40,50,100]
        },
        "model_name":{
            "value": "microsoft/deberta-v3-large"
        },
        "epochs":{
            "values": [4,5,6]
        },
        "train_batch_size":{
            "value": 4
        },
        "stable_prob1":{
            "values": [0,0.05,0.1]
        },
        "stable_prob2":{
            "values": [0,0.05,0.1]
        },
        "stable_prob3":{
            "values": [0,0.05,0.1]
        },
        "stable_prob4":{
            "values": [0,0.05,0.1]
        },
        "stable_prob5":{
            "values": [0,0.05,0.1]
        },
        "token_dropout_ratio":{
            "values": [0.05,0.1,0.15,0.2,0.25]
        },
        "token_dropout_prob":{
            "values": [0,0.05,0.1,0.15,0.2,0.25]
        },
        "backbone_dropout_prob":{
            "value": 0
        },
        "head_dropout_prob":{
            "values": [0,0.05,0.1]
        },
        "layer_norm_eps":{
            "value": 1e-7
        },
        "learning_rate": {
            "distribution": "uniform",
            "min": 1e-5,
            "max": 6e-5
        },
        "head_lr":{
            "distribution": "uniform",
            "min": 1e-3,
            "max": 5e-3
        },
        "weight_decay":{
            "value": 0
        },
        "max_length":{
            "distribution": "int_uniform",
            "min":768,
            "max":1360
        },
        "n_accumulate":{
            "distribution": "int_uniform",
            "min":4,
            "max":20
        },
        "T_max":{
            "distribution": "int_uniform",
            "min":200,
            "max":400
        },
        "min_lr":{
            "distribution": "uniform",
            "min": 1e-7,
            "max": 1e-6
        }
    }
}

# kaggle datasetsの作成

In [None]:
%%writefile {savedir}/dataset-metadata.json

{
  "licenses": [
    {
      "name": "CC0-1.0"
    }
  ], 
  "id": "kunihikofurugori/baseline-4fold",
  "title": "beseline-4fold"
}

In [None]:
# For use in bash: show the value of savedir.
print(savedir)
#!kaggle datasets version -p /kaggle/beaeline -m "update" -r zip

In [None]:
#!kaggle datasets create -p {savedir} -r zip
#!kaggle datasets version -p {savedir} -m "5fold_update" -r zip

# bash実行

In [None]:
# Run the generated train.py in a subprocess for folds 2 and 3.
!python3 {savedir}/train.py --savedir {savedir} --savedir_drive {savedir_drive} --root_dir {ROOT_PATH} --train_fold 2 3
# Disabled alternative: resume the sweep for fold 0 with sweep.py.
#!python3 {savedir}/sweep.py --version {version} --savedir {savedir} --savedir_drive {savedir_drive} --root_dir {ROOT_PATH} --fold 0 --resume