In [1]:
from transformers import AutoModelForMaskedLM, AutoConfig
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm
import pandas as pd
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from transformers import AutoTokenizer
from torch.utils.data import Dataset
import pandas as pd
import torch
from torch import nn
from time import time
from torch.autograd import Variable
import torch.nn.functional as F
from torch.optim import Adadelta

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



/kaggle/input/dberta-base-model/rust_model.ot
/kaggle/input/dberta-base-model/config.json
/kaggle/input/dberta-base-model/merges.txt
/kaggle/input/dberta-base-model/vocab.json
/kaggle/input/dberta-base-model/tf_model.h5
/kaggle/input/dberta-base-model/tokenizer_config.json
/kaggle/input/dberta-base-model/bpe_encoder.bin
/kaggle/input/dberta-base-model/pytorch_model.bin
/kaggle/input/cnn-sent-mod/CNN_SENT_MODEL.bin
/kaggle/input/feedback-prize-english-language-learning/sample_submission.csv
/kaggle/input/feedback-prize-english-language-learning/train.csv
/kaggle/input/feedback-prize-english-language-learning/test.csv
/kaggle/input/big-embedder/1_epoch_mlm.bin


In [2]:
class MLMCONFIG:
    SOURCE = 'https://www.kaggle.com/code/chaitanyagiri/deberta-pre-training-using-mlm'
    TRAIN = "/kaggle/input/feedback-prize-english-language-learning/train.csv"
    TEST = "/kaggle/input/feedback-prize-english-language-learning/test.csv"
    MASKING = 0.10
    MAX_LEN = 356
    CLS_TOKEN = 1
    PAD_TOKEN = 0
    SEP_TOKEN = 2
    MASK_TOKEN = 50264
    BATCH_SIZE = 8
    TEST_BATCH_SIZE = 1
    EPOCHS = 1
    LR = 1e-5
    MODEL_PATH = "/kaggle/input/dberta-base-model/"
    SAVE_PATH = "/kaggle/working/fine-tuned-mlm.bin"
    FINE_TUNED = "/kaggle/input/big-embedder/1_epoch_mlm.bin"
    NUM_WORKERS = 4

    
class CNN_CONFIG:
    TRAIN = "/kaggle/input/feedback-prize-english-language-learning/train.csv"
    TEST = "/kaggle/input/feedback-prize-english-language-learning/test.csv"
    LR = 0.001
    TRAIN_BATCH_SIZE = 32
    TEST_BATCH_SIZE = 8
    EPOCHS = 20
    N_FOLDS = 4
    TARGETS = ['cohesion', 'syntax', 'vocabulary',
       'phraseology', 'grammar', 'conventions']
    PADDING = False
    NUM_WORKERS = 4
    FINE_TUNED = "/kaggle/input/cnn-sent-mod/CNN_SENT_MODEL.bin"
  

In [3]:
class TrainMLMDatasetup(Dataset):
    def __init__(self, data):
        self.data = data["full_text"]
        self.tokenizer = AutoTokenizer.from_pretrained(MLMCONFIG.MODEL_PATH)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        text = self.data[item]
        inputs = self.tokenizer.encode_plus(
            text,
            return_tensors='pt',
            add_special_tokens=True,
            max_length=MLMCONFIG.MAX_LEN,
            pad_to_max_length=True,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
        )

        # copy input ids to create out labels tensor
        inputs["labels"] = inputs.input_ids.detach().clone()

        # create a set of floats and mask anything below our masking %
        # include logic to avoid masking the [CLS] [PAD], & [SEP] tokens
        rand = torch.rand(inputs.input_ids.shape)
        masked_arr = (
            (rand < MLMCONFIG.MASKING)
            * (inputs.labels != MLMCONFIG.CLS_TOKEN)
            * (inputs.labels != MLMCONFIG.SEP_TOKEN)
            * (inputs.labels != MLMCONFIG.PAD_TOKEN)
        )

        # get the non-zeros from the masked array
        selection = []
        for idx in range(masked_arr.shape[0]):
            selection.append(torch.flatten(masked_arr[idx].nonzero()).tolist())

        # use selection to mask the input_ids based upon the config.MASKING set
        for idx in range(masked_arr.shape[0]):
            inputs.input_ids[idx, selection[idx]] = MLMCONFIG.MASK_TOKEN

        input_ids = torch.flatten(inputs.input_ids)
        attention_mask = torch.flatten(inputs.attention_mask)
        labels = torch.flatten(inputs.labels)
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels,
        }

    
    
    
class TestMLMDatasetup(Dataset):
    def __init__(self, data):
        self.data = data["full_text"]
        self.tokenizer = AutoTokenizer.from_pretrained(MLMCONFIG.MODEL_PATH)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        text = self.data[item]
        inputs = self.tokenizer.encode_plus(
            text,
            return_tensors='pt',
            add_special_tokens=True,
            max_length=MLMCONFIG.MAX_LEN,
            pad_to_max_length=True,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
        )

        input_ids = torch.flatten(inputs.input_ids)
        attention_mask = torch.flatten(inputs.attention_mask)
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
        }


In [4]:
class MLMFineTuner(nn.Module):
    def __init__(self):
        super(MLMFineTuner, self).__init__()
        self.epochs = MLMCONFIG.EPOCHS
        self.learning_rate = MLMCONFIG.LR
        self.config = AutoConfig.from_pretrained(MLMCONFIG.MODEL_PATH, output_hidden_states=True)
        self.model = AutoModelForMaskedLM.from_pretrained(MLMCONFIG.MODEL_PATH, config=self.config)
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device("cpu")

    def fit(self, data):

        train_dataset = TrainMLMDatasetup(data)
        train_dataloader = DataLoader(train_dataset, batch_size=MLMCONFIG.BATCH_SIZE, shuffle=True, num_workers=MLMCONFIG.NUM_WORKERS)
        self.model.to(self.device)
        self.model.train()
        optim = AdamW(self.model.parameters(), lr=MLMCONFIG.LR)
        for epoch in range(self.epochs):
            for idx, batch in enumerate(train_dataloader):
                self.model.zero_grad()
                input_ids = batch['input_ids'].to(self.device)
                labels = batch['labels'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                output = self.model(input_ids, attention_mask=attention_mask, labels=labels)
                loss = output.loss
                print(f"epoch {epoch} --- step {idx} --- step size {input_ids.shape[0]} loss {loss.item()}")
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                loss.backward()
                optim.step()
            torch.save(self.model.state_dict(), MLMCONFIG.SAVE_PATH)
        
    def get_embeddings(self, X):
        test_dataset = TestMLMDatasetup(data=X)
        test_loader = DataLoader(
            test_dataset,
            batch_size=MLMCONFIG.TEST_BATCH_SIZE,
            shuffle=False,
            num_workers=MLMCONFIG.NUM_WORKERS,
            pin_memory=True,
            drop_last=False,
        )

        self.model.load_state_dict(
            torch.load(MLMCONFIG.FINE_TUNED)
        )
        self.model.to(self.device)
        self.model.eval()
        token_vectors = []
        for idx, batch in enumerate(tqdm(test_loader)):
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            outputs = self.model(input_ids, attention_mask=attention_mask, output_hidden_states=True)
            hidden_states = outputs.hidden_states
            token_vectors.append(hidden_states[-2][0].cpu().detach())
        return token_vectors
        

In [5]:
def targets_to_tensor(df, target_columns):
    return torch.tensor(df[target_columns].values, dtype=torch.float32)

In [6]:
TRAIN = pd.read_csv(MLMCONFIG.TRAIN) 
TEST = pd.read_csv(MLMCONFIG.TEST) 

In [7]:
run_mlm = 0
if run_mlm:
    mlm = MLMFineTuner()
    mlm.fit(data=TRAIN)


In [8]:
class TrainEmbeddingDataset(Dataset):
    def __init__(self, data):
        mlm = MLMFineTuner()
        self.data = mlm.get_embeddings(X=data)
        self.targets = targets_to_tensor(df=data,target_columns=CNN_CONFIG.TARGETS)
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, item):
        record = self.data[item]
        target = self.targets[item]
        
        return {
            "input_ids": record,
            "labels": torch.tensor(target, dtype=torch.long),
        }

    
class TestEmbeddingDataset(Dataset):
    def __init__(self, data):
        mlm = MLMFineTuner()
        self.data = mlm.get_embeddings(X=data)
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, item):
        record = self.data[item]
        
        return {
            "input_ids": record,
        }

In [9]:
class SentCNN(nn.Module):
    def __init__(self):
        super(SentCNN, self).__init__()
        dropout = 0.5
        static = True
        V = MLMCONFIG.MAX_LEN
        D = 768
        C = 6
        Co = 3
        Ks = [3, 4, 5]
        
        self.static = static
        self.embed = nn.Embedding(V, D)
        self.convs1 = nn.ModuleList([nn.Conv2d(1, Co, (K, D)) for K in Ks])
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(len(Ks) * Co, 6)

    def forward(self, x):
        x = Variable(x)

        x = x.unsqueeze(1)  # (N, Ci, W, D)

        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]

        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...]*len(Ks)

        x = torch.cat(x, 1)
        x = self.dropout(x)  # (N, len(Ks)*Co)
        logit = self.fc(x)  # (N, C)
        return logit

In [10]:
class SentenceCNNTrainer(nn.Module):
    def __init__(self):
        super(SentenceCNNTrainer, self).__init__()
        self.model = SentCNN()
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device("cpu")
    
    def fit(self, data):
        trainembedderset = TrainEmbeddingDataset(data=data)
        train_dataloader = DataLoader(
            trainembedderset,
            batch_size=CNN_CONFIG.TRAIN_BATCH_SIZE,
            shuffle=True,
            num_workers=CNN_CONFIG.NUM_WORKERS,
            pin_memory=True,
            drop_last=True,
        )
        criterion = nn.SmoothL1Loss(reduction='mean')
        self.model.to(self.device)
        self.model.train()
        optimizer = AdamW(params=self.model.parameters(), lr=CNN_CONFIG.LR)
        for epoch in range(CNN_CONFIG.EPOCHS):
            for step, batch in enumerate(train_dataloader):
                self.model.zero_grad()
                input_ids = batch["input_ids"].to(self.device)
                labels = batch["labels"].to(self.device)
                prediction_probas = self.model(input_ids)
                loss = criterion(prediction_probas, labels)
                loss.backward()
                print(f"epoch {epoch} --- step {step} --- step size {input_ids.shape[0]} loss {loss.item()}")
                # Update model parameters:
                # fine tune BERT params and train additional dense layers
                optimizer.step()
                # update learning rate
            torch.save(self.model.state_dict(), "CNN_SENT_MODEL.bin")
    def predict(self, data):
        testembedderset = TestEmbeddingDataset(data=data)
        test_dataloader = DataLoader(
            testembedderset,
            batch_size=1,
            shuffle=True,
            num_workers=CNN_CONFIG.NUM_WORKERS,
            pin_memory=True,
            drop_last=True,
        )
        self.model.eval()
        predictions = []
        for step, batch in enumerate(tqdm(test_dataloader)):
            input_ids = batch["input_ids"].to(self.device)
            with torch.no_grad():
                pred_probas = self.model(input_ids)
                
            predictions.append(pred_probas.cpu().detach().numpy())
        predictions = pd.DataFrame(np.concatenate(predictions))
        predictions.columns = CNN_CONFIG.TARGETS
        return predictions



In [11]:
clf = SentenceCNNTrainer()
clf.fit(data=TRAIN)

Some weights of the model checkpoint at /kaggle/input/dberta-base-model/ were not used when initializing DebertaForMaskedLM: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'deberta.embeddings.position_embeddings.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias']
- This IS expected if you are initializing DebertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForMaskedLM were not initialized from the model checkpoint at /kaggle/input/dberta-base-model/ and are newly initialized: ['cls.predictions.

epoch 0 --- step 0 --- step size 32 loss 2.7028234004974365
epoch 0 --- step 1 --- step size 32 loss 2.340407371520996
epoch 0 --- step 2 --- step size 32 loss 2.3022775650024414
epoch 0 --- step 3 --- step size 32 loss 2.3712892532348633
epoch 0 --- step 4 --- step size 32 loss 2.304643154144287
epoch 0 --- step 5 --- step size 32 loss 2.1172547340393066
epoch 0 --- step 6 --- step size 32 loss 2.2287089824676514
epoch 0 --- step 7 --- step size 32 loss 2.2328388690948486
epoch 0 --- step 8 --- step size 32 loss 2.1692614555358887
epoch 0 --- step 9 --- step size 32 loss 2.2546162605285645
epoch 0 --- step 10 --- step size 32 loss 2.159183979034424
epoch 0 --- step 11 --- step size 32 loss 2.151548385620117
epoch 0 --- step 12 --- step size 32 loss 2.0133614540100098
epoch 0 --- step 13 --- step size 32 loss 1.900999903678894
epoch 0 --- step 14 --- step size 32 loss 2.2243897914886475
epoch 0 --- step 15 --- step size 32 loss 2.0869388580322266
epoch 0 --- step 16 --- step size 32 lo

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 1 --- step 0 --- step size 32 loss 1.4548826217651367
epoch 1 --- step 1 --- step size 32 loss 1.4775099754333496
epoch 1 --- step 2 --- step size 32 loss 1.5124086141586304
epoch 1 --- step 3 --- step size 32 loss 1.6611522436141968
epoch 1 --- step 4 --- step size 32 loss 1.5162461996078491
epoch 1 --- step 5 --- step size 32 loss 1.444655418395996
epoch 1 --- step 6 --- step size 32 loss 1.4352641105651855
epoch 1 --- step 7 --- step size 32 loss 1.488265037536621
epoch 1 --- step 8 --- step size 32 loss 1.5790613889694214
epoch 1 --- step 9 --- step size 32 loss 1.3008599281311035
epoch 1 --- step 10 --- step size 32 loss 1.5947318077087402
epoch 1 --- step 11 --- step size 32 loss 1.4795112609863281
epoch 1 --- step 12 --- step size 32 loss 1.3173869848251343
epoch 1 --- step 13 --- step size 32 loss 1.3626420497894287
epoch 1 --- step 14 --- step size 32 loss 1.5544264316558838
epoch 1 --- step 15 --- step size 32 loss 1.3862273693084717
epoch 1 --- step 16 --- step size 32

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 2 --- step 0 --- step size 32 loss 0.7930294275283813
epoch 2 --- step 1 --- step size 32 loss 1.2406840324401855
epoch 2 --- step 2 --- step size 32 loss 0.8518131971359253
epoch 2 --- step 3 --- step size 32 loss 1.0576876401901245
epoch 2 --- step 4 --- step size 32 loss 1.1208155155181885
epoch 2 --- step 5 --- step size 32 loss 0.8274275064468384
epoch 2 --- step 6 --- step size 32 loss 1.3087491989135742
epoch 2 --- step 7 --- step size 32 loss 1.1989595890045166
epoch 2 --- step 8 --- step size 32 loss 1.0116784572601318
epoch 2 --- step 9 --- step size 32 loss 0.8142489790916443
epoch 2 --- step 10 --- step size 32 loss 1.0984776020050049
epoch 2 --- step 11 --- step size 32 loss 1.1637020111083984
epoch 2 --- step 12 --- step size 32 loss 0.9215742945671082
epoch 2 --- step 13 --- step size 32 loss 1.0822912454605103
epoch 2 --- step 14 --- step size 32 loss 1.0401074886322021
epoch 2 --- step 15 --- step size 32 loss 0.9928900599479675
epoch 2 --- step 16 --- step size 

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 3 --- step 0 --- step size 32 loss 0.9000314474105835
epoch 3 --- step 1 --- step size 32 loss 1.1312274932861328
epoch 3 --- step 2 --- step size 32 loss 0.9971312880516052
epoch 3 --- step 3 --- step size 32 loss 0.8865262866020203
epoch 3 --- step 4 --- step size 32 loss 0.8181437253952026
epoch 3 --- step 5 --- step size 32 loss 1.179244041442871
epoch 3 --- step 6 --- step size 32 loss 0.938035249710083
epoch 3 --- step 7 --- step size 32 loss 0.9198497533798218
epoch 3 --- step 8 --- step size 32 loss 1.0521676540374756
epoch 3 --- step 9 --- step size 32 loss 0.9147354364395142
epoch 3 --- step 10 --- step size 32 loss 0.9763995409011841
epoch 3 --- step 11 --- step size 32 loss 0.9963921904563904
epoch 3 --- step 12 --- step size 32 loss 0.9816960692405701
epoch 3 --- step 13 --- step size 32 loss 0.9184975028038025
epoch 3 --- step 14 --- step size 32 loss 0.9016556739807129
epoch 3 --- step 15 --- step size 32 loss 1.0667763948440552
epoch 3 --- step 16 --- step size 32

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 4 --- step 0 --- step size 32 loss 1.0021775960922241
epoch 4 --- step 1 --- step size 32 loss 0.8883436918258667
epoch 4 --- step 2 --- step size 32 loss 1.04630446434021
epoch 4 --- step 3 --- step size 32 loss 1.193956732749939
epoch 4 --- step 4 --- step size 32 loss 0.9205971956253052
epoch 4 --- step 5 --- step size 32 loss 0.9954632520675659
epoch 4 --- step 6 --- step size 32 loss 0.8895813822746277
epoch 4 --- step 7 --- step size 32 loss 1.0067473649978638
epoch 4 --- step 8 --- step size 32 loss 1.0647318363189697
epoch 4 --- step 9 --- step size 32 loss 1.2216904163360596
epoch 4 --- step 10 --- step size 32 loss 1.0764868259429932
epoch 4 --- step 11 --- step size 32 loss 0.7904078364372253
epoch 4 --- step 12 --- step size 32 loss 1.229978084564209
epoch 4 --- step 13 --- step size 32 loss 0.7029874324798584
epoch 4 --- step 14 --- step size 32 loss 1.095947027206421
epoch 4 --- step 15 --- step size 32 loss 0.7513853907585144
epoch 4 --- step 16 --- step size 32 lo

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 5 --- step 0 --- step size 32 loss 0.6874631643295288
epoch 5 --- step 1 --- step size 32 loss 0.9219799041748047
epoch 5 --- step 2 --- step size 32 loss 0.9414147734642029
epoch 5 --- step 3 --- step size 32 loss 1.0608611106872559
epoch 5 --- step 4 --- step size 32 loss 1.0212328433990479
epoch 5 --- step 5 --- step size 32 loss 0.7373861074447632
epoch 5 --- step 6 --- step size 32 loss 0.7290773391723633
epoch 5 --- step 7 --- step size 32 loss 0.8841294050216675
epoch 5 --- step 8 --- step size 32 loss 0.773552656173706
epoch 5 --- step 9 --- step size 32 loss 0.8199908137321472
epoch 5 --- step 10 --- step size 32 loss 0.8379899859428406
epoch 5 --- step 11 --- step size 32 loss 1.057822585105896
epoch 5 --- step 12 --- step size 32 loss 0.8003783226013184
epoch 5 --- step 13 --- step size 32 loss 0.8412742614746094
epoch 5 --- step 14 --- step size 32 loss 0.7406493425369263
epoch 5 --- step 15 --- step size 32 loss 0.7789849638938904
epoch 5 --- step 16 --- step size 32

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 6 --- step 0 --- step size 32 loss 1.035840392112732
epoch 6 --- step 1 --- step size 32 loss 0.8038513660430908
epoch 6 --- step 2 --- step size 32 loss 0.8911392092704773
epoch 6 --- step 3 --- step size 32 loss 0.8971449732780457
epoch 6 --- step 4 --- step size 32 loss 0.599969744682312
epoch 6 --- step 5 --- step size 32 loss 0.9239029884338379
epoch 6 --- step 6 --- step size 32 loss 0.8480814695358276
epoch 6 --- step 7 --- step size 32 loss 0.9310060739517212
epoch 6 --- step 8 --- step size 32 loss 1.0460331439971924
epoch 6 --- step 9 --- step size 32 loss 0.8564097285270691
epoch 6 --- step 10 --- step size 32 loss 0.961037814617157
epoch 6 --- step 11 --- step size 32 loss 0.7574424743652344
epoch 6 --- step 12 --- step size 32 loss 0.9451439380645752
epoch 6 --- step 13 --- step size 32 loss 0.9186372756958008
epoch 6 --- step 14 --- step size 32 loss 0.8685213327407837
epoch 6 --- step 15 --- step size 32 loss 0.8400440216064453
epoch 6 --- step 16 --- step size 32 

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 7 --- step 0 --- step size 32 loss 0.6725301742553711
epoch 7 --- step 1 --- step size 32 loss 0.9778841733932495
epoch 7 --- step 2 --- step size 32 loss 0.9795126914978027
epoch 7 --- step 3 --- step size 32 loss 0.574864387512207
epoch 7 --- step 4 --- step size 32 loss 0.5677630305290222
epoch 7 --- step 5 --- step size 32 loss 0.9376203417778015
epoch 7 --- step 6 --- step size 32 loss 0.7206381559371948
epoch 7 --- step 7 --- step size 32 loss 0.8179852962493896
epoch 7 --- step 8 --- step size 32 loss 0.8266403675079346
epoch 7 --- step 9 --- step size 32 loss 0.8451043963432312
epoch 7 --- step 10 --- step size 32 loss 0.8853945732116699
epoch 7 --- step 11 --- step size 32 loss 1.0046075582504272
epoch 7 --- step 12 --- step size 32 loss 0.978888213634491
epoch 7 --- step 13 --- step size 32 loss 0.6538918614387512
epoch 7 --- step 14 --- step size 32 loss 0.8434879183769226
epoch 7 --- step 15 --- step size 32 loss 0.7714749574661255
epoch 7 --- step 16 --- step size 32

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 8 --- step 0 --- step size 32 loss 0.7080212831497192
epoch 8 --- step 1 --- step size 32 loss 0.7145084142684937
epoch 8 --- step 2 --- step size 32 loss 0.8021794557571411
epoch 8 --- step 3 --- step size 32 loss 0.7408468127250671
epoch 8 --- step 4 --- step size 32 loss 0.8299428820610046
epoch 8 --- step 5 --- step size 32 loss 0.850614070892334
epoch 8 --- step 6 --- step size 32 loss 0.5214703679084778
epoch 8 --- step 7 --- step size 32 loss 0.704089343547821
epoch 8 --- step 8 --- step size 32 loss 0.6579766273498535
epoch 8 --- step 9 --- step size 32 loss 0.6837639212608337
epoch 8 --- step 10 --- step size 32 loss 0.8841185569763184
epoch 8 --- step 11 --- step size 32 loss 0.5580471754074097
epoch 8 --- step 12 --- step size 32 loss 0.611538290977478
epoch 8 --- step 13 --- step size 32 loss 0.5653799772262573
epoch 8 --- step 14 --- step size 32 loss 0.6477466225624084
epoch 8 --- step 15 --- step size 32 loss 0.697677493095398
epoch 8 --- step 16 --- step size 32 l

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 9 --- step 0 --- step size 32 loss 0.6381730437278748
epoch 9 --- step 1 --- step size 32 loss 0.7776914834976196
epoch 9 --- step 2 --- step size 32 loss 0.8415529727935791
epoch 9 --- step 3 --- step size 32 loss 0.895645797252655
epoch 9 --- step 4 --- step size 32 loss 0.8523887395858765
epoch 9 --- step 5 --- step size 32 loss 0.6597689986228943
epoch 9 --- step 6 --- step size 32 loss 0.4611775577068329
epoch 9 --- step 7 --- step size 32 loss 0.5555219054222107
epoch 9 --- step 8 --- step size 32 loss 0.49104997515678406
epoch 9 --- step 9 --- step size 32 loss 0.41784751415252686
epoch 9 --- step 10 --- step size 32 loss 0.9559122920036316
epoch 9 --- step 11 --- step size 32 loss 0.5581264495849609
epoch 9 --- step 12 --- step size 32 loss 0.9784551858901978
epoch 9 --- step 13 --- step size 32 loss 0.5030515789985657
epoch 9 --- step 14 --- step size 32 loss 0.6695941686630249
epoch 9 --- step 15 --- step size 32 loss 0.5940180420875549
epoch 9 --- step 16 --- step size

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 10 --- step 0 --- step size 32 loss 0.6029194593429565
epoch 10 --- step 1 --- step size 32 loss 0.6589966416358948
epoch 10 --- step 2 --- step size 32 loss 0.6348452568054199
epoch 10 --- step 3 --- step size 32 loss 0.7099733352661133
epoch 10 --- step 4 --- step size 32 loss 0.5001696944236755
epoch 10 --- step 5 --- step size 32 loss 0.6206339597702026
epoch 10 --- step 6 --- step size 32 loss 0.5583438873291016
epoch 10 --- step 7 --- step size 32 loss 0.6093217730522156
epoch 10 --- step 8 --- step size 32 loss 0.8634888529777527
epoch 10 --- step 9 --- step size 32 loss 0.6426889300346375
epoch 10 --- step 10 --- step size 32 loss 0.8504513502120972
epoch 10 --- step 11 --- step size 32 loss 0.7041780948638916
epoch 10 --- step 12 --- step size 32 loss 0.5892360210418701
epoch 10 --- step 13 --- step size 32 loss 0.30662959814071655
epoch 10 --- step 14 --- step size 32 loss 0.7097091674804688
epoch 10 --- step 15 --- step size 32 loss 0.6030022501945496
epoch 10 --- step

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 11 --- step 0 --- step size 32 loss 0.6036339998245239
epoch 11 --- step 1 --- step size 32 loss 0.6178892254829407
epoch 11 --- step 2 --- step size 32 loss 0.5949679613113403
epoch 11 --- step 3 --- step size 32 loss 0.6775168180465698
epoch 11 --- step 4 --- step size 32 loss 0.7103623151779175
epoch 11 --- step 5 --- step size 32 loss 0.5993355512619019
epoch 11 --- step 6 --- step size 32 loss 0.6822556257247925
epoch 11 --- step 7 --- step size 32 loss 0.6403923034667969
epoch 11 --- step 8 --- step size 32 loss 0.55438232421875
epoch 11 --- step 9 --- step size 32 loss 0.5267961025238037
epoch 11 --- step 10 --- step size 32 loss 0.5262545347213745
epoch 11 --- step 11 --- step size 32 loss 0.6154951453208923
epoch 11 --- step 12 --- step size 32 loss 0.7278600931167603
epoch 11 --- step 13 --- step size 32 loss 0.7436599731445312
epoch 11 --- step 14 --- step size 32 loss 0.3885658383369446
epoch 11 --- step 15 --- step size 32 loss 0.4282308518886566
epoch 11 --- step 16

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 12 --- step 0 --- step size 32 loss 0.5220321416854858
epoch 12 --- step 1 --- step size 32 loss 0.6830662488937378
epoch 12 --- step 2 --- step size 32 loss 0.5366396903991699
epoch 12 --- step 3 --- step size 32 loss 0.7276802062988281
epoch 12 --- step 4 --- step size 32 loss 0.45973441004753113
epoch 12 --- step 5 --- step size 32 loss 0.786472499370575
epoch 12 --- step 6 --- step size 32 loss 0.6102802753448486
epoch 12 --- step 7 --- step size 32 loss 0.5491071939468384
epoch 12 --- step 8 --- step size 32 loss 0.6715575456619263
epoch 12 --- step 9 --- step size 32 loss 0.6796800494194031
epoch 12 --- step 10 --- step size 32 loss 0.4728120267391205
epoch 12 --- step 11 --- step size 32 loss 0.531541109085083
epoch 12 --- step 12 --- step size 32 loss 0.5566167831420898
epoch 12 --- step 13 --- step size 32 loss 0.5695840716362
epoch 12 --- step 14 --- step size 32 loss 0.7373594045639038
epoch 12 --- step 15 --- step size 32 loss 0.5826961994171143
epoch 12 --- step 16 -

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 13 --- step 0 --- step size 32 loss 0.6508440971374512
epoch 13 --- step 1 --- step size 32 loss 0.5336178541183472
epoch 13 --- step 2 --- step size 32 loss 0.64972984790802
epoch 13 --- step 3 --- step size 32 loss 0.45799654722213745
epoch 13 --- step 4 --- step size 32 loss 0.4390489459037781
epoch 13 --- step 5 --- step size 32 loss 0.5378383994102478
epoch 13 --- step 6 --- step size 32 loss 0.5054198503494263
epoch 13 --- step 7 --- step size 32 loss 0.5147808790206909
epoch 13 --- step 8 --- step size 32 loss 0.5729013085365295
epoch 13 --- step 9 --- step size 32 loss 0.572144091129303
epoch 13 --- step 10 --- step size 32 loss 0.392514705657959
epoch 13 --- step 11 --- step size 32 loss 0.6190312504768372
epoch 13 --- step 12 --- step size 32 loss 0.756989598274231
epoch 13 --- step 13 --- step size 32 loss 0.6214737296104431
epoch 13 --- step 14 --- step size 32 loss 0.5788519382476807
epoch 13 --- step 15 --- step size 32 loss 0.5516217947006226
epoch 13 --- step 16 -

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 14 --- step 0 --- step size 32 loss 0.6093695163726807
epoch 14 --- step 1 --- step size 32 loss 0.595442533493042
epoch 14 --- step 2 --- step size 32 loss 0.4080328941345215
epoch 14 --- step 3 --- step size 32 loss 0.5854384303092957
epoch 14 --- step 4 --- step size 32 loss 0.5035566091537476
epoch 14 --- step 5 --- step size 32 loss 0.4625512957572937
epoch 14 --- step 6 --- step size 32 loss 0.5905678272247314
epoch 14 --- step 7 --- step size 32 loss 0.6599698066711426
epoch 14 --- step 8 --- step size 32 loss 0.5707839727401733
epoch 14 --- step 9 --- step size 32 loss 0.405874103307724
epoch 14 --- step 10 --- step size 32 loss 0.6535835266113281
epoch 14 --- step 11 --- step size 32 loss 0.5114632844924927
epoch 14 --- step 12 --- step size 32 loss 0.6329573392868042
epoch 14 --- step 13 --- step size 32 loss 0.5938897132873535
epoch 14 --- step 14 --- step size 32 loss 0.5535603165626526
epoch 14 --- step 15 --- step size 32 loss 0.4901411533355713
epoch 14 --- step 16

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 15 --- step 0 --- step size 32 loss 0.54755038022995
epoch 15 --- step 1 --- step size 32 loss 0.46295613050460815
epoch 15 --- step 2 --- step size 32 loss 0.6235133409500122
epoch 15 --- step 3 --- step size 32 loss 0.5242788791656494
epoch 15 --- step 4 --- step size 32 loss 0.48005351424217224
epoch 15 --- step 5 --- step size 32 loss 0.4033750295639038
epoch 15 --- step 6 --- step size 32 loss 0.4326961934566498
epoch 15 --- step 7 --- step size 32 loss 0.40298086404800415
epoch 15 --- step 8 --- step size 32 loss 0.6769436597824097
epoch 15 --- step 9 --- step size 32 loss 0.4667550027370453
epoch 15 --- step 10 --- step size 32 loss 0.3437741696834564
epoch 15 --- step 11 --- step size 32 loss 0.38571876287460327
epoch 15 --- step 12 --- step size 32 loss 0.42033421993255615
epoch 15 --- step 13 --- step size 32 loss 0.3394782543182373
epoch 15 --- step 14 --- step size 32 loss 0.5270661115646362
epoch 15 --- step 15 --- step size 32 loss 0.5571837425231934
epoch 15 --- st

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 16 --- step 0 --- step size 32 loss 0.40697145462036133
epoch 16 --- step 1 --- step size 32 loss 0.5074145793914795
epoch 16 --- step 2 --- step size 32 loss 0.4093131422996521
epoch 16 --- step 3 --- step size 32 loss 0.3502713143825531
epoch 16 --- step 4 --- step size 32 loss 0.2593677043914795
epoch 16 --- step 5 --- step size 32 loss 0.437974750995636
epoch 16 --- step 6 --- step size 32 loss 0.5780662298202515
epoch 16 --- step 7 --- step size 32 loss 0.6790172457695007
epoch 16 --- step 8 --- step size 32 loss 0.4894756078720093
epoch 16 --- step 9 --- step size 32 loss 0.47651827335357666
epoch 16 --- step 10 --- step size 32 loss 0.31996968388557434
epoch 16 --- step 11 --- step size 32 loss 0.5669755935668945
epoch 16 --- step 12 --- step size 32 loss 0.46930834650993347
epoch 16 --- step 13 --- step size 32 loss 0.45395809412002563
epoch 16 --- step 14 --- step size 32 loss 0.5688636302947998
epoch 16 --- step 15 --- step size 32 loss 0.388799250125885
epoch 16 --- st

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 17 --- step 0 --- step size 32 loss 0.40716981887817383
epoch 17 --- step 1 --- step size 32 loss 0.3894590139389038
epoch 17 --- step 2 --- step size 32 loss 0.34579038619995117
epoch 17 --- step 3 --- step size 32 loss 0.5177112221717834
epoch 17 --- step 4 --- step size 32 loss 0.41602903604507446
epoch 17 --- step 5 --- step size 32 loss 0.3798186182975769
epoch 17 --- step 6 --- step size 32 loss 0.4725450873374939
epoch 17 --- step 7 --- step size 32 loss 0.281680166721344
epoch 17 --- step 8 --- step size 32 loss 0.4046476483345032
epoch 17 --- step 9 --- step size 32 loss 0.44176894426345825
epoch 17 --- step 10 --- step size 32 loss 0.45880016684532166
epoch 17 --- step 11 --- step size 32 loss 0.4335342347621918
epoch 17 --- step 12 --- step size 32 loss 0.3637373447418213
epoch 17 --- step 13 --- step size 32 loss 0.43227753043174744
epoch 17 --- step 14 --- step size 32 loss 0.42169734835624695
epoch 17 --- step 15 --- step size 32 loss 0.43780678510665894
epoch 17 --

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 18 --- step 0 --- step size 32 loss 0.3607901930809021
epoch 18 --- step 1 --- step size 32 loss 0.31305575370788574
epoch 18 --- step 2 --- step size 32 loss 0.3599469065666199
epoch 18 --- step 3 --- step size 32 loss 0.3265160620212555
epoch 18 --- step 4 --- step size 32 loss 0.414347380399704
epoch 18 --- step 5 --- step size 32 loss 0.3421704173088074
epoch 18 --- step 6 --- step size 32 loss 0.3872523009777069
epoch 18 --- step 7 --- step size 32 loss 0.4359002709388733
epoch 18 --- step 8 --- step size 32 loss 0.23913463950157166
epoch 18 --- step 9 --- step size 32 loss 0.4013979434967041
epoch 18 --- step 10 --- step size 32 loss 0.3895721435546875
epoch 18 --- step 11 --- step size 32 loss 0.444241464138031
epoch 18 --- step 12 --- step size 32 loss 0.3094918429851532
epoch 18 --- step 13 --- step size 32 loss 0.3495004177093506
epoch 18 --- step 14 --- step size 32 loss 0.42758092284202576
epoch 18 --- step 15 --- step size 32 loss 0.3877057433128357
epoch 18 --- step

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


epoch 19 --- step 0 --- step size 32 loss 0.35611170530319214
epoch 19 --- step 1 --- step size 32 loss 0.35133689641952515
epoch 19 --- step 2 --- step size 32 loss 0.3484143018722534
epoch 19 --- step 3 --- step size 32 loss 0.26366177201271057
epoch 19 --- step 4 --- step size 32 loss 0.37439775466918945
epoch 19 --- step 5 --- step size 32 loss 0.3765714764595032
epoch 19 --- step 6 --- step size 32 loss 0.3203573226928711
epoch 19 --- step 7 --- step size 32 loss 0.26533132791519165
epoch 19 --- step 8 --- step size 32 loss 0.3712722659111023
epoch 19 --- step 9 --- step size 32 loss 0.4896305203437805
epoch 19 --- step 10 --- step size 32 loss 0.3993581235408783
epoch 19 --- step 11 --- step size 32 loss 0.5821589231491089
epoch 19 --- step 12 --- step size 32 loss 0.3498574495315552
epoch 19 --- step 13 --- step size 32 loss 0.3530066907405853
epoch 19 --- step 14 --- step size 32 loss 0.393622487783432
epoch 19 --- step 15 --- step size 32 loss 0.3191549777984619
epoch 19 --- s

In [12]:

predictions = clf.predict(data=TEST)
submission = pd.merge(TEST, predictions, left_index=True,right_index=True)
submission = submission.drop('full_text',axis=1)
submission.to_csv('submission.csv', index=False)


Some weights of the model checkpoint at /kaggle/input/dberta-base-model/ were not used when initializing DebertaForMaskedLM: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'deberta.embeddings.position_embeddings.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias']
- This IS expected if you are initializing DebertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForMaskedLM were not initialized from the model checkpoint at /kaggle/input/dberta-base-model/ and are newly initialized: ['cls.predictions.

In [13]:
submission

Unnamed: 0,text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0000C359D63E,2.745597,2.571096,2.752243,2.60453,2.616288,2.687251
1,000BAD50D026,2.479983,2.218592,2.382292,2.226262,2.300099,2.428221
2,00367BB2546B,2.659872,2.457405,2.63287,2.482533,2.51423,2.60366
