In [1]:
'''
fine-tune CodeBERT
Author: Liu Jin Cheng
'''
from transformers import RobertaForSequenceClassification,RobertaTokenizer,get_linear_schedule_with_warmup
from datasets import load_dataset,Dataset
import os
import random
import numpy as np
import evaluate
import torch
import argparse
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.functional import F
from torch.cuda.amp import autocast as autocast,GradScaler
import pandas as pd
from sklearn.metrics import f1_score
from torch.optim import AdamW
import pandas as pd


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_path = os.path.join("..","preprocess","dataset")
train_batch_size = 8
eval_batch_size = 8
lr = 5e-5
num_epochs = 30
model_name = "microsoft/codebert-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaForSequenceClassification.from_pretrained(model_name,num_labels = 66)

Some weights of the model checkpoint at microsoft/codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be 

In [3]:

def compute_metrics(eval_pred):
        metirc = evaluate.load("accuracy")
        logits , labels = eval_pred
        predictions = np.argmax(logits,axis=-1)
        return metirc.compute(predictions=predictions,references=labels)


def tokenize_function(examples):
        return tokenizer(examples["text"],truncation = True,padding=True) 

def collate_fn(examples):
    return tokenizer.pad(examples, padding="max_length", return_tensors="pt")

In [4]:
dftrain = pd.read_pickle(os.path.join(data_path,"train.pkl"))
dfvalid = pd.read_pickle(os.path.join(data_path,"valid.pkl"))
traindatasets = Dataset.from_pandas(dftrain)
validdatasets = Dataset.from_pandas(dfvalid)

In [5]:
train_tokenized_dataset = traindatasets.map(tokenize_function,batched=True,remove_columns=["text","__index_level_0__"]).rename_column("label","labels")
valid_tokenized_dataset = validdatasets.map(tokenize_function,batched=True,remove_columns=["text","__index_level_0__"]).rename_column("label","labels")
train_dataloader = DataLoader(train_tokenized_dataset,shuffle=True,collate_fn=collate_fn,batch_size = train_batch_size)
eval_dataloader = DataLoader(valid_tokenized_dataset , collate_fn=collate_fn,batch_size = eval_batch_size)
model.resize_token_embeddings(len(tokenizer))
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

                                                                  

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [6]:
optimizer = AdamW(params=model.parameters(), lr=lr)

# Instantiate scheduler

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0.06 * (len(train_dataloader) * num_epochs),
    num_training_steps=(len(train_dataloader) * num_epochs),
)


In [7]:
model.to(device)
max_eval_acc = 0
iter_to_accumlate = 4
epochloss = []
trainlogdf = pd.DataFrame(columns=["step","trainloss","validloss","acc","f1-score"])
rowindex = 0
eval_no_progress_count = 0
early_stopping = 6
early_stopping_flag = False
for epoch in range(num_epochs):
    model.train()
    allloss = 0
    for step,batch in enumerate(tqdm(train_dataloader)):
        batch.to(device)
        outputs = model(**batch)
        loss = outputs.loss/iter_to_accumlate
        loss.backward()
        allloss += loss.item()
        trainlogdf.loc[rowindex] = [rowindex,loss.item(),None,None,None]
        rowindex += 1
        epochloss.append(loss.item())
        if (step+1)%iter_to_accumlate==0:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
        if (step+1)%(50*iter_to_accumlate) == 0:
            print("epoch",epoch,"step",step,"loss",loss,sep=" ")
        
        if (step+1)%800 == 0:
            count = 0
            model.eval()
            validloss = 0
            preds = []
            labels = []
            for evalstep,batch in enumerate(tqdm(eval_dataloader)):
                labels += batch['labels'].cpu()
                batch.to(device)
                with torch.no_grad():
                    output = model(**batch)
                validloss += output.loss.item()
                pred = torch.argmax(F.softmax(output.logits.cpu(),dim=1),dim=1)
                preds += pred
                count += int(sum(batch['labels'].cpu() == pred))
            model.train()
            eval_acc = count/2732
            trainlogdf.loc[rowindex-1,"validloss"] = validloss/2732
            trainlogdf.loc[rowindex-1,"acc"] = eval_acc
            trainlogdf.loc[rowindex-1,"f1-score"] = f1_score(np.array(labels),np.array(preds),average="macro")
            print("epoch ",epoch,"step",step,"acc ",eval_acc)
            if eval_acc < max_eval_acc:
                eval_no_progress_count += 1
                if eval_no_progress_count >=early_stopping:
                    print("Early Stopping:Epoch",epoch," Step",step,"Eval_acc",eval_acc,sep=" ")
                    early_stopping_flag = True
                    break
                else:
                    print("Early Stopping record count",eval_no_progress_count,"/",early_stopping,"Will stop","Max eval acc",max_eval_acc,sep=" ")
            if eval_acc > max_eval_acc:
                max_eval_acc = eval_acc
                print("Update Max eval acc",max_eval_acc)
                eval_no_progress_count = 0
                model.save_pretrained("CodeBERTsaved_models")
                torch.save(model.state_dict(),os.path.join("checkpoint","model.bin"))
                torch.save(optimizer.state_dict(),os.path.join("checkpoint","optimizer.bin"))
                torch.save(lr_scheduler.state_dict(),os.path.join("checkpoint","lr_scheduler.bin"))
    print("epoch",epoch,"step",step,"trainLoss:",allloss/(len(train_dataloader)*train_batch_size))
    if early_stopping_flag:
        break
trainlogdf.to_csv("trainlog.csv")
tokenizer.save_pretrained("CodeBERTsaved_models")

  7%|▋         | 200/2732 [01:20<16:56,  2.49it/s]

epoch 0 step 199 loss tensor(1.0105, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 400/2732 [02:39<15:45,  2.47it/s]

epoch 0 step 399 loss tensor(0.8993, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 600/2732 [03:58<14:22,  2.47it/s]

epoch 0 step 599 loss tensor(0.6769, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [05:16<12:38,  2.55it/s]

epoch 0 step 799 loss tensor(0.4618, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:47<00:00,  7.27it/s]


epoch  0 step 799 acc  0.568814055636896
Update Max eval acc 0.568814055636896


 37%|███▋      | 1000/2732 [07:34<10:51,  2.66it/s]  

epoch 0 step 999 loss tensor(0.3564, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 1200/2732 [08:48<09:36,  2.66it/s]

epoch 0 step 1199 loss tensor(0.2870, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████▏    | 1401/2732 [09:44<04:11,  5.29it/s]

epoch 0 step 1399 loss tensor(0.2456, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 1599/2732 [10:21<03:32,  5.33it/s]

epoch 0 step 1599 loss tensor(0.2483, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:22<00:00, 15.28it/s]


epoch  0 step 1599 acc  0.58199121522694
Update Max eval acc 0.58199121522694


 66%|██████▌   | 1801/2732 [11:34<02:54,  5.34it/s]  

epoch 0 step 1799 loss tensor(0.2169, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 2001/2732 [12:11<02:17,  5.31it/s]

epoch 0 step 1999 loss tensor(0.1885, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 2201/2732 [12:49<01:39,  5.32it/s]

epoch 0 step 2199 loss tensor(0.1865, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 2399/2732 [13:26<01:01,  5.39it/s]

epoch 0 step 2399 loss tensor(0.1763, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:22<00:00, 15.51it/s]


epoch  0 step 2399 acc  0.5904099560761347
Update Max eval acc 0.5904099560761347


 95%|█████████▌| 2601/2732 [14:39<00:24,  5.32it/s]

epoch 0 step 2599 loss tensor(0.2065, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2732/2732 [15:03<00:00,  3.02it/s]


epoch 0 step 2731 trainLoss: 0.052859855352834106


  7%|▋         | 201/2732 [00:37<07:56,  5.31it/s]

epoch 1 step 199 loss tensor(0.1812, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 401/2732 [01:15<07:17,  5.33it/s]

epoch 1 step 399 loss tensor(0.2214, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 601/2732 [01:52<06:41,  5.30it/s]

epoch 1 step 599 loss tensor(0.1791, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [02:29<06:00,  5.36it/s]

epoch 1 step 799 loss tensor(0.1707, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:22<00:00, 15.46it/s]


epoch  1 step 799 acc  0.6255490483162518
Update Max eval acc 0.6255490483162518


 37%|███▋      | 1001/2732 [03:42<05:25,  5.32it/s] 

epoch 1 step 999 loss tensor(0.1679, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 1201/2732 [04:19<04:49,  5.29it/s]

epoch 1 step 1199 loss tensor(0.1938, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 1400/2732 [05:00<09:01,  2.46it/s]

epoch 1 step 1399 loss tensor(0.1751, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 1599/2732 [06:18<07:27,  2.53it/s]

epoch 1 step 1599 loss tensor(0.1684, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:47<00:00,  7.25it/s]
 59%|█████▊    | 1600/2732 [07:06<4:34:41, 14.56s/it]

epoch  1 step 1599 acc  0.6218887262079063
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6255490483162518


 66%|██████▌   | 1800/2732 [08:27<06:23,  2.43it/s]  

epoch 1 step 1799 loss tensor(0.1932, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 2000/2732 [09:47<04:55,  2.48it/s]

epoch 1 step 1999 loss tensor(0.2077, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 2200/2732 [11:06<03:35,  2.47it/s]

epoch 1 step 2199 loss tensor(0.1779, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 2399/2732 [12:25<02:11,  2.54it/s]

epoch 1 step 2399 loss tensor(0.2260, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:43<00:00,  7.81it/s]


epoch  1 step 2399 acc  0.6284773060029283
Update Max eval acc 0.6284773060029283


 95%|█████████▌| 2600/2732 [14:35<00:49,  2.68it/s]  

epoch 1 step 2599 loss tensor(0.1787, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2732/2732 [15:11<00:00,  3.00it/s]


epoch 1 step 2731 trainLoss: 0.021297891773242573


  7%|▋         | 200/2732 [01:19<17:02,  2.48it/s]

epoch 2 step 199 loss tensor(0.1684, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 400/2732 [02:38<15:41,  2.48it/s]

epoch 2 step 399 loss tensor(0.1417, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 600/2732 [03:57<14:25,  2.46it/s]

epoch 2 step 599 loss tensor(0.1766, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [05:15<12:41,  2.54it/s]

epoch 2 step 799 loss tensor(0.1603, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.30it/s]
 29%|██▉       | 800/2732 [06:03<7:45:53, 14.47s/it]

epoch  2 step 799 acc  0.5944363103953147
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6284773060029283


 37%|███▋      | 1000/2732 [07:22<11:42,  2.46it/s] 

epoch 2 step 999 loss tensor(0.1734, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 1200/2732 [08:35<09:30,  2.69it/s]

epoch 2 step 1199 loss tensor(0.1683, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 1400/2732 [09:48<08:19,  2.67it/s]

epoch 2 step 1399 loss tensor(0.1806, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 1599/2732 [10:51<07:28,  2.53it/s]

epoch 2 step 1599 loss tensor(0.2284, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.28it/s]
 59%|█████▊    | 1600/2732 [11:38<4:33:28, 14.50s/it]

epoch  2 step 1599 acc  0.626281112737921
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6284773060029283


 66%|██████▌   | 1800/2732 [12:57<06:17,  2.47it/s]  

epoch 2 step 1799 loss tensor(0.2461, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 2000/2732 [14:17<04:57,  2.46it/s]

epoch 2 step 1999 loss tensor(0.1690, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 2200/2732 [15:36<03:35,  2.47it/s]

epoch 2 step 2199 loss tensor(0.1232, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 2399/2732 [16:55<02:12,  2.52it/s]

epoch 2 step 2399 loss tensor(0.1406, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.29it/s]


epoch  2 step 2399 acc  0.6394582723279648
Update Max eval acc 0.6394582723279648


 95%|█████████▌| 2600/2732 [19:08<00:49,  2.69it/s]  

epoch 2 step 2599 loss tensor(0.1291, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2732/2732 [19:56<00:00,  2.28it/s]


epoch 2 step 2731 trainLoss: 0.01986309742291852


  7%|▋         | 200/2732 [01:03<17:03,  2.47it/s]

epoch 3 step 199 loss tensor(0.1548, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 400/2732 [02:23<15:42,  2.47it/s]

epoch 3 step 399 loss tensor(0.1881, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 600/2732 [03:42<14:18,  2.48it/s]

epoch 3 step 599 loss tensor(0.0801, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [05:01<12:47,  2.52it/s]

epoch 3 step 799 loss tensor(0.1619, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:47<00:00,  7.27it/s]


epoch  3 step 799 acc  0.6636163982430454
Update Max eval acc 0.6636163982430454


 37%|███▋      | 1000/2732 [07:21<11:39,  2.48it/s] 

epoch 3 step 999 loss tensor(0.1750, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 1200/2732 [08:35<09:33,  2.67it/s]

epoch 3 step 1199 loss tensor(0.1256, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 1400/2732 [09:48<08:16,  2.68it/s]

epoch 3 step 1399 loss tensor(0.1480, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 1599/2732 [11:05<07:26,  2.54it/s]

epoch 3 step 1599 loss tensor(0.1603, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.30it/s]
 59%|█████▊    | 1600/2732 [11:52<4:33:00, 14.47s/it]

epoch  3 step 1599 acc  0.6606881405563689
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6636163982430454


 66%|██████▌   | 1800/2732 [13:11<06:18,  2.46it/s]  

epoch 3 step 1799 loss tensor(0.1494, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 2000/2732 [14:31<04:56,  2.47it/s]

epoch 3 step 1999 loss tensor(0.1120, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 2200/2732 [15:50<03:33,  2.49it/s]

epoch 3 step 2199 loss tensor(0.1189, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 2399/2732 [17:09<02:12,  2.52it/s]

epoch 3 step 2399 loss tensor(0.1549, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:45<00:00,  7.48it/s]


epoch  3 step 2399 acc  0.6676427525622255
Update Max eval acc 0.6676427525622255


 95%|█████████▌| 2600/2732 [19:22<00:49,  2.69it/s]  

epoch 3 step 2599 loss tensor(0.1274, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2732/2732 [20:03<00:00,  2.27it/s]


epoch 3 step 2731 trainLoss: 0.01867215364311016


  7%|▋         | 200/2732 [01:11<17:10,  2.46it/s]

epoch 4 step 199 loss tensor(0.1154, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 400/2732 [02:30<15:47,  2.46it/s]

epoch 4 step 399 loss tensor(0.1277, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 600/2732 [03:50<14:26,  2.46it/s]

epoch 4 step 599 loss tensor(0.1474, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [05:09<12:42,  2.53it/s]

epoch 4 step 799 loss tensor(0.0674, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.29it/s]
 29%|██▉       | 800/2732 [05:56<7:46:37, 14.49s/it]

epoch  4 step 799 acc  0.664714494875549
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6676427525622255


 37%|███▋      | 1000/2732 [07:15<11:42,  2.46it/s] 

epoch 4 step 999 loss tensor(0.1085, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 1200/2732 [08:30<09:33,  2.67it/s]

epoch 4 step 1199 loss tensor(0.0994, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 1400/2732 [09:43<08:15,  2.69it/s]

epoch 4 step 1399 loss tensor(0.1951, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 1599/2732 [10:59<07:27,  2.53it/s]

epoch 4 step 1599 loss tensor(0.1226, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.29it/s]
 59%|█████▊    | 1600/2732 [11:47<4:33:23, 14.49s/it]

epoch  4 step 1599 acc  0.657393850658858
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6676427525622255


 66%|██████▌   | 1800/2732 [13:06<06:18,  2.46it/s]  

epoch 4 step 1799 loss tensor(0.1384, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 2000/2732 [14:25<04:56,  2.47it/s]

epoch 4 step 1999 loss tensor(0.1128, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 2200/2732 [15:45<03:36,  2.46it/s]

epoch 4 step 2199 loss tensor(0.1222, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 2399/2732 [17:04<02:12,  2.52it/s]

epoch 4 step 2399 loss tensor(0.1286, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:47<00:00,  7.27it/s]
 88%|████████▊ | 2400/2732 [17:51<1:20:18, 14.51s/it]

epoch  4 step 2399 acc  0.6625183016105417
Early Stopping record count 3 / 6 Will stop Max eval acc 0.6676427525622255


 95%|█████████▌| 2600/2732 [19:11<00:53,  2.48it/s]  

epoch 4 step 2599 loss tensor(0.2251, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2732/2732 [20:03<00:00,  2.27it/s]


epoch 4 step 2731 trainLoss: 0.01740461771988677


  7%|▋         | 200/2732 [01:19<17:11,  2.46it/s]

epoch 5 step 199 loss tensor(0.1657, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 400/2732 [02:38<15:50,  2.45it/s]

epoch 5 step 399 loss tensor(0.2444, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 600/2732 [03:58<14:29,  2.45it/s]

epoch 5 step 599 loss tensor(0.1325, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [05:11<11:43,  2.75it/s]

epoch 5 step 799 loss tensor(0.1003, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:43<00:00,  7.93it/s]


epoch  5 step 799 acc  0.6738653001464129
Update Max eval acc 0.6738653001464129


 37%|███▋      | 1000/2732 [07:11<11:45,  2.45it/s] 

epoch 5 step 999 loss tensor(0.0857, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 1200/2732 [08:30<10:18,  2.48it/s]

epoch 5 step 1199 loss tensor(0.0572, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 1400/2732 [09:50<08:57,  2.48it/s]

epoch 5 step 1399 loss tensor(0.1310, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 1599/2732 [11:09<07:30,  2.52it/s]

epoch 5 step 1599 loss tensor(0.1027, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.29it/s]
 59%|█████▊    | 1600/2732 [11:56<4:33:20, 14.49s/it]

epoch  5 step 1599 acc  0.6588579795021962
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6738653001464129


 66%|██████▌   | 1800/2732 [13:15<06:17,  2.47it/s]  

epoch 5 step 1799 loss tensor(0.1680, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 2000/2732 [14:32<04:32,  2.68it/s]

epoch 5 step 1999 loss tensor(0.1112, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 2200/2732 [15:45<03:16,  2.71it/s]

epoch 5 step 2199 loss tensor(0.1792, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 2399/2732 [16:59<02:12,  2.52it/s]

epoch 5 step 2399 loss tensor(0.1139, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.28it/s]
 88%|████████▊ | 2400/2732 [17:47<1:20:16, 14.51s/it]

epoch  5 step 2399 acc  0.6661786237188873
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6738653001464129


 95%|█████████▌| 2600/2732 [19:06<00:53,  2.48it/s]  

epoch 5 step 2599 loss tensor(0.0721, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2732/2732 [19:58<00:00,  2.28it/s]


epoch 5 step 2731 trainLoss: 0.015860981348916305


  7%|▋         | 200/2732 [01:19<17:10,  2.46it/s]

epoch 6 step 199 loss tensor(0.0806, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 400/2732 [02:38<15:47,  2.46it/s]

epoch 6 step 399 loss tensor(0.0591, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 600/2732 [03:58<14:23,  2.47it/s]

epoch 6 step 599 loss tensor(0.0355, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [05:11<11:39,  2.76it/s]

epoch 6 step 799 loss tensor(0.1063, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:43<00:00,  7.91it/s]
 29%|██▉       | 800/2732 [05:55<7:09:37, 13.34s/it]

epoch  6 step 799 acc  0.66398243045388
Early Stopping record count 3 / 6 Will stop Max eval acc 0.6738653001464129


 37%|███▋      | 1001/2732 [06:56<05:26,  5.29it/s] 

epoch 6 step 999 loss tensor(0.1023, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 1200/2732 [08:14<10:23,  2.46it/s]

epoch 6 step 1199 loss tensor(0.1407, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 1400/2732 [09:33<09:00,  2.47it/s]

epoch 6 step 1399 loss tensor(0.0472, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 1599/2732 [10:52<07:28,  2.52it/s]

epoch 6 step 1599 loss tensor(0.0834, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:46<00:00,  7.29it/s]
 59%|█████▊    | 1600/2732 [11:40<4:33:24, 14.49s/it]

epoch  6 step 1599 acc  0.6387262079062958
Early Stopping record count 4 / 6 Will stop Max eval acc 0.6738653001464129


 66%|██████▌   | 1800/2732 [12:59<06:17,  2.47it/s]  

epoch 6 step 1799 loss tensor(0.0777, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 2000/2732 [14:18<04:55,  2.48it/s]

epoch 6 step 1999 loss tensor(0.1288, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 2200/2732 [15:32<03:17,  2.69it/s]

epoch 6 step 2199 loss tensor(0.0690, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 2399/2732 [16:45<02:00,  2.76it/s]

epoch 6 step 2399 loss tensor(0.1775, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:45<00:00,  7.54it/s]
 88%|████████▊ | 2400/2732 [17:30<1:17:26, 14.00s/it]

epoch  6 step 2399 acc  0.6522693997071742
Early Stopping record count 5 / 6 Will stop Max eval acc 0.6738653001464129


 95%|█████████▌| 2600/2732 [18:50<00:53,  2.46it/s]  

epoch 6 step 2599 loss tensor(0.1045, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2732/2732 [19:42<00:00,  2.31it/s]


epoch 6 step 2731 trainLoss: 0.014193942300119848


  7%|▋         | 200/2732 [01:19<17:11,  2.46it/s]

epoch 7 step 199 loss tensor(0.0582, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 400/2732 [02:38<15:46,  2.46it/s]

epoch 7 step 399 loss tensor(0.0970, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 600/2732 [03:58<14:27,  2.46it/s]

epoch 7 step 599 loss tensor(0.0889, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 799/2732 [05:15<11:40,  2.76it/s]

epoch 7 step 799 loss tensor(0.0443, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 342/342 [00:43<00:00,  7.89it/s]
 29%|██▉       | 799/2732 [05:59<14:28,  2.23it/s]


epoch  7 step 799 acc  0.6500732064421669
Early Stopping:Epoch 7  Step 799 Eval_acc 0.6500732064421669
epoch 7 step 799 trainLoss: 0.0035291198991394346


('CodeBERTsaved_models/tokenizer_config.json',
 'CodeBERTsaved_models/special_tokens_map.json',
 'CodeBERTsaved_models/vocab.json',
 'CodeBERTsaved_models/merges.txt',
 'CodeBERTsaved_models/added_tokens.json')

In [8]:
!source attack.sh

Example time cost:  0.01 min
ALL examples time cost:  0.01 min
Query times in this attack:  1
Greedy query times: 1
Ga query times: 0
All Query times:  1
Example time cost:  0.0 min
ALL examples time cost:  0.01 min
Query times in this attack:  1
Greedy query times: 1
Ga query times: 0
All Query times:  2
Example time cost:  0.0 min
ALL examples time cost:  0.01 min
Query times in this attack:  1
Greedy query times: 1
Ga query times: 0
All Query times:  3
[31m╭─[0m[31m────────────────────[0m[31m [0m[1;31mTraceback [0m[1;2;31m(most recent call last)[0m[31m [0m[31m─────────────────────[0m[31m─╮[0m
[31m│[0m [2;33m/home/ljc/desktop/neutral-attack-for-pretrained-models/CodeBERT/Defect-detec[0m [31m│[0m
[31m│[0m [2;33mtion/code/[0m[1;33mattack.py[0m:[94m195[0m in [92m<module>[0m                                          [31m│[0m
[31m│[0m                                                                              [31m│[0m
[31m│[0m   [2m192 [0m        