In [1]:
import pandas as pd
from functools import partial

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW, get_scheduler
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from tqdm import trange, tqdm

In [2]:
# path = '../Data/efcore/IssueefcoreWebScrap.csv'
path = '../Data/powershell/Issueazure-powershellWebScrap.csv'
dataset = pd.read_csv(path)
dataset

Unnamed: 0,RepoID,IssueID,Title_Description,AST,FixedByID,Name,CreatedDate
0,azure-powershellWebScrap,120,get-azureaccount ignores -name parameter ive t...,,unknown,unknown,27/01/2015
1,azure-powershellWebScrap,1200,azurerm iaas feature request allow async opera...,,unknown,azure ps team,29/10/2015
2,azure-powershellWebScrap,1201,updated help.xml for arm 1.0 preview / azurerm...,,unknown,unknown,29/10/2015
3,azure-powershellWebScrap,1202,new help.xml for arm 1.0 preview / azure api m...,,unknown,unknown,29/10/2015
4,azure-powershellWebScrap,12065,cannot verify the microsoft .net framework ver...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,unknown,azure ps team| more info label| customer-repor...,04/06/2020
...,...,...,...,...,...,...,...
2535,azure-powershellWebScrap,9238,workspace id not found for oms resource < -- ...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,yoramsinger,monitor - alerts| service attention,17/05/2019
2536,azure-powershellWebScrap,9239,how to add app insights to function app? i hop...,,francisco-gamino,functions| service attention| question,17/05/2019
2537,azure-powershellWebScrap,9241,add ecc certificate support to azure keyvault ...,,unknown,generator| keyvault| service attention,18/05/2019
2538,azure-powershellWebScrap,9244,get-azkeyvault output object properties with s...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,unknown,azure ps team,18/05/2019


In [3]:
def get_map(labels):
    l_set = set()
    for label in labels:
        l_set.update(label)
    ids2token = list(l_set)
    token2ids = {ids2token[i] : i for i in range(len(ids2token))}
    return ids2token, token2ids

def onehot(labels, token2ids):
    vec = [0 for i in token2ids]
    for label in labels.split('| '):
        vec[token2ids[label]] = 1
    return vec

def lab(labels, token2ids):
    return [token2ids[label] for label in labels.split('| ')]

def label_vectorize(data):
    data = data.rename(columns={'Title_Description' : 'Context', 'AST' : 'AST', 'FixedByID' : 'Dev', 'Name' : 'Btype'})
    data = data[['Context', 'AST', 'Dev', 'Btype']]
    # avoid NaN in dataset
    data['Context'].fillna('[UNK]', inplace=True)
    data['AST'].fillna('[UNK]', inplace=True)
    
    D_labels = [label.split('| ') for label in data['Dev']]
    _D_ids2token, D_token2ids = get_map(D_labels)
    data['Dev_l'] = data['Dev'].map(partial(lab, token2ids = D_token2ids))
    data['Dev_vec'] = data['Dev'].map(partial(onehot, token2ids = D_token2ids))
    
    B_labels = [label.split('| ') for label in data['Btype']]
    _B_ids2token, B_token2ids = get_map(B_labels)
    data['Btype_l'] = data['Btype'].map(partial(lab, token2ids = B_token2ids))
    data['Btype_vec'] = data['Btype'].map(partial(onehot, token2ids = B_token2ids))
    
    return data, _D_ids2token, _B_ids2token


dataset, D_ids2token, B_ids2token = label_vectorize(dataset)
n_classes = [len(D_ids2token), len(B_ids2token)]
# data.to_csv('efcore.csv')
dataset

Unnamed: 0,Context,AST,Dev,Btype,Dev_l,Dev_vec,Btype_l,Btype_vec
0,get-azureaccount ignores -name parameter ive t...,[UNK],unknown,unknown,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[121],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,azurerm iaas feature request allow async opera...,[UNK],unknown,azure ps team,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[134],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,updated help.xml for arm 1.0 preview / azurerm...,[UNK],unknown,unknown,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[121],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,new help.xml for arm 1.0 preview / azure api m...,[UNK],unknown,unknown,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[121],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,cannot verify the microsoft .net framework ver...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,unknown,azure ps team| more info label| customer-repor...,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[134, 75, 119, 128]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...
2535,workspace id not found for oms resource < -- ...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,yoramsinger,monitor - alerts| service attention,[239],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[89, 38]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2536,how to add app insights to function app? i hop...,[UNK],francisco-gamino,functions| service attention| question,[330],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[50, 38, 128]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2537,add ecc certificate support to azure keyvault ...,[UNK],unknown,generator| keyvault| service attention,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[137, 145, 38]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2538,get-azkeyvault output object properties with s...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,unknown,azure ps team,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[134],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [4]:
n_classes

[334, 150]

In [5]:
class MetaModel(nn.Module):
    def __init__(self, seq_len = 256, vocab_size: list = [30700, 30700], emb_dim = 300, filter: list = [64, 64], linear_concat = 60, n_classes: list = n_classes):
        """
        Args:
            vocab_size: list, the size of C/A vocab
            emb_dim: : int, the dim of C&A emb layer
            seq_len:  MAX_SEQ_LEN
            n_classes: list, the output size of D/B
        """
        super(MetaModel, self).__init__()
        # 1. Embedding
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.emb_C = nn.Embedding(self.vocab_size[0], self.emb_dim)
        self.emb_A = nn.Embedding(self.vocab_size[1], self.emb_dim)

        # 2. Feature Extracting separately
        self.filter_C, self.filter_A = filter
        self.seq_len = seq_len
        self.feature_C = nn.Sequential(
            # (Batch_sz, emb_dim, seq_len)
            nn.Conv1d(self.emb_dim, self.filter_C, kernel_size=3, padding='same'),
            nn.ReLU(),
            # (Batch_sz, filter_c, seq_len)
            nn.MaxPool1d(self.seq_len, 1),
            # (Batch_sz, filter_c, 1)
            nn.Flatten(),
            # (Batch_sz, filter_c)
        )
        self.feature_A = nn.Sequential(
            nn.Conv1d(self.emb_dim, self.filter_A, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool1d(self.seq_len, 1),
            nn.Flatten(),
        )

        # 3. Joint Linear
        self.linear_concat = linear_concat
        self.fc = nn.Sequential(
            # (Batch_sz, filter_C + filter_A)
            nn.BatchNorm1d(self.filter_C + self.filter_A, affine=False),
            nn.Dropout(0.5),
            nn.Linear(self.filter_C + self.filter_A, self.linear_concat),
            # (Batch_sz, linear_concat)
            nn.ReLU()
        )

        # 4. Respective CLS
        self.n_classes_D, self.n_classes_B = n_classes
        self.fc_D = nn.Linear(self.linear_concat, self.n_classes_D)
        self.fc_B = nn.Linear(self.linear_concat, self.n_classes_B)

    def forward(self, x_C, x_A):
        # 1. Embedding
        # (Batch_sz, seq_len)
        x_C = self.emb_C(x_C)
        x_A = self.emb_A(x_A)
        # (Batch_sz, seq_len, emb_dim)
        # 2. Feature Extracting separately
        x_C = x_C.permute(0, 2, 1)
        x_A = x_A.permute(0, 2, 1)
        # (Batch_sz, emb_dim, seq_len)
        x_C = self.feature_C(x_C)
        x_A = self.feature_A(x_A)
        # (Batch_sz, filter)
        # 3. Joint Linear
        x = torch.concat((x_C, x_A), 1)
        # (Batch_sz, filter_C + filter_A)
        x = self.fc(x)
        # (Batch_sz, linear_concat)
        # 4. Respective CLS
        y_D = self.fc_D(x)
        # (Batch_sz, n_classes_D)
        y_B = self.fc_B(x)
        # (Batch_sz, n_classes_B)
        y = torch.concat((y_D, y_B), dim = 1)
        return y

In [6]:
check_point = 'bert-base-uncased'

tokenizer = AutoTokenizer.from_pretrained(check_point)
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=n_classes[0] + n_classes[1], problem_type="multi_label_classification")
# model = MetaModel()

  return self.fget.__get__(instance, owner)()
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# TODO: 可能AST被截断太多
def tokenize_function(_tokenizer, example, max_seq_len = 512):
    example = example if type(example) == str else _tokenizer.unk_token
    return _tokenizer(example, padding='max_length',
                                truncation=True, max_length=max_seq_len, return_tensors="pt")

def tensor_func(example):
    return torch.tensor(example)

# dataset['Context'] = dataset['Context'].map(partial(tokenize_function, tokenizer))
# dataset['AST'] = dataset['AST'].map(partial(tokenize_function, tokenizer))
dataset['Input'] = dataset['Context'] + dataset['AST']
dataset['Input'] = dataset['Input'].map(partial(tokenize_function, tokenizer))
# dataset['Dev_l'] = dataset['Dev_l'].map(tensor_func)
# dataset['Dev_vec'] = dataset['Dev_vec'].map(tensor_func)
# dataset['Btype_l'] = dataset['Btype_l'].map(tensor_func)
# dataset['Btype_vec'] = dataset['Btype_vec'].map(tensor_func)
dataset['Output'] = dataset['Dev_vec'] + dataset['Btype_vec']
dataset['Output'] = dataset['Output'].map(tensor_func)
dataset

Unnamed: 0,Context,AST,Dev,Btype,Dev_l,Dev_vec,Btype_l,Btype_vec,Input,Output
0,get-azureaccount ignores -name parameter ive t...,[UNK],unknown,unknown,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[121],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
1,azurerm iaas feature request allow async opera...,[UNK],unknown,azure ps team,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[134],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
2,updated help.xml for arm 1.0 preview / azurerm...,[UNK],unknown,unknown,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[121],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
3,new help.xml for arm 1.0 preview / azure api m...,[UNK],unknown,unknown,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[121],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
4,cannot verify the microsoft .net framework ver...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,unknown,azure ps team| more info label| customer-repor...,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[134, 75, 119, 128]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
...,...,...,...,...,...,...,...,...,...,...
2535,workspace id not found for oms resource < -- ...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,yoramsinger,monitor - alerts| service attention,[239],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[89, 38]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
2536,how to add app insights to function app? i hop...,[UNK],francisco-gamino,functions| service attention| question,[330],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[50, 38, 128]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
2537,add ecc certificate support to azure keyvault ...,[UNK],unknown,generator| keyvault| service attention,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[137, 145, 38]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."
2538,get-azkeyvault output object properties with s...,<bm>mainvoidpredefinedtype|methoddeclaration|p...,unknown,azure ps team,[226],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[134],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[input_ids, token_type_ids, attention_mask]","[tensor(0), tensor(0), tensor(0), tensor(0), t..."


In [8]:
train_dataset = dataset[:int(0.64*len(dataset))].reset_index(drop=True)
# TODO: val random -- scikit
val_dataset = dataset[int(0.64*len(dataset)) : int(0.8*len(dataset))].reset_index(drop=True)
test_dataset = dataset[int(0.8*len(dataset)):].reset_index(drop=True)

In [9]:
from torch.utils.data import Dataset, DataLoader


class TextCodeDataset(Dataset):
    def __init__(self, data):
        super().__init__()
        self.data = data
        
    def __getitem__(self, item):
        return self.data['Input'][item], self.data['Output'][item]
    
    def __len__(self):
        return len(self.data)
    
train_dataset = TextCodeDataset(train_dataset)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16)
val_dataset = TextCodeDataset(val_dataset)
val_dataloader = DataLoader(val_dataset, shuffle=True, batch_size=16)
test_dataset = TextCodeDataset(test_dataset)
test_dataloader = DataLoader(test_dataset, shuffle=True, batch_size=16)

In [10]:
class CustomizedBCELoss(nn.Module):
    """
    a flexible version of BCE,
    which enable the loss to focus more on the performance of positive samples' prediction
    """

    def __init__(self, weight_pos=0.8, weight_neg=0.2, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.weight_pos = weight_pos
        self.weight_neg = weight_neg

    def forward(self, x, y):
        x = nn.Sigmoid()(x)
        loss_pos = y * torch.log(x)
        loss_neg = (1 - y) * torch.log(1 - x)
        # loss = 0.8*loss_pos + 0.2*loss_neg
        loss = self.weight_pos * loss_pos + self.weight_neg * loss_neg
        return -torch.sum(loss)

In [11]:
def metrics(y: torch.Tensor, pred: torch.Tensor, split_pos: list = n_classes, threshold: float = 0.5, from_logits=True):
    if from_logits:
        pred = nn.Sigmoid()(pred)
    pred = torch.where(pred > threshold, 1, 0)

    y_d, y_b = torch.split(y, split_pos, dim=1)
    pred_d, pred_b = torch.split(pred, split_pos, dim=1)

    TPd, TPb = torch.sum(y_d * pred_d, dim=1), torch.sum(y_b * pred_b, dim=1)
    TNd, TNb = torch.sum((1 - y_d) * (1 - pred_d), dim=1), torch.sum((1 - y_b) * (1 - pred_b), dim=1)
    FPd, FPb = torch.sum((1 - y_d) * pred_d, dim=1), torch.sum((1 - y_b) * pred_b, dim=1)
    FNd, FNb = torch.sum(y_d * (1 - pred_d), dim=1), torch.sum(y_b * (1 - pred_b), dim=1)

    acc = torch.mean((TPd + TNd) / (TPd + TNd + FPd + FNd + 1e-6)).item(), torch.mean(
        (TPb + TNb) / (TPb + TNb + FPb + FNb + 1e-6)).item()
    recall = torch.mean(TPd / (TPd + FNd + 1e-6)).item(), torch.mean(TPb / (TPb + FNb + 1e-6)).item()
    precision = torch.mean(TPd / (TPd + FPd + 1e-6)).item(), torch.mean(TPb / (TPb + FPb + 1e-6)).item()
    F1 = 2 * recall[0] * precision[0] / (recall[0] + precision[0] + 1e-6), 2 * recall[1] * precision[1] / (
            recall[1] + precision[1] + 1e-6)

    return {
        'acc': acc,
        'precision': precision,
        'recall': recall,
        'F1': F1
    }

In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)

# loss_fn = nn.BCEWithLogitsLoss().to(device)
loss_fn = CustomizedBCELoss().to(device)

num_epochs = 10
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

In [13]:
from tqdm import tqdm

for epoch in trange(num_epochs):
    # train
    model.train()
    train_loss = 0.0
    for x, y in train_dataloader:
        x, y = {k: v.to(device) for k, v in x.items()}, y.to(device)
        outputs = model(x['input_ids'].squeeze(dim=1), attention_mask=x['attention_mask'].squeeze(dim=1))[0]
        # outputs = model(x['input_ids'].squeeze(dim=1)[:, :256], x['input_ids'].squeeze(dim=1)[:, 256:])

        loss = loss_fn(outputs, y.float())
        train_loss += loss.item()/len(train_dataloader)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
    print('{}th epoch\n train_loss: {}\n'.format(epoch, train_loss))
    
    # val
    model.eval()
    val_loss, val_acc, val_f1 = 0.0, [0.0, 0.0], [0.0, 0.0]
    for x, y in val_dataloader:
        # 这里有没有现成的metrics函数可以用啊 - 库里面的
        x, y = {k: v.to(device) for k, v in x.items()}, y.to(device)
        outputs = model(x['input_ids'].squeeze(dim=1), attention_mask=x['attention_mask'].squeeze(dim=1))[0]
        # outputs = model(x['input_ids'].squeeze(dim=1)[:, :256], x['input_ids'].squeeze(dim=1)[:, 256:])
        
        loss = loss_fn(outputs, y.float())
        val_loss += loss.item()/len(val_dataloader)
        metric = metrics(y, outputs)
        val_acc[0] += metric['acc'][0]/len(val_dataloader)
        val_acc[1] += metric['acc'][1]/len(val_dataloader)
        val_f1[0] += metric['F1'][0]/len(val_dataloader)
        val_f1[1] += metric['F1'][1]/len(val_dataloader)
        
    print('{}th epoch\n val_loss: {}\n val_acc:{}\n val_f1: {}'.format(epoch, val_loss, val_acc, val_f1))

  0%|          | 0/10 [00:00<?, ?it/s]

0th epoch
 train_loss: 618.867682064281



 10%|█         | 1/10 [00:21<03:14, 21.64s/it]

0th epoch
 val_loss: 274.3553466796875
 val_acc:[0.9966492263170386, 0.9847230063034939]
 val_f1: [0.4776776178443618, 0.4198561279807075]
1th epoch
 train_loss: 210.25764061422902



 20%|██        | 2/10 [00:43<02:52, 21.50s/it]

1th epoch
 val_loss: 152.26613000723032
 val_acc:[0.9965751927632549, 0.9847024037287786]
 val_f1: [0.46531499096173373, 0.4219440276127432]
2th epoch
 train_loss: 147.8519323012408



 30%|███       | 3/10 [01:04<02:30, 21.49s/it]

2th epoch
 val_loss: 123.80674362182614
 val_acc:[0.9965937022979442, 0.9847436042932363]
 val_f1: [0.4684056476823179, 0.42081042509061856]
3th epoch
 train_loss: 129.56329203586958



 40%|████      | 4/10 [01:26<02:09, 21.50s/it]

3th epoch
 val_loss: 112.87237167358394
 val_acc:[0.9965844475305996, 0.9846611917018888]
 val_f1: [0.4684056476823346, 0.41577001937411256]
4th epoch
 train_loss: 121.64714790793032



 50%|█████     | 5/10 [01:47<01:47, 21.50s/it]

4th epoch
 val_loss: 107.49152403611404
 val_acc:[0.996621462015005, 0.9847023922663471]
 val_f1: [0.47458696112371423, 0.4183058124442277]
5th epoch
 train_loss: 117.61344689013913



 60%|██████    | 6/10 [02:09<01:25, 21.50s/it]

5th epoch
 val_loss: 104.6361312866211
 val_acc:[0.9966122072476605, 0.984681798861577]
 val_f1: [0.4714963044030419, 0.41999400829742883]
6th epoch
 train_loss: 115.13303098491595



 70%|███████   | 7/10 [02:30<01:04, 21.50s/it]

6th epoch
 val_loss: 102.90813856858473
 val_acc:[0.996612211832633, 0.9847230063034937]
 val_f1: [0.47149630440300794, 0.4191382991029497]
7th epoch
 train_loss: 113.63295510235956



 80%|████████  | 8/10 [02:52<00:42, 21.50s/it]

7th epoch
 val_loss: 101.90564669095554
 val_acc:[0.9966307236598085, 0.984619977382513]
 val_f1: [0.47458696112367954, 0.4143154734868918]
8th epoch
 train_loss: 112.7410660538019



 90%|█████████ | 9/10 [03:13<00:21, 21.50s/it]

8th epoch
 val_loss: 101.34389290442834
 val_acc:[0.9966307213673222, 0.9847642091604379]
 val_f1: [0.47458696112367715, 0.4193210689878844]
9th epoch
 train_loss: 112.4302392099418



100%|██████████| 10/10 [03:35<00:00, 21.50s/it]

9th epoch
 val_loss: 101.17094883551964
 val_acc:[0.9966307213673222, 0.9847436042932364]
 val_f1: [0.4745869611236821, 0.41925889493054525]





In [14]:
# test
model.eval()
test_loss, test_acc, test_f1 = 0.0, [0.0, 0.0], [0.0, 0.0]
for x, y in tqdm(test_dataloader):
    x, y = {k: v.to(device) for k, v in x.items()}, y.to(device)
    outputs = model(x['input_ids'].squeeze(dim=1), attention_mask=x['attention_mask'].squeeze(dim=1))[0]
    # outputs = model(x['input_ids'].squeeze(dim=1)[:, :256], x['input_ids'].squeeze(dim=1)[:, 256:])
    
    loss = loss_fn(outputs, y.float())
    test_loss += loss.item()/len(test_dataloader)
    metric = metrics(y, outputs)
    test_acc[0] += metric['acc'][0]/len(test_dataloader)
    test_acc[1] += metric['acc'][1]/len(test_dataloader)
    test_f1[0] += metric['F1'][0]/len(test_dataloader)
    test_f1[1] += metric['F1'][1]/len(test_dataloader)
    
print('test_loss: {}\n test_acc:{}\n test_f1: {}'.format(test_loss, test_acc, test_f1))

100%|██████████| 32/32 [00:02<00:00, 14.93it/s]

test_loss: 111.57229113578796
 test_acc:[0.9958735127002001, 0.9842578284442425]
 test_f1: [0.3580720751825264, 0.4140237726267897]





In [15]:
n_classes

[334, 150]