In [1]:
import pandas as pd
from functools import partial

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW, get_scheduler
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from tqdm import trange, tqdm

In [2]:
def get_map(labels):
    """Build the id <-> token lookup tables for a collection of label lists.

    Args:
        labels: iterable of iterables of labels, e.g. ``[['a', 'b'], ['b']]``.
            The labels must be hashable and mutually orderable.

    Returns:
        tuple ``(ids2token, token2ids)`` where ``ids2token`` is the sorted list
        of distinct labels and ``token2ids`` maps each label to its position
        in that list.
    """
    l_set = set()
    for label in labels:
        l_set.update(label)
    # Sort instead of list(set): set iteration order for strings depends on hash
    # randomisation, which would give a different label->id mapping on every run.
    ids2token = sorted(l_set)
    token2ids = {token: idx for idx, token in enumerate(ids2token)}
    return ids2token, token2ids

def onehot(labels, token2ids):
    """Encode a '| '-separated label string as a multi-hot list.

    Args:
        labels: string of label names joined by ``'| '``.
        token2ids: mapping from label name to its index.

    Returns:
        list of 0/1 ints with one slot per entry of ``token2ids``; the slots of
        the labels present in ``labels`` are set to 1.

    Raises:
        KeyError: if a label is not a key of ``token2ids``.
    """
    vec = [0] * len(token2ids)
    for name in labels.split('| '):
        vec[token2ids[name]] = 1
    return vec

def lab(labels, token2ids):
    """Translate a '| '-separated label string into a list of label ids.

    Args:
        labels: string of label names joined by ``'| '``.
        token2ids: mapping from label name to its index.

    Returns:
        list of ids, one per label, in the order the labels appear.
    """
    ids = []
    for name in labels.split('| '):
        ids.append(token2ids[name])
    return ids

def label_vectorize(data):
    """Select the model columns, fill missing values and encode both label columns.

    The input columns ``Title_Description``, ``FixedByID`` and ``Name`` are renamed
    to ``Context``, ``Dev`` and ``Btype``; ``AST`` keeps its name. Missing text is
    replaced by ``'[UNK]'`` and missing labels by ``'unknown'``.

    Args:
        data: DataFrame with the columns ``Title_Description``, ``AST``,
            ``FixedByID`` and ``Name``. Label cells are '| '-separated strings.

    Returns:
        tuple ``(data, D_ids2token, B_ids2token)``: a new frame holding
        ``Context``, ``AST``, ``Dev``, ``Btype`` plus ``Dev_l``/``Btype_l``
        (lists of label ids) and ``Dev_vec``/``Btype_vec`` (multi-hot lists),
        followed by the id->label lists for ``Dev`` and ``Btype``.
    """
    data = data.rename(columns={'Title_Description': 'Context', 'FixedByID': 'Dev', 'Name': 'Btype'})
    # Fill NaNs with one non-inplace call. `data[col].fillna(..., inplace=True)` on a
    # column of a sliced frame is chained assignment: it warns today and does not
    # write back at all under pandas Copy-on-Write.
    data = data[['Context', 'AST', 'Dev', 'Btype']].fillna(
        {'Context': '[UNK]', 'AST': '[UNK]', 'Dev': 'unknown', 'Btype': 'unknown'}
    )

    D_labels = [label.split('| ') for label in data['Dev']]
    D_ids2token, D_token2ids = get_map(D_labels)
    data['Dev_l'] = data['Dev'].map(partial(lab, token2ids=D_token2ids))
    data['Dev_vec'] = data['Dev'].map(partial(onehot, token2ids=D_token2ids))

    B_labels = [label.split('| ') for label in data['Btype']]
    B_ids2token, B_token2ids = get_map(B_labels)
    data['Btype_l'] = data['Btype'].map(partial(lab, token2ids=B_token2ids))
    data['Btype_vec'] = data['Btype'].map(partial(onehot, token2ids=B_token2ids))

    return data, D_ids2token, B_ids2token

In [3]:
# TODO: the AST part may be getting truncated too aggressively
def tokenize_function(_tokenizer, example, max_seq_len = 512):
    """Tokenize one example, padded and truncated to ``max_seq_len``.

    Args:
        _tokenizer: callable tokenizer exposing an ``unk_token`` attribute.
        example: text to tokenize; any value whose type is not exactly ``str``
            is replaced by ``_tokenizer.unk_token``.
        max_seq_len: length passed as ``max_length`` to the tokenizer.

    Returns:
        Whatever ``_tokenizer`` returns when asked for PyTorch tensors.
    """
    if type(example) is not str:
        # non-string cells fall back to the tokenizer's unknown token
        example = _tokenizer.unk_token
    return _tokenizer(
        example,
        padding='max_length',
        truncation=True,
        max_length=max_seq_len,
        return_tensors="pt",
    )

def tensor_func(example):
    """Wrap ``example`` in a new ``torch.Tensor`` via ``torch.tensor``."""
    as_tensor = torch.tensor(example)
    return as_tensor

In [4]:
from torch.utils.data import Dataset, DataLoader

class TextCodeDataset(Dataset):
    """Map-style dataset over a DataFrame that has 'Input' and 'Output' columns."""

    def __init__(self, data):
        """Keep a reference to ``data``; rows are read lazily in ``__getitem__``."""
        super().__init__()
        self.data = data

    def __getitem__(self, item):
        """Return the ``(Input, Output)`` pair of the row at position ``item``."""
        # Use positional access: `series[item]` is a label lookup and raises KeyError
        # (or returns the wrong row) when the frame's index is not 0..n-1.
        return self.data['Input'].iloc[item], self.data['Output'].iloc[item]

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.data)

In [5]:
class MetaModel(nn.Module):
    """Two-branch TextCNN with a shared hidden layer and two classification heads.

    Each input (C and A) has its own embedding and its own
    Conv1d -> ReLU -> max-pool branch. The two pooled feature vectors are
    concatenated, passed through a shared BatchNorm/Dropout/Linear/ReLU stack, and
    fed to the D and B linear heads. The head outputs are concatenated.
    """

    def __init__(self, n_classes: list, seq_len = 256, vocab_size: list = [30700, 30700], emb_dim = 300, filter: list = [64, 64], linear_concat = 60):
        """
        Args:
            n_classes: list, output sizes of the D and B heads (in that order)
            seq_len: kernel size of the max-pool, i.e. the expected length of each input
            vocab_size: list, vocabulary sizes of the C and A embeddings
            emb_dim: int, embedding dimension shared by both embeddings
            filter: list, number of Conv1d output channels for the C and A branches
            linear_concat: int, width of the shared hidden layer
        """
        super().__init__()

        # --- 1. embeddings (C first, then A) ---
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.emb_C = nn.Embedding(self.vocab_size[0], self.emb_dim)
        self.emb_A = nn.Embedding(self.vocab_size[1], self.emb_dim)

        # --- 2. one convolutional feature extractor per input ---
        self.filter_C, self.filter_A = filter
        self.seq_len = seq_len
        self.feature_C = self._conv_branch(self.emb_dim, self.filter_C, self.seq_len)
        self.feature_A = self._conv_branch(self.emb_dim, self.filter_A, self.seq_len)

        # --- 3. shared layer over the concatenated features ---
        self.linear_concat = linear_concat
        joint_width = self.filter_C + self.filter_A
        self.fc = nn.Sequential(
            nn.BatchNorm1d(joint_width, affine=False),   # (batch, filter_C + filter_A)
            nn.Dropout(0.5),
            nn.Linear(joint_width, self.linear_concat),  # (batch, linear_concat)
            nn.ReLU(),
        )

        # --- 4. one linear head per task ---
        self.n_classes_D, self.n_classes_B = n_classes
        self.fc_D = nn.Linear(self.linear_concat, self.n_classes_D)
        self.fc_B = nn.Linear(self.linear_concat, self.n_classes_B)

    @staticmethod
    def _conv_branch(in_channels, out_channels, pool_size):
        """Conv1d(k=3, 'same') -> ReLU -> MaxPool1d(pool_size, stride 1) -> Flatten."""
        return nn.Sequential(
            # in:  (batch, in_channels, seq_len)
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding='same'),
            nn.ReLU(),
            # pooling over a window of pool_size leaves one step when seq_len == pool_size
            nn.MaxPool1d(kernel_size=pool_size, stride=1),
            # out: (batch, out_channels) after flattening
            nn.Flatten(),
        )

    def forward(self, x_C, x_A):
        """Map two id tensors of shape (batch, seq_len) to (batch, n_classes_D + n_classes_B)."""
        # embed, then move channels first for Conv1d: (batch, emb_dim, seq_len)
        emb_C = self.emb_C(x_C).permute(0, 2, 1)
        emb_A = self.emb_A(x_A).permute(0, 2, 1)

        # per-input pooled features: (batch, filter)
        feat_C = self.feature_C(emb_C)
        feat_A = self.feature_A(emb_A)

        # shared representation: (batch, linear_concat)
        shared = self.fc(torch.concat((feat_C, feat_A), 1))

        # D outputs first, B outputs second
        return torch.concat((self.fc_D(shared), self.fc_B(shared)), dim = 1)

In [6]:
class CustomizedBCELoss(nn.Module):
    """
    A weighted variant of binary cross-entropy on logits.

    The positive term ``y * log(sigmoid(x))`` and the negative term
    ``(1 - y) * log(1 - sigmoid(x))`` are scaled by ``weight_pos`` and
    ``weight_neg`` so the loss can focus more on positive labels. The result is
    summed over all elements (no averaging).
    """

    def __init__(self, weight_pos=0.8, weight_neg=0.2, *args, **kwargs):
        """
        Args:
            weight_pos: multiplier of the positive-label term
            weight_neg: multiplier of the negative-label term
            *args, **kwargs: forwarded to ``nn.Module.__init__``
        """
        super().__init__(*args, **kwargs)
        self.weight_pos = weight_pos
        self.weight_neg = weight_neg

    def forward(self, x, y):
        """
        Args:
            x: raw logits
            y: float targets, broadcastable against ``x``

        Returns:
            scalar tensor, the weighted negative log-likelihood summed over all elements
        """
        # log(sigmoid(x)) and log(1 - sigmoid(x)) == log(sigmoid(-x)), computed with
        # logsigmoid. Taking log() of a saturated sigmoid gives -inf, which turns into
        # NaN (0 * -inf) for confident correct logits and inf for confident wrong ones.
        log_p = nn.functional.logsigmoid(x)
        log_not_p = nn.functional.logsigmoid(-x)
        loss = self.weight_pos * (y * log_p) + self.weight_neg * ((1 - y) * log_not_p)
        return -torch.sum(loss)

In [7]:
def metrics(y: torch.Tensor, pred: torch.Tensor, split_pos: list, threshold: float = 0.5, from_logits=True):
    """Per-sample accuracy / precision / recall / F1 for the two label groups.

    Args:
        y: 0/1 target tensor whose columns hold the D labels followed by the B labels.
        pred: predictions laid out like ``y``; logits when ``from_logits`` is true,
            otherwise probabilities.
        split_pos: two column counts ``[n_D, n_B]`` passed to ``torch.split``.
        threshold: probability above which a label counts as predicted.
        from_logits: apply a sigmoid to ``pred`` before thresholding.

    Returns:
        dict with keys ``'acc'``, ``'precision'``, ``'recall'`` and ``'F1'``; each
        value is a ``(D, B)`` tuple of Python floats. Accuracy, precision and recall
        are computed per row and averaged over rows; F1 is derived from the averaged
        precision and recall.
    """
    eps = 1e-6

    def _group_scores(target, hard):
        # confusion counts per row
        not_target, not_hard = 1 - target, 1 - hard
        tp = torch.sum(target * hard, dim=1)
        tn = torch.sum(not_target * not_hard, dim=1)
        fp = torch.sum(not_target * hard, dim=1)
        fn = torch.sum(target * not_hard, dim=1)

        acc = torch.mean((tp + tn) / (tp + tn + fp + fn + eps)).item()
        rec = torch.mean(tp / (tp + fn + eps)).item()
        prec = torch.mean(tp / (tp + fp + eps)).item()
        f1 = 2 * rec * prec / (rec + prec + eps)
        return acc, prec, rec, f1

    probs = torch.sigmoid(pred) if from_logits else pred
    hard_pred = torch.where(probs > threshold, 1, 0)

    y_d, y_b = torch.split(y, split_pos, dim=1)
    pred_d, pred_b = torch.split(hard_pred, split_pos, dim=1)

    acc_d, prec_d, rec_d, f1_d = _group_scores(y_d, pred_d)
    acc_b, prec_b, rec_b, f1_b = _group_scores(y_b, pred_b)

    return {
        'acc': (acc_d, acc_b),
        'precision': (prec_d, prec_b),
        'recall': (rec_d, rec_b),
        'F1': (f1_d, f1_b),
    }

In [8]:
def train_imm(_path, _logname, _loss_fn, _is_textcnn = False, _num_epochs = 20, _lr = 3e-5, _ckpt = 'bert-base-uncased', device = 'cuda' if torch.cuda.is_available() else 'cpu'):
    """Train and evaluate one model on one CSV dataset, then write a text log.

    The frame is split by row order into 64% train / 16% validation / 20% test.
    Validation runs after every epoch, the test set once at the end, and the
    collected log is written to ``'../res_log/<_logname>.txt'``.

    Args:
        _path: path of the CSV file to load.
        _logname: run name; first line of the log and stem of the log file.
        _loss_fn: ``nn.Module`` called as ``_loss_fn(logits, float_targets)``.
        _is_textcnn: train ``MetaModel`` when true, otherwise the sequence
            classification model loaded from ``_ckpt``.
        _num_epochs: number of training epochs.
        _lr: AdamW learning rate (decayed linearly, no warm-up).
        _ckpt: checkpoint name for the tokenizer (and the model, unless TextCNN).
        device: torch device string; the default is evaluated once, at definition time.

    Returns:
        None. Results are printed and written to the log file.
    """
    import os  # function-scope import so this block does not depend on the import cell

    logname = '../res_log/' + _logname + '.txt'
    # Create the log directory up front: otherwise a missing folder is only
    # discovered by open() after the whole training run has finished.
    os.makedirs(os.path.dirname(logname), exist_ok=True)
    logstr = _logname + '\n' + '-'*60 + '\n'

    # dataset label vectorize
    dataset = pd.read_csv(_path)
    logstr += 'dataset shape:{}\n'.format(dataset.shape)
    print('dataset shape:{}'.format(dataset.shape))
    dataset, D_ids2token, B_ids2token = label_vectorize(dataset)
    n_classes = [len(D_ids2token), len(B_ids2token)]
    logstr += 'n_classes:{}\n'.format(n_classes) + '-'*60 + '\n'
    print('n_classes: ', n_classes)

    check_point = _ckpt
    tokenizer = AutoTokenizer.from_pretrained(check_point)
    # dataset tensorize
    # NOTE(review): Context and AST are concatenated into one string (no separator)
    # and tokenized to 512 ids; the TextCNN path later cuts those ids at position 256,
    # a fixed offset rather than the real Context/AST boundary - confirm this is intended.
    dataset['Input'] = dataset['Context'] + dataset['AST']
    dataset['Input'] = dataset['Input'].map(partial(tokenize_function, tokenizer))
    dataset['Output'] = dataset['Dev_vec'] + dataset['Btype_vec']  # list concat: D slots then B slots
    dataset['Output'] = dataset['Output'].map(tensor_func)

    # split dataset by row order
    # TODO: randomised split (e.g. scikit-learn)
    train_end, val_end = int(0.64*len(dataset)), int(0.8*len(dataset))
    train_dataset = dataset[:train_end].reset_index(drop=True)
    val_dataset = dataset[train_end:val_end].reset_index(drop=True)
    test_dataset = dataset[val_end:].reset_index(drop=True)

    # wrap dataset & dataloader
    train_dataloader = DataLoader(TextCodeDataset(train_dataset), shuffle=True, batch_size=16)
    # Evaluation loaders are not shuffled: metrics are averaged per batch, so a
    # random batch composition would make the reported numbers vary between runs.
    val_dataloader = DataLoader(TextCodeDataset(val_dataset), shuffle=False, batch_size=16)
    test_dataloader = DataLoader(TextCodeDataset(test_dataset), shuffle=False, batch_size=16)

    # load model
    if _is_textcnn:
        model = MetaModel(n_classes = n_classes)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(
            check_point, num_labels=n_classes[0] + n_classes[1], problem_type="multi_label_classification")
    model = model.to(device)

    # loss
    loss_fn = _loss_fn.to(device)

    # optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=_lr)

    # lr_scheduler: linear decay over every optimisation step of the run
    num_epochs = _num_epochs
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )

    # train process
    for epoch in trange(num_epochs):
        # train
        model.train()
        train_loss = 0.0
        for x, y in train_dataloader:
            x, y = {k: v.to(device) for k, v in x.items()}, y.to(device)
            outputs = _forward_batch(model, x, _is_textcnn)

            loss = loss_fn(outputs, y.float())
            train_loss += loss.item()/len(train_dataloader)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
        logstr += '{}th epoch\n train_loss: {}\n'.format(epoch, train_loss)

        # val
        val_loss, val_acc, val_f1 = _evaluate(model, val_dataloader, loss_fn, n_classes, _is_textcnn, device)
        logstr += '{}th epoch\n val_loss: {}\n val_acc:{}\n val_f1: {}\n'.format(epoch, val_loss, val_acc, val_f1)

    # test
    test_loss, test_acc, test_f1 = _evaluate(
        model, test_dataloader, loss_fn, n_classes, _is_textcnn, device, show_progress=True)
    logstr += '-' * 60 + '\ntest_loss: {}\n test_acc:{}\n test_f1: {}'.format(test_loss, test_acc, test_f1)
    print('test_loss: {}\n test_acc:{}\n test_f1: {}'.format(test_loss, test_acc, test_f1))

    with open(logname, 'w') as f:
        f.write(logstr)


def _forward_batch(model, x, is_textcnn, split_at=256):
    """Run one batch through either model flavour and return the raw logits."""
    # the tokenizer output carries an extra dim of size 1 at position 1, which squeeze(dim=1) drops
    input_ids = x['input_ids'].squeeze(dim=1)
    if is_textcnn:
        # MetaModel takes two id tensors: the first `split_at` ids and the remainder
        return model(input_ids[:, :split_at], input_ids[:, split_at:])
    return model(input_ids, attention_mask=x['attention_mask'].squeeze(dim=1))[0]


def _evaluate(model, dataloader, loss_fn, n_classes, is_textcnn, device, show_progress=False):
    """Return (loss, [acc_D, acc_B], [f1_D, f1_B]) averaged over the loader's batches."""
    # TODO: check whether a library-provided metrics function could replace `metrics`
    model.eval()
    n_batches = len(dataloader)
    total_loss, acc, f1 = 0.0, [0.0, 0.0], [0.0, 0.0]
    batches = tqdm(dataloader) if show_progress else dataloader
    # no_grad: evaluation needs no autograd graph, which saves memory and time
    with torch.no_grad():
        for x, y in batches:
            x, y = {k: v.to(device) for k, v in x.items()}, y.to(device)
            outputs = _forward_batch(model, x, is_textcnn)

            total_loss += loss_fn(outputs, y.float()).item()/n_batches
            metric = metrics(y, outputs, split_pos = n_classes)
            for task in range(2):
                acc[task] += metric['acc'][task]/n_batches
                f1[task] += metric['F1'][task]/n_batches
    return total_loss, acc, f1


In [9]:
# Experiment grid: 7 datasets x 2 model variants x the listed losses.
# Every entry is a (value, label) tuple. The driver loop joins the labels into the
# run name, which also becomes the log file name, so the label text (including the
# leading spaces some labels carry) shows up verbatim in those names.
pathlist = [
    ('../Data/efcore/IssueefcoreWebScrap.csv', ' efcore'),
    ('../Data/elasticSearch/IssueelasticsearchWebScrap.csv', 'elasticSearch'),
    ('../Data/mixedRealityToolUnity/IssuemixedrealitytoolkitunityWebScrap.csv', 'mixedRealityToolkitUnity'),
    ('../Data/monoGame/IssuemonogameWebScrap.csv', 'monogame'),
    ('../Data/powershell/Issueazure-powershellWebScrap.csv', ' powershell'),
    ('../Data/realmJava/IssuerealmjavaWebScrap.csv', 'realmJava'),
    ('../Data/roslyn/IssueroslynWebScrap.csv', 'roslyn'),
]
# (loss module, label); the plain BCE-with-logits baseline is currently disabled
losslist = [
    # (nn.BCEWithLogitsLoss(), ' BCE'),
    (CustomizedBCELoss(), ' CBCE'),
]

# (checkpoint name, label); the driver loop selects the TextCNN model when the label is exactly 'TextCnn'
ckptlist = [
    ('bert-base-uncased', 'TextCnn'),  # checkpoint is used only for its tokenizer here
    ('bert-base-uncased', ' Bert'),
]

In [10]:
# Run every dataset x model x loss combination; each run writes its own log file.
for data_path, data_tag in pathlist:
    for ckpt_name, model_tag in ckptlist:
        for loss_module, loss_tag in losslist:
            # the TextCNN variant is identified by its label
            is_t = (model_tag == 'TextCnn')
            logname = ' '.join((data_tag, model_tag, loss_tag))
            print('-'*100, logname, '-'*100, sep='\n')
            train_imm(
                _path = data_path,
                _logname = logname,
                _loss_fn = loss_module,
                _is_textcnn = is_t,
                _ckpt = ckpt_name,
            )

----------------------------------------------------------------------------------------------------
 efcore TextCnn  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(6612, 7)

n_classes:  [25, 58]


100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 83/83 [00:00<00:00, 398.80it/s]


test_loss: 58.37801600077067
 test_acc:[0.9704024289027755, 0.9486275803611939]
 test_f1: [0.6326659284055287, 0.33103401637796614]
----------------------------------------------------------------------------------------------------
 efcore  Bert  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(6612, 7)

n_classes:  [25, 58]


  return self.fget.__get__(instance, owner)()
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 20/20 [19:16<00:00, 57.83s/it]
100%|██████████| 83/83 [00:05<00:00, 14.21it/s]


test_loss: 58.198204638010054
 test_acc:[0.9668810741010915, 0.9689105181808929]
 test_f1: [0.627194250389262, 0.44487236337918135]
----------------------------------------------------------------------------------------------------
elasticSearch TextCnn  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(5190, 7)

n_classes:  [105, 239]


100%|██████████| 20/20 [00:19<00:00,  1.03it/s]
100%|██████████| 65/65 [00:00<00:00, 387.57it/s]


test_loss: 117.51949533315802
 test_acc:[0.9921009852336002, 0.9888115479395936]
 test_f1: [0.5973890403059429, 0.28411149366010224]
----------------------------------------------------------------------------------------------------
elasticSearch  Bert  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(5190, 7)

n_classes:  [105, 239]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 20/20 [15:10<00:00, 45.55s/it]
100%|██████████| 65/65 [00:04<00:00, 14.11it/s]


test_loss: 98.26378373366134
 test_acc:[0.992620342511397, 0.9912691299731913]
 test_f1: [0.5778835647298459, 0.3939297324276889]
----------------------------------------------------------------------------------------------------
mixedRealityToolkitUnity TextCnn  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(2294, 7)

n_classes:  [56, 125]


100%|██████████| 20/20 [00:08<00:00,  2.35it/s]
100%|██████████| 29/29 [00:00<00:00, 518.90it/s]


test_loss: 229.9492040173761
 test_acc:[0.9723431262476689, 0.9670125533794537]
 test_f1: [0.5035057354130569, 0.2679069333614074]
----------------------------------------------------------------------------------------------------
mixedRealityToolkitUnity  Bert  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(2294, 7)

n_classes:  [56, 125]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 20/20 [06:38<00:00, 19.94s/it]
100%|██████████| 29/29 [00:02<00:00, 14.31it/s]


test_loss: 72.7936168539113
 test_acc:[0.9846654020506759, 0.9868150558964961]
 test_f1: [0.6103399669761274, 0.48731759486398823]
----------------------------------------------------------------------------------------------------
monogame TextCnn  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(1008, 7)

n_classes:  [5, 29]


100%|██████████| 20/20 [00:03<00:00,  5.23it/s]
100%|██████████| 13/13 [00:00<00:00, 444.53it/s]


test_loss: 68.82819131704478
 test_acc:[0.9865382955624509, 0.8708222325031574]
 test_f1: [0.9780504947949146, 0.3289915489306137]
----------------------------------------------------------------------------------------------------
monogame  Bert  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(1008, 7)

n_classes:  [5, 29]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 20/20 [02:55<00:00,  8.79s/it]
100%|██████████| 13/13 [00:00<00:00, 14.59it/s]


test_loss: 26.67033298198993
 test_acc:[0.9961536618379446, 0.9666113440807048]
 test_f1: [0.9903831708804972, 0.6462369696195541]
----------------------------------------------------------------------------------------------------
 powershell TextCnn  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(2540, 7)

n_classes:  [334, 150]


100%|██████████| 20/20 [00:09<00:00,  2.08it/s]
100%|██████████| 32/32 [00:00<00:00, 519.52it/s]


test_loss: 341.51186752319336
 test_acc:[0.9891018532216549, 0.9786154739558697]
 test_f1: [0.1766735567725918, 0.353310928710809]
----------------------------------------------------------------------------------------------------
 powershell  Bert  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(2540, 7)

n_classes:  [334, 150]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 20/20 [07:22<00:00, 22.12s/it]
100%|██████████| 32/32 [00:02<00:00, 14.30it/s]


test_loss: 107.40738582611084
 test_acc:[0.9958598706871271, 0.9842578209936619]
 test_f1: [0.35546791099945385, 0.4141248890460474]
----------------------------------------------------------------------------------------------------
realmJava TextCnn  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(1160, 7)

n_classes:  [16, 24]


100%|██████████| 20/20 [00:04<00:00,  4.56it/s]
100%|██████████| 15/15 [00:00<00:00, 455.21it/s]


test_loss: 72.78055267333983
 test_acc:[0.937239464124044, 0.8477429747581482]
 test_f1: [0.7352861962009034, 0.30120411463555324]
----------------------------------------------------------------------------------------------------
realmJava  Bert  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(1160, 7)

n_classes:  [16, 24]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 20/20 [03:22<00:00, 10.14s/it]
100%|██████████| 15/15 [00:01<00:00, 14.62it/s]


test_loss: 36.91586329142253
 test_acc:[0.9799477974573771, 0.9350693782170612]
 test_f1: [0.8478021549570639, 0.43160216262663004]
----------------------------------------------------------------------------------------------------
roslyn TextCnn  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(5062, 7)

n_classes:  [81, 124]


100%|██████████| 20/20 [00:18<00:00,  1.06it/s]
100%|██████████| 64/64 [00:00<00:00, 514.18it/s]


test_loss: 113.55053472518921
 test_acc:[0.9873432740569115, 0.9708952959626913]
 test_f1: [0.4917959059864993, 0.379168203168294]
----------------------------------------------------------------------------------------------------
roslyn  Bert  CBCE
----------------------------------------------------------------------------------------------------
dataset shape:(5062, 7)

n_classes:  [81, 124]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 20/20 [14:42<00:00, 44.10s/it]
100%|██████████| 64/64 [00:04<00:00, 14.33it/s]


test_loss: 89.99552261829376
 test_acc:[0.9873794391751289, 0.9790212530642748]
 test_f1: [0.46915675930611195, 0.5004688569873694]
