<a href="https://colab.research.google.com/github/Sansith/gpt2sp/blob/bertsp/model_training_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Training Script

### Necessary Library

In [1]:
pip install torch pandas===1.5.3 transformers numpy tokenizers koila tensorboard

Collecting koila
  Downloading koila-0.1.1-py3-none-any.whl (18 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x

In [1]:
import pdb

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
cd drive/MyDrive/Year4/FYP/effort-estimation/gpt2sp

/content/drive/MyDrive/Year4/FYP/effort-estimation/gpt2sp


In [4]:
import torch
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import GPT2Tokenizer, AdamW, get_linear_schedule_with_warmup , BertTokenizer
import numpy as np
import time
from torch.utils.tensorboard import SummaryWriter
from GPT2SP import GPT2ForSequenceClassification as GPT2SP
from transformers import GPT2ForSequenceClassification as LinearGPT2
from transformers import GPT2Config , BertConfig
import os
from tokenizers import Tokenizer
import torch.nn as nn

In [5]:

from transformers.modeling_outputs import SequenceClassifierOutputWithPast
import torch.nn as nn
from transformers import  BertPreTrainedModel , BertModel
import torch


class BertSP(BertPreTrainedModel):
    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head\.weight"]

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.transformer = BertModel(config)
        print("n_embd/hidden_size : ", config.hidden_size)
        self.dense1 = nn.Linear(config.hidden_size, 4 * config.hidden_size, bias=False)
        self.dense2 = nn.Linear(4 * config.hidden_size, config.hidden_size, bias=False)
        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)

        self.init_weights()

        # Model parallel
        self.model_parallel = False
        self.device_map = None


    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ...,
            config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.transformer(
            input_ids,
            past_key_values=past_key_values,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        hidden_states = transformer_outputs[0]

        # MLP Layer
        hidden_states = self.dense1(hidden_states)
        hidden_states = self.dense2(hidden_states)

        logits = self.score(hidden_states)

        if input_ids is not None:
            batch_size, sequence_length = input_ids.shape[:2]
        else:
            batch_size, sequence_length = inputs_embeds.shape[:2]

        assert (
            self.config.pad_token_id is not None or batch_size == 1
        ), "Cannot handle batch sizes > 1 if no padding token is defined."
        if self.config.pad_token_id is None:
            sequence_lengths = -1
        else:
            if input_ids is not None:
                sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1
            else:
                sequence_lengths = -1
                logger.warning(
                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
                    f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
                )

        pooled_logits = logits[range(batch_size), sequence_lengths]

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                #  We are doing regression
                loss_fct = nn.L1Loss()
                loss = loss_fct(pooled_logits.view(-1), labels.to(self.dtype).view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (pooled_logits,) + transformer_outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=pooled_logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )

### Hyperparameters

In [6]:
global EPOCHS, BATCH_SIZE_RATIO, SEQUENCE_LEN, LEARNING_RATE, TOKENIZER, MODEL_NAME , ADD_DESCRIPTION

EPOCHS = 20
BATCH_SIZE_RATIO = 0.3 # within proj: 0.3 / cross proj: 0.4
SEQUENCE_LEN = 100
LEARNING_RATE = 5e-4
TOKENIZER = 'bert' # available:bert, gpt2, wordlevel, sentencepiece, wordpiece
MODEL_NAME = 'bert' # available: bert, gpt2sp, gpt2
ADD_DESCRIPTION = False

# define device
global DEVICE
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# define files to be used
global DATA_PATH
DATA_PATH = './sp_dataset/marked_data/'

### Static Methods and Variables

In [7]:
OUTPUT = '  '
MODEL = None
DYNAMIC_BATCH = True
BATCH_SIZE = None
WITHIN_PROJECT = None
MAE_RECORDS = []
MDAE_RECORDS = []

def data_processing(file_pair):
    global BATCH_SIZE, BATCH_SIZE_RATIO, DATA_PATH, WITHIN_PROJECT, DYNAMIC_BATCH

    train_data = pd.DataFrame(columns=['text', 'label'])
    for train_file_name in file_pair['train']:
        fname = DATA_PATH + train_file_name + '.csv'
        df = prepare_dataframe(fname)
        train_data = train_data.append(df)

    # data split
    if WITHIN_PROJECT:
        train_text, train_labels, val_text, val_labels, test_text, test_labels = within_project_split(train_data)
    else:
        train_text, train_labels, val_text, val_labels = train_val_split(train_data, 0.6)
    # define batch size dynamically based on training length
    if DYNAMIC_BATCH:
        BATCH_SIZE = int(len(train_text) * BATCH_SIZE_RATIO)
    # tokenization
    tokens_train = tokenization(train_text.tolist())
    tokens_val = tokenization(val_text.tolist())
    print(tokens_train['input_ids'][:5])

    train_seq = torch.tensor(tokens_train['input_ids'])
    train_y = torch.tensor(train_labels.tolist()).type(torch.LongTensor)
    train_dataloader = prepare_dataloader(train_seq, train_y, sampler_type='random')

    val_seq = torch.tensor(tokens_val['input_ids'])
    val_y = torch.tensor(val_labels.tolist()).type(torch.LongTensor)
    val_dataloader = prepare_dataloader(val_seq, val_y, sampler_type='sequential')

    # prepare testing datasets
    all_test_dataloader = []
    test_file_names = []
    if WITHIN_PROJECT:
        tokens_test = tokenization(test_text.tolist())
        test_seq = torch.tensor(tokens_test['input_ids'])
        test_y = torch.tensor(test_labels.tolist()).type(torch.LongTensor)
        test_dataloader = prepare_dataloader(test_seq, test_y, sampler_type='sequential')
        all_test_dataloader.append(test_dataloader)
        test_file_names.append(file_pair['test'][0])
        return file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names

    for test_file_name in file_pair['test']:
        fname = DATA_PATH + test_file_name + '.csv'
        test_data = prepare_dataframe(fname)

        test_text = test_data['text']
        test_labels = test_data['label']

        # tokenization
        tokens_test = tokenization(test_text.tolist())
        test_seq = torch.tensor(tokens_test['input_ids'])
        test_y = torch.tensor(test_labels.tolist()).type(torch.LongTensor)
        test_dataloader = prepare_dataloader(test_seq, test_y, sampler_type='sequential')

        all_test_dataloader.append(test_dataloader)
        test_file_names.append(test_file_name)
    print('cross project data processing!')
    return file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names


def train_val_split(data, split_ratio):
    print('cross project split!')
    split_point = int(len(data) * split_ratio)
    train_text = data['text'][:split_point]
    train_labels = data['label'][:split_point]
    val_text = data['text'][split_point:]
    val_labels = data['label'][split_point:]
    return train_text, train_labels, val_text, val_labels


def tokenization(text_list):
    global TOKENIZER, SEQUENCE_LEN, MODEL
    # tokenization
    if TOKENIZER == 'wordpiece':
        print('using wordpiece tokenizer!')
        tokenizer = BertTokenizer('all_tokenizers/word_piece/vocab.txt')
    elif TOKENIZER == 'sentencepiece':
        print('using sentencepiece tokenizer!')
        tokenizer = XLNetTokenizer('all_tokenizers/sentence_piece/spm_tokenizer.model', padding_side='right')
    elif TOKENIZER == 'wordlevel':
        print('using wordlevel tokenizer!')
        tokenizer = Tokenizer.from_file('all_tokenizers/word_level/wordlevel.json')
        encoded_sentences = {'input_ids':[]}
        for sentence in text_list:
            encoded = tokenizer.encode(sentence)
            encoded = encoded.ids
            if len(encoded) > SEQUENCE_LEN:
                encoded = encoded[:SEQUENCE_LEN]
            elif len(encoded) < SEQUENCE_LEN:
                padding = SEQUENCE_LEN - len(encoded)
                for _ in range(padding):
                    encoded.append(3)
            encoded_sentences['input_ids'].append(encoded)
        return encoded_sentences
    elif TOKENIZER == 'gpt2':
        print('using pretrained gpt-2 tokenizer')
        tokenizer = GPT2Tokenizer.from_pretrained(TOKENIZER)
        tokenizer.pad_token = '[PAD]'

    elif TOKENIZER == 'bert':
        print('usingbert tokenizer')
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        # tokenizer.pad_token = '[PAD]'
    return tokenizer.batch_encode_plus(text_list, truncation=True, max_length=SEQUENCE_LEN, padding='max_length')


def prepare_dataframe(file_name):
    data = pd.read_csv(file_name)
    # some rows have no description, fill blank to avoid Null
    data = data.fillna(' ')


    if ADD_DESCRIPTION :
      print("### text : title+description")
      d = {'text': (data['title'] + " : " + data["description"]).tolist(), 'label': data['storypoint']}
    else:
      print("### text : title")
      d = {'text': (data['title']).tolist(), 'label': data['storypoint']}
    print("Input data feed ::: ",d['text'][0])
    return pd.DataFrame(data=d)


def prepare_dataloader(seq, y, sampler_type):
    global BATCH_SIZE
    tensor_dataset = TensorDataset(seq, y)
    if sampler_type == 'random':
        sampler = RandomSampler(tensor_dataset)
    elif sampler_type == 'sequential':
        sampler = SequentialSampler(tensor_dataset)
    print("BATCH_SIZE : ",BATCH_SIZE)
    dataloader = DataLoader(tensor_dataset, sampler=sampler, batch_size=BATCH_SIZE)
    return dataloader


def within_project_split(data):
    print('within project split!')
    train_val_split_point = int(len(data) * 0.6)
    val_test_split_point = int(len(data) * 0.8)
    train_text = data['text'][:train_val_split_point]
    train_labels = data['label'][:train_val_split_point]
    val_text = data['text'][train_val_split_point:val_test_split_point]
    val_labels = data['label'][train_val_split_point:val_test_split_point]
    test_text = data['text'][val_test_split_point:]
    test_labels = data['label'][val_test_split_point:]
    return train_text, train_labels, val_text, val_labels, test_text, test_labels


def train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, model, test_file_names):
    global LEARNING_RATE, EPOCHS, MAE_RECORDS, MDAE_RECORDS, DEVICE

    # Optimizerrr -->
    optimizer = AdamW(MODEL.parameters(), lr=LEARNING_RATE)
    # Total number of training steps is [number of batches] x [number of epochs]
    total_steps = len(train_dataloader) * EPOCHS
    # Create the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
    print("Start training for ", file_pair, ".....")
    training_start_time = time.time()

    # tensorboard writer
    writer_path = 'tb/' + str(file_pair['train'][0]) + '_' + str(file_pair['test'][0])
    writer = SummaryWriter(writer_path)

    # vars for model selection
    min_eval_loss_epoch = [10000, 0]

    time_records = []
    MAE_RECORDS = []
    MDAE_RECORDS = []
    start_time = time.time()
    loss_fct = nn.L1Loss()
    for e in range(EPOCHS):
        # ---TRAINING---
        # clean GPU memory
        torch.cuda.empty_cache()
        print(">>> epoch ", e)
        # set model into train mode
        model.train()
        total_train_loss = 0
        for step, batch in enumerate(train_dataloader):
            # pdb.set_trace()
            b_input_ids = batch[0].to(DEVICE)
            b_labels = batch[1].to(DEVICE)
            model.zero_grad()
            result = model(b_input_ids,
                           labels=b_labels,
                           return_dict=True)
            loss = result.loss
            logits = result.logits
            total_train_loss += loss.item()
            # Calculates the gradients
            loss.backward()
            # The clip_grad_norm_ function clips (limits) the norm (magnitude) of the gradients to a maximum value specified by the user.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            #updates the weights and bias accrding to the calculated gradients
            optimizer.step()
            # update learning rates
            scheduler.step()
            # clean memory
            del step, batch, b_input_ids, b_labels, result, loss, logits

        avg_train_loss = total_train_loss / len(train_dataloader)
        print(" Average training MAE loss: {0:.2f}".format(avg_train_loss))
        writer.add_scalar('loss/train', avg_train_loss, e)
        # clean memory
        del avg_train_loss, total_train_loss

        time_records.append(time.time() - start_time)

        # ---EVAL---
        print("-")
        # set model into eval mode
        model.eval()
        total_eval_loss = 0
        for batch in val_dataloader:
            b_input_ids = batch[0].to(DEVICE)
            b_labels = batch[1].to(DEVICE)
            model.zero_grad()
            result = model(b_input_ids,
                           labels=b_labels,
                           return_dict=True)
            loss = result.loss
            logits = result.logits
            total_eval_loss += loss.item()
            # clean memory
            del b_input_ids, b_labels, batch, result, loss, logits
        avg_eval_loss = total_eval_loss / len(val_dataloader)
        print(" Average eval MAE loss: {0:.2f}".format(avg_eval_loss))

        if avg_eval_loss <= min_eval_loss_epoch[0]:
            min_eval_loss_epoch[0] = avg_eval_loss
            min_eval_loss_epoch[1] = e

        writer.add_scalar('loss/eval', avg_eval_loss, e)
        # clean memory
        del avg_eval_loss, total_eval_loss
        # save model state to dict
        torch.save(model.state_dict(), './models/' + 'epo_' + str(e))

        print("===============================")

        # testing on holdout data
        index = 0
        for test_dataloader in all_test_dataloader:
            test_file_name = test_file_names[index]
            index += 1
            testing_start_time = time.time()
            predictions = []
            true_labels = []
            for batch in test_dataloader:
                batch = tuple(t.to(DEVICE) for t in batch)
                b_input_ids, b_labels = batch
                with torch.no_grad():
                    logits = model(b_input_ids)
                logits = logits['logits'].detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()
                predictions.append(logits)
                true_labels.append(label_ids)
            # calculate errors
            distance_records = []
            for i in range(len(predictions)):
                for j in range(len(predictions[i])):
                    distance = abs(predictions[i][j] - true_labels[i][j])
                    distance_records.append(distance)

            ## MAE = mean value of all absolute errors (stored in distance_records)
            MAE = np.mean(np.array(distance_records))
            ## MdAE = median value of all absolute errors (stored in distance_records)
            MdAE = np.median(np.array(distance_records))

            MAE_RECORDS.append(MAE)
            MDAE_RECORDS.append(MdAE)

            global OUTPUT
            OUTPUT +=  'Epochs ' + str(e) + '\n'
            OUTPUT += 'MAE: ' + str(MAE) + '\n'
            OUTPUT += 'MdAE: ' + str(MdAE) + '\n\n'
            print('MAE: ', MAE)
            print('MdAE: ', MdAE)
    writer.flush()
    writer.close()

    # select model
    os.rename('models/epo_' + str(min_eval_loss_epoch[1]),
              'models/' + str(file_pair['train'][0]) + '_'
              + str(file_pair['test'][0]) + '_epo_' + str(min_eval_loss_epoch[1]))

    # del unwanted models
    for i in range(20):
        try:
            os.remove("models/epo_" + str(i))
        except:
            continue

    OUTPUT += 'MAE: ' + str(MAE_RECORDS[min_eval_loss_epoch[1]]) \
                + '  MdAE: ' + str(MDAE_RECORDS[min_eval_loss_epoch[1]]) + '\n'
    OUTPUT += 'training time: ' + str(time_records[min_eval_loss_epoch[1]]) + '\n'
    OUTPUT += 'Epochs: ' + str(min_eval_loss_epoch[1]) +'\n'
    global BATCH_SIZE
    OUTPUT += 'batch size: ' + str(BATCH_SIZE) + '\n'
    global ADD_DESCRIPTION
    OUTPUT += 'Description added : ' + str(ADD_DESCRIPTION) + '\n'


    print('all done for one project')

### Within Project Training Script

In [8]:
torch.cuda.empty_cache()

In [11]:
global WITHIN_PROJECT
WITHIN_PROJECT = True

TRAIN_TEST_FILE_PAIRS = [
                        # {'train': ['appceleratorstudio'], 'test': ['appceleratorstudio']},
                        {'train': ['aptanastudio'], 'test': ['aptanastudio']},
                        {'train': ['bamboo'], 'test': ['bamboo']},
                        {'train': ['clover'], 'test': ['clover']},
                        {'train': ['datamanagement'], 'test': ['datamanagement']},
                        {'train': ['duracloud'], 'test': ['duracloud']},
                        {'train': ['jirasoftware'], 'test': ['jirasoftware']},
                        {'train': ['mesos'], 'test': ['mesos']},
                        {'train': ['moodle'], 'test': ['moodle']},
                        {'train': ['mule'], 'test': ['mule']},
                        {'train': ['mulestudio'], 'test': ['mulestudio']},
                        {'train': ['springxd'], 'test': ['springxd']},
                        {'train': ['talenddataquality'], 'test': ['talenddataquality']},
                        {'train': ['talendesb'], 'test': ['talendesb']},
                        {'train': ['titanium'], 'test': ['titanium']},
                        {'train': ['usergrid'], 'test': ['usergrid']},
                        ]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME
    for file in TRAIN_TEST_FILE_PAIRS:
        if TOKENIZER == 'bbpe':
            config = GPT2Config(num_labels=1, pad_token_id=50257)
        elif TOKENIZER == 'gpt2':
            config = GPT2Config(num_labels=1, pad_token_id=50256)
        elif TOKENIZER == 'wordpiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'sentencepiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'wordlevel':
            config = GPT2Config(num_labels=1, pad_token_id=3)
        elif TOKENIZER == 'bert':
            config = BertConfig(num_labels=1, pad_token_id=0)




        if MODEL_NAME == 'gpt2':
            MODEL = LinearGPT2.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        elif MODEL_NAME == 'gpt2sp':
            MODEL = GPT2SP.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        elif MODEL_NAME == 'bert':
            MODEL = BertSP.from_pretrained('bert-base-uncased', config=config)
            MODEL.cuda()



        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        torch.cuda.empty_cache()
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""


if __name__ == "__main__":
    main()

n_embd/hidden_size :  768


Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Add Copy URL actions to right-click context menu of Remote view for S3 files
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 5587, 6100, 24471, 2140, 4506, 2000, 2157, 1011, 11562, 6123, 12183, 1997, 6556, 3193, 2005, 1055, 2509, 6764, 102], [101, 26794, 5162, 5363, 2000, 2330, 1037, 2047, 6013, 1997, 2993, 2043, 3098, 6764, 3081, 3645, 10566, 102, 0, 0], [101, 4180, 6509, 3769, 6279, 2003, 3491, 2648, 1996, 3898, 7372, 102, 0, 0, 0, 0, 0, 0, 0, 0], [101, 25718, 8285, 9006, 10814, 3508, 2005, 6687, 4725, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 11920, 2019, 3746, 2046, 1996, 16129, 3559, 2323, 3443, 2019, 3746, 6415, 102, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  149
BATCH_SIZE :  149
usingbert tokenizer
BATCH_SIZE :  149
Start training for  {'train': ['aptanastudio'], 'test': ['aptanastudio']} .....




>>> epoch  0
 Average training MAE loss: 8.20
-
 Average eval MAE loss: 16.58
MAE:  17.908117
MdAE:  19.757294
>>> epoch  1
 Average training MAE loss: 13.00
-
 Average eval MAE loss: 8.72
MAE:  7.4582424
MdAE:  5.241375
>>> epoch  2
 Average training MAE loss: 7.67
-
 Average eval MAE loss: 8.28
MAE:  9.669859
MdAE:  11.07366
>>> epoch  3
 Average training MAE loss: 5.79
-
 Average eval MAE loss: 3.15
MAE:  3.427276
MdAE:  1.58763
>>> epoch  4
 Average training MAE loss: 4.37
-
 Average eval MAE loss: 2.92
MAE:  3.8963127
MdAE:  3.4498835
>>> epoch  5
 Average training MAE loss: 4.54
-
 Average eval MAE loss: 3.50
MAE:  4.5576105
MdAE:  4.505416
>>> epoch  6
 Average training MAE loss: 4.40
-
 Average eval MAE loss: 3.00
MAE:  3.4865334
MdAE:  2.0347486
>>> epoch  7
 Average training MAE loss: 4.25
-
 Average eval MAE loss: 2.88
MAE:  3.8516173
MdAE:  3.378543
>>> epoch  8
 Average training MAE loss: 4.40
-
 Average eval MAE loss: 3.31
MAE:  4.340903
MdAE:  4.1595182
>>> epoch  9
 Ave

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Allows CVS repo to timeout and report on locking issues
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 4473, 26226, 2015, 16360, 2080, 2000, 2051, 5833, 1998, 3189, 2006, 14889, 3314, 102, 0, 0, 0, 0, 0], [101, 3499, 1037, 3857, 2000, 2022, 2872, 2012, 1996, 2132, 1997, 1996, 3857, 24240, 1012, 1012, 1012, 1006, 2030, 102], [101, 2765, 2025, 5552, 2043, 4638, 5833, 11896, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2831, 5963, 2013, 21274, 4005, 2000, 15216, 8241, 2000, 2421, 1041, 5910, 3872, 4057, 3463, 102, 0, 0, 0], [101, 5198, 2064, 2156, 1996, 5003, 8159, 11336, 1006, 2177, 3593, 1010, 20785, 3593, 1010, 2544, 1007, 2114, 2169, 102]]
BATCH_SIZE :  93
BATCH_SIZE :  93
usingbert tokenizer
BATCH_SIZE :  93
Start training for  {'train': ['bamboo'], 'test': ['bamboo']} .....




>>> epoch  0
 Average training MAE loss: 4.40
-
 Average eval MAE loss: 19.79
MAE:  19.63551
MdAE:  19.512028
>>> epoch  1
 Average training MAE loss: 15.10
-
 Average eval MAE loss: 14.48
MAE:  14.633713
MdAE:  14.757519
>>> epoch  2
 Average training MAE loss: 6.88
-
 Average eval MAE loss: 1.58
MAE:  1.5144242
MdAE:  1.2259631
>>> epoch  3
 Average training MAE loss: 2.90
-
 Average eval MAE loss: 0.84
MAE:  0.92206204
MdAE:  0.69802594
>>> epoch  4
 Average training MAE loss: 1.60
-
 Average eval MAE loss: 1.04
MAE:  1.194781
MdAE:  1.3185905
>>> epoch  5
 Average training MAE loss: 1.65
-
 Average eval MAE loss: 1.12
MAE:  1.1313633
MdAE:  0.67446065
>>> epoch  6
 Average training MAE loss: 1.56
-
 Average eval MAE loss: 0.77
MAE:  0.87141585
MdAE:  0.7881584
>>> epoch  7
 Average training MAE loss: 1.51
-
 Average eval MAE loss: 0.92
MAE:  0.9851508
MdAE:  0.5857487
>>> epoch  8
 Average training MAE loss: 1.46
-
 Average eval MAE loss: 1.26
MAE:  1.2381518
MdAE:  0.8645077
>>> e

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Line coverage data is inconsistent
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 2240, 6325, 2951, 2003, 20316, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2469, 10273, 2465, 15069, 2003, 16542, 2043, 5834, 2006, 1037, 15723, 1998, 1037, 3231, 1011, 15723, 2013, 1996, 102], [101, 4292, 2569, 16015, 6434, 16101, 2000, 1037, 3120, 16101, 3972, 12870, 2015, 2035, 3120, 999, 102, 0, 0, 0], [101, 5587, 4118, 1013, 4861, 2504, 16015, 4292, 1999, 2622, 5144, 3931, 102, 0, 0, 0, 0, 0, 0, 0], [101, 2191, 3231, 2448, 10566, 4685, 2488, 1998, 2644, 14889, 1996, 21318, 11689, 102, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  69
BATCH_SIZE :  69
usingbert tokenizer
BATCH_SIZE :  69
Start training for  {'train': ['clover'], 'test': ['clover']} .....




>>> epoch  0
 Average training MAE loss: 8.74
-
 Average eval MAE loss: 12.15
MAE:  11.77961
MdAE:  12.292812
>>> epoch  1
 Average training MAE loss: 8.86
-
 Average eval MAE loss: 5.61
MAE:  6.29273
MdAE:  5.540004
>>> epoch  2
 Average training MAE loss: 6.27
-
 Average eval MAE loss: 5.35
MAE:  7.7931075
MdAE:  4.8320665
>>> epoch  3
 Average training MAE loss: 5.09
-
 Average eval MAE loss: 5.11
MAE:  5.917048
MdAE:  4.9727974
>>> epoch  4
 Average training MAE loss: 4.51
-
 Average eval MAE loss: 3.61
MAE:  6.058326
MdAE:  3.0972872
>>> epoch  5
 Average training MAE loss: 5.03
-
 Average eval MAE loss: 3.05
MAE:  4.422598
MdAE:  3.4131413
>>> epoch  6
 Average training MAE loss: 3.82
-
 Average eval MAE loss: 1.99
MAE:  3.8490348
MdAE:  2.2379248
>>> epoch  7
 Average training MAE loss: 4.31
-
 Average eval MAE loss: 1.65
MAE:  4.0978456
MdAE:  1.1368061
>>> epoch  8
 Average training MAE loss: 3.56
-
 Average eval MAE loss: 2.53
MAE:  4.1283503
MdAE:  2.9317083
>>> epoch  9
 Av

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Transition git repositories to Stash
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 6653, 21025, 2102, 16360, 20049, 29469, 2229, 2000, 2358, 11823, 102, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2345, 4697, 1040, 2213, 3260, 4861, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2330, 2039, 1048, 4757, 2102, 4007, 5653, 2075, 7201, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 6653, 2000, 13693, 3980, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 5587, 16942, 1011, 2241, 23569, 27605, 6290, 2000, 2033, 3022, 1035, 4800, 8873, 2102, 102, 0, 0, 0, 0]]
BATCH_SIZE :  840
BATCH_SIZE :  840
usingbert tokenizer
BATCH_SIZE :  840
Start training for  {'train': ['datamanagement'], 'test': ['datamanagement']} .....




>>> epoch  0
 Average training MAE loss: 10.50
-
 Average eval MAE loss: 19.31
MAE:  18.517624
MdAE:  19.25618
>>> epoch  1
 Average training MAE loss: 17.99
-
 Average eval MAE loss: 12.44
MAE:  14.229072
MdAE:  9.984961
>>> epoch  2
 Average training MAE loss: 11.77
-
 Average eval MAE loss: 7.97
MAE:  8.294781
MdAE:  6.9488077
>>> epoch  3
 Average training MAE loss: 9.48
-
 Average eval MAE loss: 5.30
MAE:  7.060936
MdAE:  2.697729
>>> epoch  4
 Average training MAE loss: 8.73
-
 Average eval MAE loss: 5.40
MAE:  6.403606
MdAE:  3.4998403
>>> epoch  5
 Average training MAE loss: 8.70
-
 Average eval MAE loss: 4.91
MAE:  6.5784335
MdAE:  1.7920029
>>> epoch  6
 Average training MAE loss: 8.69
-
 Average eval MAE loss: 5.64
MAE:  6.5435896
MdAE:  4.0876045
>>> epoch  7
 Average training MAE loss: 8.82
-
 Average eval MAE loss: 4.88
MAE:  6.288755
MdAE:  2.3886838
>>> epoch  8
 Average training MAE loss: 8.97
-
 Average eval MAE loss: 4.92
MAE:  6.2139606
MdAE:  2.8544025
>>> epoch  9

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Document logging framework
within project split!
usingbert tokenizer
usingbert tokenizer


  train_data = train_data.append(df)


[[101, 6254, 15899, 7705, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 3499, 2005, 3722, 2039, 16616, 1997, 2622, 2544, 3616, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 9625, 7170, 1024, 20410, 3144, 4241, 22648, 23743, 2094, 13749, 4355, 1997, 2184, 2102, 2497, 1997, 1038, 7317, 102], [101, 16545, 13910, 2475, 2243, 1024, 3746, 7584, 2326, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 16545, 13910, 2475, 2243, 1024, 3746, 8241, 2326, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  119
BATCH_SIZE :  119
usingbert tokenizer
BATCH_SIZE :  119
Start training for  {'train': ['duracloud'], 'test': ['duracloud']} .....




>>> epoch  0
 Average training MAE loss: 4.30
-
 Average eval MAE loss: 15.85
MAE:  15.922928
MdAE:  15.729273
>>> epoch  1
 Average training MAE loss: 6.73
-
 Average eval MAE loss: 2.38
MAE:  2.30764
MdAE:  2.501669
>>> epoch  2
 Average training MAE loss: 3.76
-
 Average eval MAE loss: 3.73
MAE:  3.6529746
MdAE:  3.8470042
>>> epoch  3
 Average training MAE loss: 3.76
-
 Average eval MAE loss: 3.11
MAE:  3.172454
MdAE:  2.928298
>>> epoch  4
 Average training MAE loss: 3.36
-
 Average eval MAE loss: 1.04
MAE:  0.8404047
MdAE:  1.10255
>>> epoch  5
 Average training MAE loss: 2.82
-
 Average eval MAE loss: 2.18
MAE:  2.0993702
MdAE:  2.2934
>>> epoch  6
 Average training MAE loss: 2.00
-
 Average eval MAE loss: 1.70
MAE:  1.6314968
MdAE:  1.7490122
>>> epoch  7
 Average training MAE loss: 1.83
-
 Average eval MAE loss: 2.69
MAE:  2.617958
MdAE:  2.811988
>>> epoch  8
 Average training MAE loss: 2.66
-
 Average eval MAE loss: 0.92
MAE:  0.7960507
MdAE:  0.66769946
>>> epoch  9
 Averag

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  As a JIRA Administrator I would like to be able to change the trigger of the night service
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 2004, 1037, 10147, 2527, 8911, 1045, 2052, 2066, 2000, 2022, 2583, 2000, 2689, 1996, 9495, 1997, 1996, 2305, 102], [101, 2004, 1037, 10147, 2527, 8911, 1045, 2052, 2066, 2000, 2022, 2583, 2000, 2689, 1996, 9495, 1997, 1996, 2305, 102], [101, 12391, 4773, 6198, 14593, 2229, 2089, 13249, 2007, 2060, 13354, 7076, 102, 0, 0, 0, 0, 0, 0, 0], [101, 12391, 4773, 6198, 14593, 2229, 2089, 13249, 2007, 2060, 13354, 7076, 102, 0, 0, 0, 0, 0, 0, 0], [101, 5587, 3793, 2000, 1996, 29003, 11721, 24291, 1000, 19528, 2622, 1000, 4471, 102, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  63
BATCH_SIZE :  63
usingbert tokenizer
BATCH_SIZE :  63
Start training for  {'train': ['jirasoftware'], 'test': ['jirasoftware']} .....




>>> epoch  0
 Average training MAE loss: 6.72
-
 Average eval MAE loss: 18.40
MAE:  17.674198
MdAE:  18.32214
>>> epoch  1
 Average training MAE loss: 6.51
-
 Average eval MAE loss: 2.62
MAE:  2.2147145
MdAE:  1.8466048
>>> epoch  2
 Average training MAE loss: 5.07
-
 Average eval MAE loss: 2.79
MAE:  3.5134997
MdAE:  2.8656123
>>> epoch  3
 Average training MAE loss: 4.71
-
 Average eval MAE loss: 2.84
MAE:  2.4256287
MdAE:  2.1732135
>>> epoch  4
 Average training MAE loss: 2.89
-
 Average eval MAE loss: 3.14
MAE:  2.7439935
MdAE:  2.5563307
>>> epoch  5
 Average training MAE loss: 3.03
-
 Average eval MAE loss: 2.84
MAE:  2.4256656
MdAE:  2.1732578
>>> epoch  6
 Average training MAE loss: 3.41
-
 Average eval MAE loss: 2.69
MAE:  2.2762847
MdAE:  1.9876189
>>> epoch  7
 Average training MAE loss: 2.81
-
 Average eval MAE loss: 2.03
MAE:  1.7729985
MdAE:  1.2693214
>>> epoch  8
 Average training MAE loss: 2.91
-
 Average eval MAE loss: 3.88
MAE:  3.5230734
MdAE:  3.4938679
>>> epoch 

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Report executor terminations to framework schedulers.
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 3189, 4654, 8586, 16161, 2099, 18287, 2015, 2000, 7705, 6134, 2869, 1012, 102, 0, 0, 0, 0, 0, 0], [101, 2033, 17063, 6658, 2323, 17053, 4654, 8586, 16161, 2869, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 14451, 4708, 1035, 3478, 3114, 2000, 7705, 2015, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 13212, 7705, 11896, 2000, 2448, 2349, 2000, 2919, 9245, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2036, 4638, 1005, 21025, 2102, 4487, 4246, 1011, 1011, 9132, 29336, 1011, 1011, 9813, 1005, 1999, 2695, 1011, 102]]
BATCH_SIZE :  302
BATCH_SIZE :  302
usingbert tokenizer
BATCH_SIZE :  302
Start training for  {'train': ['mesos'], 'test': ['mesos']} .....




>>> epoch  0
 Average training MAE loss: 8.14
-
 Average eval MAE loss: 9.04
MAE:  9.300682
MdAE:  10.043852
>>> epoch  1
 Average training MAE loss: 7.42
-
 Average eval MAE loss: 1.54
MAE:  1.4386982
MdAE:  1.3528974
>>> epoch  2
 Average training MAE loss: 3.33
-
 Average eval MAE loss: 2.80
MAE:  2.9380486
MdAE:  3.5100763
>>> epoch  3
 Average training MAE loss: 2.87
-
 Average eval MAE loss: 2.02
MAE:  1.74456
MdAE:  0.9899574
>>> epoch  4
 Average training MAE loss: 2.47
-
 Average eval MAE loss: 1.52
MAE:  1.4131666
MdAE:  1.3116541
>>> epoch  5
 Average training MAE loss: 1.82
-
 Average eval MAE loss: 1.47
MAE:  1.2115922
MdAE:  0.7262578
>>> epoch  6
 Average training MAE loss: 1.74
-
 Average eval MAE loss: 1.40
MAE:  1.2616229
MdAE:  1.0668526
>>> epoch  7
 Average training MAE loss: 1.74
-
 Average eval MAE loss: 1.44
MAE:  1.2144455
MdAE:  0.51342535
>>> epoch  8
 Average training MAE loss: 1.69
-
 Average eval MAE loss: 1.52
MAE:  1.411985
MdAE:  1.3097451
>>> epoch  9


Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Forum: Per-discussion subscription
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 7057, 1024, 2566, 1011, 6594, 15002, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 7057, 1024, 7514, 2011, 1041, 1011, 5653, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 3499, 5089, 2000, 20648, 3056, 4249, 1999, 7809, 4023, 2004, 3223, 102, 0, 0, 0, 0, 0, 0, 0], [101, 7057, 1024, 10838, 2000, 2279, 11689, 4957, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1037, 2224, 6337, 1005, 1055, 2607, 2862, 2003, 12098, 16313, 8486, 2135, 3132, 2007, 2053, 2126, 1997, 3773, 102]]
BATCH_SIZE :  209
BATCH_SIZE :  209
usingbert tokenizer
BATCH_SIZE :  209
Start training for  {'train': ['moodle'], 'test': ['moodle']} .....
>>> epoch  0




 Average training MAE loss: 18.99
-
 Average eval MAE loss: 15.16
MAE:  13.000348
MdAE:  14.853317
>>> epoch  1
 Average training MAE loss: 16.77
-
 Average eval MAE loss: 14.12
MAE:  9.5092
MdAE:  10.047956
>>> epoch  2
 Average training MAE loss: 13.77
-
 Average eval MAE loss: 16.25
MAE:  5.8497553
MdAE:  4.27724
>>> epoch  3
 Average training MAE loss: 12.62
-
 Average eval MAE loss: 16.75
MAE:  5.5218453
MdAE:  3.3850212
>>> epoch  4
 Average training MAE loss: 12.91
-
 Average eval MAE loss: 14.39
MAE:  8.758314
MdAE:  8.803181
>>> epoch  5
 Average training MAE loss: 13.05
-
 Average eval MAE loss: 14.31
MAE:  8.965117
MdAE:  9.148838
>>> epoch  6
 Average training MAE loss: 12.97
-
 Average eval MAE loss: 17.00
MAE:  5.3787727
MdAE:  3.0458975
>>> epoch  7
 Average training MAE loss: 13.93
-
 Average eval MAE loss: 14.72
MAE:  7.9373507
MdAE:  7.4310036
>>> epoch  8
 Average training MAE loss: 12.49
-
 Average eval MAE loss: 14.64
MAE:  8.141935
MdAE:  7.7729487
>>> epoch  9
 A

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Implement true multicast functionality for <all> processor
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 10408, 2995, 4800, 10526, 15380, 2005, 1026, 2035, 1028, 13151, 102, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2515, 20568, 2490, 1060, 2050, 12598, 2006, 1060, 2050, 4219, 2109, 2011, 1037, 3500, 4874, 6922, 1029, 102, 0], [101, 2421, 27354, 11336, 1999, 20568, 4563, 4353, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2127, 1011, 3144, 2323, 2275, 6453, 18093, 2007, 2197, 6453, 2363, 2077, 6016, 2000, 21469, 4160, 102, 0, 0], [101, 2127, 1011, 3144, 2323, 2490, 26351, 8093, 17175, 2271, 2224, 3572, 102, 0, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  159
BATCH_SIZE :  159
usingbert tokenizer
BATCH_SIZE :  159
Start training for  {'train': ['mule'], 'test': ['mule']} .....
>>> epoch  0




 Average training MAE loss: 6.75
-
 Average eval MAE loss: 17.07
MAE:  16.703669
MdAE:  16.327633
>>> epoch  1
 Average training MAE loss: 6.39
-
 Average eval MAE loss: 9.69
MAE:  10.058205
MdAE:  10.434607
>>> epoch  2
 Average training MAE loss: 7.59
-
 Average eval MAE loss: 3.69
MAE:  3.6116757
MdAE:  4.370677
>>> epoch  3
 Average training MAE loss: 3.55
-
 Average eval MAE loss: 2.67
MAE:  2.8938346
MdAE:  2.6166983
>>> epoch  4
 Average training MAE loss: 3.85
-
 Average eval MAE loss: 5.79
MAE:  5.599339
MdAE:  4.8498917
>>> epoch  5
 Average training MAE loss: 3.87
-
 Average eval MAE loss: 2.40
MAE:  2.5938854
MdAE:  1.6222594
>>> epoch  6
 Average training MAE loss: 2.96
-
 Average eval MAE loss: 2.77
MAE:  2.8397224
MdAE:  2.7729132
>>> epoch  7
 Average training MAE loss: 2.92
-
 Average eval MAE loss: 2.36
MAE:  2.4987254
MdAE:  2.93287
>>> epoch  8
 Average training MAE loss: 2.85
-
 Average eval MAE loss: 2.39
MAE:  2.5276031
MdAE:  2.8731008
>>> epoch  9
 Average trai

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Support for request/reply
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 2490, 2005, 5227, 1013, 7514, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 3685, 12324, 1037, 2996, 2622, 2013, 21025, 2102, 2302, 10697, 102, 0, 0, 0, 0, 0, 0, 0, 0], [101, 3431, 2000, 9262, 3642, 2079, 2025, 2131, 2980, 7333, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 4039, 2000, 5587, 1037, 3433, 2043, 4526, 1037, 2117, 4834, 1999, 1996, 2168, 1049, 12314, 102, 0, 0, 0], [101, 3415, 15327, 2015, 1999, 1996, 20950, 3193, 2024, 2025, 2108, 3718, 2043, 2017, 6366, 2035, 1996, 3787, 1997, 102]]
BATCH_SIZE :  131
BATCH_SIZE :  131
usingbert tokenizer
BATCH_SIZE :  131
Start training for  {'train': ['mulestudio'], 'test': ['mulestudio']} .....




>>> epoch  0
 Average training MAE loss: 6.09
-
 Average eval MAE loss: 13.51
MAE:  13.825926
MdAE:  15.743349
>>> epoch  1
 Average training MAE loss: 6.65
-
 Average eval MAE loss: 4.22
MAE:  3.7352636
MdAE:  1.7850657
>>> epoch  2
 Average training MAE loss: 3.90
-
 Average eval MAE loss: 6.57
MAE:  6.32836
MdAE:  6.991596
>>> epoch  3
 Average training MAE loss: 5.17
-
 Average eval MAE loss: 10.13
MAE:  9.162943
MdAE:  7.2173653
>>> epoch  4
 Average training MAE loss: 5.87
-
 Average eval MAE loss: 4.28
MAE:  3.865741
MdAE:  2.2244186
>>> epoch  5
 Average training MAE loss: 3.89
-
 Average eval MAE loss: 4.19
MAE:  3.651397
MdAE:  2.4339266
>>> epoch  6
 Average training MAE loss: 3.60
-
 Average eval MAE loss: 5.15
MAE:  4.493931
MdAE:  2.0430646
>>> epoch  7
 Average training MAE loss: 3.15
-
 Average eval MAE loss: 4.27
MAE:  3.8403313
MdAE:  2.0278244
>>> epoch  8
 Average training MAE loss: 2.99
-
 Average eval MAE loss: 4.64
MAE:  4.024625
MdAE:  2.9904623
>>> epoch  9
 Av

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  HDFS ItemWriter
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 10751, 10343, 8875, 15994, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 10751, 10343, 4563, 3015, 2393, 2121, 4280, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 3149, 15584, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 10722, 10814, 2951, 3252, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 25353, 14540, 8649, 13749, 4355, 3258, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  634
BATCH_SIZE :  634
usingbert tokenizer
BATCH_SIZE :  634
Start training for  {'train': ['springxd'], 'test': ['springxd']} .....




>>> epoch  0
 Average training MAE loss: 6.61
-
 Average eval MAE loss: 13.01
MAE:  13.309196
MdAE:  13.569466
>>> epoch  1
 Average training MAE loss: 8.72
-
 Average eval MAE loss: 6.80
MAE:  7.1331015
MdAE:  7.3575087
>>> epoch  2
 Average training MAE loss: 6.63
-
 Average eval MAE loss: 7.60
MAE:  7.3249297
MdAE:  7.045893
>>> epoch  3
 Average training MAE loss: 4.41
-
 Average eval MAE loss: 2.58
MAE:  2.648204
MdAE:  2.8017993
>>> epoch  4
 Average training MAE loss: 2.97
-
 Average eval MAE loss: 3.20
MAE:  2.9187212
MdAE:  2.6396847
>>> epoch  5
 Average training MAE loss: 2.74
-
 Average eval MAE loss: 2.78
MAE:  2.8755703
MdAE:  2.5034342
>>> epoch  6
 Average training MAE loss: 2.57
-
 Average eval MAE loss: 1.93
MAE:  1.7502027
MdAE:  1.3839216
>>> epoch  7
 Average training MAE loss: 2.21
-
 Average eval MAE loss: 1.84
MAE:  1.7289528
MdAE:  1.7589822
>>> epoch  8
 Average training MAE loss: 2.19
-
 Average eval MAE loss: 1.80
MAE:  1.7373941
MdAE:  1.9430647
>>> epoch  

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  SQL Server Single Sign On Support doesn't work in data profiler repository connections
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 29296, 8241, 2309, 3696, 2006, 2490, 2987, 1005, 1056, 2147, 1999, 2951, 6337, 2099, 22409, 7264, 102, 0, 0], [101, 6366, 2048, 7753, 1999, 6075, 7251, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 4434, 4106, 1024, 2592, 1035, 8040, 28433, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 3674, 4106, 20390, 2024, 1000, 3897, 2098, 2041, 1000, 1998, 3685, 2022, 2109, 102, 0, 0, 0, 0, 0], [101, 1000, 3193, 19528, 10281, 1000, 12183, 2515, 2025, 4653, 2006, 1037, 2795, 4106, 1006, 2007, 25410, 3627, 1007, 102]]
BATCH_SIZE :  248
BATCH_SIZE :  248
usingbert tokenizer
BATCH_SIZE :  248
Start training for  {'train': ['talenddataquality'], 'test': ['talenddataquality']} .....




>>> epoch  0
 Average training MAE loss: 6.99
-
 Average eval MAE loss: 16.41
MAE:  19.226973
MdAE:  20.632088
>>> epoch  1
 Average training MAE loss: 7.61
-
 Average eval MAE loss: 4.35
MAE:  4.0921803
MdAE:  4.086375
>>> epoch  2
 Average training MAE loss: 7.57
-
 Average eval MAE loss: 4.87
MAE:  5.316209
MdAE:  5.756601
>>> epoch  3
 Average training MAE loss: 4.14
-
 Average eval MAE loss: 4.11
MAE:  2.8754363
MdAE:  3.1370058
>>> epoch  4
 Average training MAE loss: 3.85
-
 Average eval MAE loss: 4.70
MAE:  4.9090595
MdAE:  5.2010326
>>> epoch  5
 Average training MAE loss: 3.72
-
 Average eval MAE loss: 4.11
MAE:  2.9013276
MdAE:  3.1901307
>>> epoch  6
 Average training MAE loss: 3.96
-
 Average eval MAE loss: 4.61
MAE:  4.706889
MdAE:  4.9251647
>>> epoch  7
 Average training MAE loss: 3.76
-
 Average eval MAE loss: 4.02
MAE:  3.2862446
MdAE:  3.0200748
>>> epoch  8
 Average training MAE loss: 3.82
-
 Average eval MAE loss: 4.27
MAE:  3.902217
MdAE:  3.8271627
>>> epoch  9
 

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Investigation: S1 Improved user experience with TOS/TIS/ESB Studio
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 4812, 1024, 1055, 2487, 5301, 5310, 3325, 2007, 2000, 2015, 1013, 22320, 1013, 9686, 2497, 2996, 102, 0, 0], [101, 8556, 1024, 1055, 2475, 2449, 2326, 7375, 1998, 10802, 8819, 1999, 2000, 2015, 1013, 22320, 1013, 2996, 102, 0], [101, 8556, 1024, 1055, 2509, 5301, 20950, 2951, 8304, 6177, 1999, 2000, 2015, 1013, 22320, 1013, 2996, 102, 0, 0], [101, 8556, 1024, 1055, 2581, 22320, 19387, 2595, 2241, 2006, 1996, 2326, 4713, 102, 0, 0, 0, 0, 0, 0], [101, 8556, 1024, 21025, 2705, 12083, 9230, 2047, 6695, 1998, 7860, 102, 0, 0, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  156
BATCH_SIZE :  156
usingbert tokenizer
BATCH_SIZE :  156
Start training for  {'train': ['talendesb'], 'test': ['talendesb']} .....
>>> epoch  0




 Average training MAE loss: 4.52
-
 Average eval MAE loss: 19.28
MAE:  19.480928
MdAE:  19.527344
>>> epoch  1
 Average training MAE loss: 14.87
-
 Average eval MAE loss: 14.73
MAE:  14.52593
MdAE:  14.479952
>>> epoch  2
 Average training MAE loss: 6.85
-
 Average eval MAE loss: 2.40
MAE:  2.2393188
MdAE:  2.024942
>>> epoch  3
 Average training MAE loss: 1.97
-
 Average eval MAE loss: 1.82
MAE:  1.6599326
MdAE:  1.6655596
>>> epoch  4
 Average training MAE loss: 1.47
-
 Average eval MAE loss: 1.19
MAE:  1.0040269
MdAE:  0.6958624
>>> epoch  5
 Average training MAE loss: 1.27
-
 Average eval MAE loss: 1.34
MAE:  1.1805524
MdAE:  1.5782452
>>> epoch  6
 Average training MAE loss: 1.22
-
 Average eval MAE loss: 1.09
MAE:  0.927117
MdAE:  0.8617346
>>> epoch  7
 Average training MAE loss: 1.29
-
 Average eval MAE loss: 1.24
MAE:  1.080674
MdAE:  1.366308
>>> epoch  8
 Average training MAE loss: 1.14
-
 Average eval MAE loss: 1.15
MAE:  0.9711895
MdAE:  0.5422096
>>> epoch  9
 Average tra

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Android: While debugger is running, cannot back out and go back into an app
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 11924, 1024, 2096, 2139, 8569, 13327, 2003, 2770, 1010, 3685, 2067, 2041, 1998, 2175, 2067, 2046, 2019, 10439, 102], [101, 11924, 1024, 10439, 27774, 2196, 2579, 2013, 27339, 9397, 1012, 20950, 102, 0, 0, 0, 0, 0, 0, 0], [101, 11924, 1024, 3675, 5144, 2024, 3714, 2005, 3746, 8584, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 11924, 1024, 2516, 8237, 2003, 6913, 2043, 2440, 18182, 17624, 3898, 2003, 2109, 1012, 102, 0, 0, 0, 0], [101, 16380, 1024, 8011, 1998, 4530, 4949, 9231, 5754, 17287, 9285, 102, 0, 0, 0, 0, 0, 0, 0, 0]]
BATCH_SIZE :  405
BATCH_SIZE :  405
usingbert tokenizer
BATCH_SIZE :  405
Start training for  {'train': ['titanium'], 'test': ['titanium']} .....




>>> epoch  0
 Average training MAE loss: 7.35
-
 Average eval MAE loss: 16.89
MAE:  17.041813
MdAE:  17.242514
>>> epoch  1
 Average training MAE loss: 15.59
-
 Average eval MAE loss: 15.26
MAE:  15.13311
MdAE:  14.880337
>>> epoch  2
 Average training MAE loss: 8.63
-
 Average eval MAE loss: 3.14
MAE:  3.0250025
MdAE:  1.8830214
>>> epoch  3
 Average training MAE loss: 4.34
-
 Average eval MAE loss: 2.96
MAE:  2.8406284
MdAE:  1.532167
>>> epoch  4
 Average training MAE loss: 4.03
-
 Average eval MAE loss: 2.32
MAE:  2.1793866
MdAE:  1.5262527
>>> epoch  5
 Average training MAE loss: 4.01
-
 Average eval MAE loss: 2.94
MAE:  2.816268
MdAE:  1.5141902
>>> epoch  6
 Average training MAE loss: 4.22
-
 Average eval MAE loss: 2.68
MAE:  2.5274434
MdAE:  1.61954
>>> epoch  7
 Average training MAE loss: 3.97
-
 Average eval MAE loss: 2.64
MAE:  2.5171034
MdAE:  2.0834866
>>> epoch  8
 Average training MAE loss: 3.85
-
 Average eval MAE loss: 2.28
MAE:  2.1404345
MdAE:  1.6544824
>>> epoch  9

Some weights of BertSP were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.dense1.weight', 'bert.dense2.weight', 'bert.score.weight', 'bert.transformer.embeddings.LayerNorm.bias', 'bert.transformer.embeddings.LayerNorm.weight', 'bert.transformer.embeddings.position_embeddings.weight', 'bert.transformer.embeddings.token_type_embeddings.weight', 'bert.transformer.embeddings.word_embeddings.weight', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.transformer.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.transformer.encoder.layer.0.attention.output.dense.bias', 'bert.transformer.encoder.layer.0.attention.output.dense.weight', 'bert.transformer.encoder.layer.0.attention.self.key.bias', 'bert.transformer.encoder.layer.0.attention.self.key.weight', 'bert.transformer.encoder.layer.0.attention.self.query.bias', 'bert.transformer.encoder.layer.0.attention.self.query.weight', 'bert.transformer.encoder.layer.0.

### text : title
Input data feed :::  Asset data does not correctly obey contextual ownership like the entity
within project split!
usingbert tokenizer


  train_data = train_data.append(df)


usingbert tokenizer
[[101, 11412, 2951, 2515, 2025, 11178, 15470, 6123, 8787, 6095, 2066, 1996, 9178, 102, 0, 0, 0, 0, 0, 0], [101, 14451, 25416, 21898, 19204, 2012, 1996, 2717, 7563, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2919, 20248, 23032, 5651, 2972, 3074, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 4064, 5164, 2987, 1005, 1056, 6366, 2019, 9178, 3200, 1006, 19701, 2987, 1005, 1056, 2147, 2593, 1007, 102, 0], [101, 4840, 4748, 10020, 5310, 19204, 2180, 1005, 1056, 2147, 2006, 1013, 2968, 1013, 5198, 1013, 2033, 102, 0, 0]]
BATCH_SIZE :  86
BATCH_SIZE :  86
usingbert tokenizer
BATCH_SIZE :  86
Start training for  {'train': ['usergrid'], 'test': ['usergrid']} .....




>>> epoch  0
 Average training MAE loss: 4.76
-
 Average eval MAE loss: 17.87
MAE:  17.96912
MdAE:  17.91798
>>> epoch  1
 Average training MAE loss: 6.49
-
 Average eval MAE loss: 0.99
MAE:  1.4555333
MdAE:  1.4875145
>>> epoch  2
 Average training MAE loss: 2.61
-
 Average eval MAE loss: 5.32
MAE:  5.4205475
MdAE:  5.3690004
>>> epoch  3
 Average training MAE loss: 3.01
-
 Average eval MAE loss: 1.70
MAE:  1.7143028
MdAE:  1.5714598
>>> epoch  4
 Average training MAE loss: 1.73
-
 Average eval MAE loss: 1.45
MAE:  1.5482124
MdAE:  1.267483
>>> epoch  5
 Average training MAE loss: 1.44
-
 Average eval MAE loss: 1.40
MAE:  1.788959
MdAE:  1.1005278
>>> epoch  6
 Average training MAE loss: 1.15
-
 Average eval MAE loss: 0.94
MAE:  1.286987
MdAE:  0.5146842
>>> epoch  7
 Average training MAE loss: 1.08
-
 Average eval MAE loss: 0.89
MAE:  1.2683158
MdAE:  0.56405926
>>> epoch  8
 Average training MAE loss: 1.03
-
 Average eval MAE loss: 1.11
MAE:  1.5537964
MdAE:  1.3142138
>>> epoch  9


### Cross Project Training Script - Within Repository

In [None]:
global WITHIN_PROJECT
WITHIN_PROJECT = False

# within repo
TRAIN_TEST_FILE_PAIRS = [
                        {'train': ['mesos'], 'test': ['usergrid']},
                        {'train': ['usergrid'], 'test': ['mesos']},
                        {'train': ['appceleratorstudio'], 'test': ['aptanastudio']},
                        {'train': ['appceleratorstudio'], 'test': ['titanium']},
                        {'train': ['titanium'], 'test': ['appceleratorstudio']},
                        {'train': ['aptanastudio'], 'test': ['titanium']},
                        {'train': ['mule'], 'test': ['mulestudio']},
                        {'train': ['mulestudio'], 'test': ['mule']}
                        ]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME
    for file in TRAIN_TEST_FILE_PAIRS:
        if TOKENIZER == 'bbpe':
            config = GPT2Config(num_labels=1, pad_token_id=50257)
        elif TOKENIZER == 'gpt2':
            config = GPT2Config(num_labels=1, pad_token_id=50256)
        elif TOKENIZER == 'wordpiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'sentencepiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'wordlevel':
            config = GPT2Config(num_labels=1, pad_token_id=3)
        if MODEL_NAME == 'gpt2':
            MODEL = LinearGPT2.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        elif MODEL_NAME == 'gpt2sp':
            MODEL = GPT2SP.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        torch.cuda.empty_cache()
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""


if __name__ == "__main__":
    main()

### Cross Project Training Script - Cross Repository

In [None]:
global WITHIN_PROJECT
WITHIN_PROJECT = False

# cross repo
TRAIN_TEST_FILE_PAIRS = [
                        {'train': ['clover'], 'test': ['usergrid']},
                        {'train': ['talendesb'], 'test': ['mesos']},
                        {'train': ['talenddataquality'], 'test': ['aptanastudio']},
                        {'train': ['mule'], 'test': ['titanium']},
                        {'train': ['talenddataquality'], 'test': ['appceleratorstudio']},
                        {'train': ['mulestudio'], 'test': ['titanium']},
                        {'train': ['appceleratorstudio'], 'test': ['mulestudio']},
                        {'train': ['appceleratorstudio'], 'test': ['mule']}
                        ]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME
    for file in TRAIN_TEST_FILE_PAIRS:
        if TOKENIZER == 'gpt2':
            config = GPT2Config(num_labels=1, pad_token_id=50256)
        elif TOKENIZER == 'wordpiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'sentencepiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'wordlevel':
            config = GPT2Config(num_labels=1, pad_token_id=3)
        if MODEL_NAME == 'gpt2':
            MODEL = LinearGPT2.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        elif MODEL_NAME == 'gpt2sp':
            MODEL = GPT2SP.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        torch.cuda.empty_cache()
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""


if __name__ == "__main__":
    main()