# Model Training Script

### Required Libraries

In [15]:
pip install torch pandas===1.5.3 transformers numpy tokenizers koila tensorboard 

Note: you may need to restart the kernel to use updated packages.


In [17]:
import torch
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import GPT2Tokenizer, AdamW, get_linear_schedule_with_warmup
import numpy as np
import time
from torch.utils.tensorboard import SummaryWriter
from GPT2SP import GPT2ForSequenceClassification as GPT2SP
from transformers import GPT2ForSequenceClassification as LinearGPT2
from transformers import GPT2Config
import os
from tokenizers import Tokenizer
import torch.nn as nn

### Hyperparameters

In [18]:
# Experiment settings. These are ordinary module-level names: a `global`
# statement at module scope is a no-op, so none is used here.

# number of passes over the training data
EPOCHS = 20
# batch size = len(training set) * ratio when dynamic batching is enabled
BATCH_SIZE_RATIO = 0.1 # within proj: 0.3 / cross proj: 0.4
# every input is truncated / padded to this many tokens
SEQUENCE_LEN = 20
# learning rate handed to the optimizer
LEARNING_RATE = 5e-4
TOKENIZER = 'gpt2' # available: gpt2, wordlevel, sentencepiece, wordpiece 
MODEL_NAME = 'gpt2sp' # available: gpt2sp, gpt2
# when True the issue description is appended to the title as model input
ADD_DESCRIPTION = False

# define device
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# define files to be used
DATA_PATH = './sp_dataset/marked_data/'

### Helper Functions and Global State

In [19]:
# Result text accumulated by train_eval_test(); main() writes it to the
# per-project results file and then resets it.
OUTPUT = '  '
# Model currently being trained; assigned in main().
MODEL = None
# When True, data_processing() derives BATCH_SIZE from the training-set size.
DYNAMIC_BATCH = True
# Batch size used by prepare_dataloader(); set by data_processing() when
# DYNAMIC_BATCH is enabled.
BATCH_SIZE = None
# Selects the within-project split in data_processing() when truthy,
# otherwise the cross-project path is taken; set by the experiment cell.
WITHIN_PROJECT = None
# Test-set error metrics appended by train_eval_test() (reset on every call).
MAE_RECORDS = []
MDAE_RECORDS = []

def _to_dataloader(tokens, labels, sampler_type):
    """Wrap tokenized inputs and their labels in a DataLoader."""
    seq = torch.tensor(tokens['input_ids'])
    y = torch.tensor(labels.tolist()).type(torch.LongTensor)
    return prepare_dataloader(seq, y, sampler_type=sampler_type)


def data_processing(file_pair):
    """Load, split and tokenize the data for one train/test file pair.

    Args:
        file_pair: dict with keys 'train' and 'test', each a list of file
            names (without the '.csv' suffix) located under DATA_PATH.

    Returns:
        Tuple ``(file_pair, train_dataloader, val_dataloader,
        all_test_dataloader, test_file_names)`` where the last two are
        parallel lists (one entry per test set).

    Side effects:
        Sets the global BATCH_SIZE when DYNAMIC_BATCH is enabled.
    """
    global BATCH_SIZE

    # Read every training file first and concatenate once: DataFrame.append
    # is deprecated (removed in pandas 2.0) and copies on every call.
    frames = [prepare_dataframe(DATA_PATH + train_file_name + '.csv')
              for train_file_name in file_pair['train']]
    if frames:
        train_data = pd.concat(frames, ignore_index=True)
    else:
        train_data = pd.DataFrame(columns=['text', 'label'])

    # data split
    if WITHIN_PROJECT:
        train_text, train_labels, val_text, val_labels, test_text, test_labels = within_project_split(train_data)
    else:
        train_text, train_labels, val_text, val_labels = train_val_split(train_data, 0.6)

    # define batch size dynamically based on training length
    if DYNAMIC_BATCH:
        # never let the ratio round down to 0: DataLoader rejects batch_size=0
        BATCH_SIZE = max(1, int(len(train_text) * BATCH_SIZE_RATIO))

    # tokenization
    tokens_train = tokenization(train_text.tolist())
    tokens_val = tokenization(val_text.tolist())
    print(tokens_train['input_ids'][:5])

    train_dataloader = _to_dataloader(tokens_train, train_labels, 'random')
    val_dataloader = _to_dataloader(tokens_val, val_labels, 'sequential')

    # prepare testing datasets
    all_test_dataloader = []
    test_file_names = []
    if WITHIN_PROJECT:
        # the test set is the tail of the same project's data
        tokens_test = tokenization(test_text.tolist())
        all_test_dataloader.append(_to_dataloader(tokens_test, test_labels, 'sequential'))
        test_file_names.append(file_pair['test'][0])
        return file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names

    # cross project: every test file is used in full as its own test set
    for test_file_name in file_pair['test']:
        test_data = prepare_dataframe(DATA_PATH + test_file_name + '.csv')
        tokens_test = tokenization(test_data['text'].tolist())
        all_test_dataloader.append(_to_dataloader(tokens_test, test_data['label'], 'sequential'))
        test_file_names.append(test_file_name)
    print('cross project data processing!')
    return file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names


def train_val_split(data, split_ratio):
    """Cut ``data`` into a leading training part and a trailing validation part.

    The first ``int(len(data) * split_ratio)`` rows go to training, the
    rest to validation. Returns
    ``(train_text, train_labels, val_text, val_labels)``.
    """
    print('cross project split!')
    cut = int(len(data) * split_ratio)
    head, tail = slice(None, cut), slice(cut, None)
    texts, labels = data['text'], data['label']
    return texts[head], labels[head], texts[tail], labels[tail]


def tokenization(text_list):
    """Encode ``text_list`` with the tokenizer selected by TOKENIZER.

    Every sequence is truncated or padded to SEQUENCE_LEN tokens.

    Args:
        text_list: list of strings to encode.

    Returns:
        A mapping with an 'input_ids' entry holding one list of token ids
        per input string.

    Raises:
        ValueError: if TOKENIZER names an unsupported tokenizer.
    """
    if TOKENIZER == 'wordlevel':
        print('using wordlevel tokenizer!')
        tokenizer = Tokenizer.from_file('all_tokenizers/word_level/wordlevel.json')
        pad_id = 3  # same id main() configures as pad_token_id for 'wordlevel'
        input_ids = []
        for sentence in text_list:
            ids = tokenizer.encode(sentence).ids[:SEQUENCE_LEN]
            # right-pad short sequences up to the fixed length
            ids = ids + [pad_id] * (SEQUENCE_LEN - len(ids))
            input_ids.append(ids)
        return {'input_ids': input_ids}

    if TOKENIZER == 'wordpiece':
        # imported here because the file's import cell does not provide it
        from transformers import BertTokenizer
        print('using wordpiece tokenizer!')
        tokenizer = BertTokenizer('all_tokenizers/word_piece/vocab.txt')
    elif TOKENIZER == 'sentencepiece':
        # imported here because the file's import cell does not provide it
        from transformers import XLNetTokenizer
        print('using sentencepiece tokenizer!')
        tokenizer = XLNetTokenizer('all_tokenizers/sentence_piece/spm_tokenizer.model', padding_side='right')
    elif TOKENIZER == 'gpt2':
        print('using pretrained gpt-2 tokenizer')
        tokenizer = GPT2Tokenizer.from_pretrained(TOKENIZER)
        # GPT-2 ships without a pad token. Reuse the end-of-text token
        # (id 50256, the pad_token_id main() configures) instead of an
        # out-of-vocabulary '[PAD]' string that only maps to that id via the
        # unknown-token fallback.
        tokenizer.pad_token = tokenizer.eos_token
    else:
        raise ValueError(
            "unsupported TOKENIZER %r; expected one of: "
            "gpt2, wordlevel, sentencepiece, wordpiece" % (TOKENIZER,))
    return tokenizer.batch_encode_plus(text_list, truncation=True, max_length=SEQUENCE_LEN, padding='max_length')


def prepare_dataframe(file_name):
    """Read one issue CSV and return a two-column ('text', 'label') frame.

    'text' is the issue title, optionally followed by " : " and the
    description when ADD_DESCRIPTION is set; 'label' is the 'storypoint'
    column.

    Args:
        file_name: path of a CSV file with 'title', 'storypoint' and (when
            ADD_DESCRIPTION is set) 'description' columns.

    Returns:
        pandas.DataFrame with columns 'text' and 'label'.
    """
    data = pd.read_csv(file_name)
    # some rows have no description, fill blank to avoid Null
    data = data.fillna(' ')

    if ADD_DESCRIPTION:
        print("### text : title+description")
        text = (data['title'] + " : " + data["description"]).tolist()
    else:
        print("### text : title")
        text = data['title'].tolist()

    # show the first sample as a sanity check; skip it for a file without
    # rows instead of failing with IndexError
    if text:
        print("Input data feed ::: ", text[0])
    return pd.DataFrame(data={'text': text, 'label': data['storypoint']})


def prepare_dataloader(seq, y, sampler_type):
    """Build a DataLoader over ``(seq, y)`` pairs using the global BATCH_SIZE.

    Args:
        seq: tensor of inputs, first dimension indexes samples.
        y: tensor of labels with the same first dimension as ``seq``.
        sampler_type: 'random' to shuffle samples, 'sequential' to keep
            their order.

    Returns:
        torch.utils.data.DataLoader yielding ``(seq_batch, y_batch)``.

    Raises:
        ValueError: if ``sampler_type`` is neither 'random' nor 'sequential'.
    """
    tensor_dataset = TensorDataset(seq, y)
    if sampler_type == 'random':
        sampler = RandomSampler(tensor_dataset)
    elif sampler_type == 'sequential':
        sampler = SequentialSampler(tensor_dataset)
    else:
        # previously fell through to an UnboundLocalError on `sampler`
        raise ValueError(
            "sampler_type must be 'random' or 'sequential', got %r" % (sampler_type,))
    return DataLoader(tensor_dataset, sampler=sampler, batch_size=BATCH_SIZE)


def within_project_split(data):
    """Split one project's rows into consecutive train / validation / test parts.

    The first 60% of the rows are used for training, the next 20% for
    validation and the remaining rows for testing. Returns
    ``(train_text, train_labels, val_text, val_labels, test_text, test_labels)``.
    """
    print('within project split!')
    n_rows = len(data)
    first_cut, second_cut = int(n_rows * 0.6), int(n_rows * 0.8)
    windows = (
        slice(None, first_cut),
        slice(first_cut, second_cut),
        slice(second_cut, None),
    )
    texts, labels = data['text'], data['label']
    parts = []
    for window in windows:
        parts.extend((texts[window], labels[window]))
    return tuple(parts)


def train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, model, test_file_names):
    global LEARNING_RATE, EPOCHS, MAE_RECORDS, MDAE_RECORDS, DEVICE
    optimizer = AdamW(MODEL.parameters(), lr=LEARNING_RATE)    
    # Total number of training steps is [number of batches] x [number of epochs]
    total_steps = len(train_dataloader) * EPOCHS
    # Create the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
    print("Start training for ", file_pair, ".....")
    training_start_time = time.time()
    
    # tensorboard writer
    writer_path = 'tb/' + str(file_pair['train'][0]) + '_' + str(file_pair['test'][0])
    writer = SummaryWriter(writer_path)
    
    # vars for model selection
    min_eval_loss_epoch = [10000, 0]
    
    time_records = []
    MAE_RECORDS = []
    MDAE_RECORDS = []
    start_time = time.time()
    loss_fct = nn.L1Loss()
    for e in range(EPOCHS):
        # ---TRAINING---
        # clean GPU memory
        torch.cuda.empty_cache()
        print(">>> epoch ", e)
        # set model into train mode
        model.train()
        total_train_loss = 0
        for step, batch in enumerate(train_dataloader):            
            b_input_ids = batch[0].to(DEVICE)
            b_labels = batch[1].to(DEVICE)
            model.zero_grad()
            result = model(b_input_ids, 
                           labels=b_labels,
                           return_dict=True)
            loss = result.loss
            logits = result.logits
            total_train_loss += loss.item()  
            loss.backward() 
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # clean memory
            del step, batch, b_input_ids, b_labels, result, loss, logits

        avg_train_loss = total_train_loss / len(train_dataloader)
        print(" Average training MAE loss: {0:.2f}".format(avg_train_loss))
        writer.add_scalar('loss/train', avg_train_loss, e)
        # clean memory
        del avg_train_loss, total_train_loss
        
        time_records.append(time.time() - start_time)
        
        # ---EVAL---
        print("-")
        # set model into eval mode
        model.eval()
        total_eval_loss = 0
        for batch in val_dataloader:            
            b_input_ids = batch[0].to(DEVICE)
            b_labels = batch[1].to(DEVICE)
            model.zero_grad()
            result = model(b_input_ids, 
                           labels=b_labels,
                           return_dict=True)
            loss = result.loss
            logits = result.logits
            total_eval_loss += loss.item()  
            # clean memory
            del b_input_ids, b_labels, batch, result, loss, logits
        avg_eval_loss = total_eval_loss / len(val_dataloader)
        print(" Average eval MAE loss: {0:.2f}".format(avg_eval_loss))
        
        if avg_eval_loss <= min_eval_loss_epoch[0]:
            min_eval_loss_epoch[0] = avg_eval_loss
            min_eval_loss_epoch[1] = e
        
        writer.add_scalar('loss/eval', avg_eval_loss, e)
        # clean memory
        del avg_eval_loss, total_eval_loss
        # save model state to dict
        torch.save(model.state_dict(), './models/' + 'epo_' + str(e))
        
        print("===============================")
        
        # testing on holdout data
        index = 0
        for test_dataloader in all_test_dataloader:
            test_file_name = test_file_names[index]
            index += 1
            testing_start_time = time.time()
            predictions = []
            true_labels = []
            for batch in test_dataloader:
                batch = tuple(t.to(DEVICE) for t in batch)
                b_input_ids, b_labels = batch
                with torch.no_grad():
                    logits = model(b_input_ids)
                logits = logits['logits'].detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()
                predictions.append(logits)
                true_labels.append(label_ids)
            # calculate errors
            distance_records = []
            for i in range(len(predictions)):
                for j in range(len(predictions[i])):
                    distance = abs(predictions[i][j] - true_labels[i][j])
                    distance_records.append(distance)

            ## MAE = mean value of all absolute errors (stored in distance_records)
            MAE = np.mean(np.array(distance_records)) 
            ## MdAE = median value of all absolute errors (stored in distance_records)
            MdAE = np.median(np.array(distance_records)) 

            MAE_RECORDS.append(MAE)
            MDAE_RECORDS.append(MdAE)
            
            global OUTPUT
            OUTPUT +=  'Epochs ' + str(e) + '\n'
            OUTPUT += 'MAE: ' + str(MAE) + '\n'
            OUTPUT += 'MdAE: ' + str(MdAE) + '\n\n'
            print('MAE: ', MAE)
            print('MdAE: ', MdAE)
    writer.flush()
    writer.close()
    
    # select model
    os.rename('models/epo_' + str(min_eval_loss_epoch[1]), 
              'models/' + str(file_pair['train'][0]) + '_' 
              + str(file_pair['test'][0]) + '_epo_' + str(min_eval_loss_epoch[1]))
    
    # del unwanted models
    for i in range(20):
        try:
            os.remove("models/epo_" + str(i))
        except:
            continue
            
    OUTPUT += 'MAE: ' + str(MAE_RECORDS[min_eval_loss_epoch[1]]) \
                + '  MdAE: ' + str(MDAE_RECORDS[min_eval_loss_epoch[1]]) + '\n'
    OUTPUT += 'training time: ' + str(time_records[min_eval_loss_epoch[1]]) + '\n'
    OUTPUT += 'Epochs: ' + str(min_eval_loss_epoch[1]) +'\n'
    global BATCH_SIZE
    OUTPUT += 'batch size: ' + str(BATCH_SIZE) + '\n'
    global ADD_DESCRIPTION
    OUTPUT += 'Description added : ' + str(ADD_DESCRIPTION) + '\n'
    
    
    print('all done for one project')

### Within Project Training Script

In [20]:
# clean GPU memory
torch.cuda.empty_cache() 

In [21]:
# Within-project mode: every project is trained and tested on its own data.
WITHIN_PROJECT = True

_PROJECT_NAMES = (
    'appceleratorstudio',
    'aptanastudio',
    'bamboo',
    'clover',
    'datamanagement',
    'duracloud',
    'jirasoftware',
    'mesos',
    'moodle',
    'mule',
    'mulestudio',
    'springxd',
    'talenddataquality',
    'talendesb',
    'titanium',
    'usergrid',
)

# One experiment per project, with the same name on the train and test side.
TRAIN_TEST_FILE_PAIRS = [
    {'train': [project], 'test': [project]} for project in _PROJECT_NAMES
]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME
    for file in TRAIN_TEST_FILE_PAIRS:
        if TOKENIZER == 'bbpe':
            config = GPT2Config(num_labels=1, pad_token_id=50257)
        elif TOKENIZER == 'gpt2':
            config = GPT2Config(num_labels=1, pad_token_id=50256)
        elif TOKENIZER == 'wordpiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'sentencepiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'wordlevel':
            config = GPT2Config(num_labels=1, pad_token_id=3)           
        if MODEL_NAME == 'gpt2':
            MODEL = LinearGPT2.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        elif MODEL_NAME == 'gpt2sp':
            MODEL = GPT2SP.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        torch.cuda.empty_cache()            
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""

                
if __name__ == "__main__":
    main()

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Add CA against object literals in function invocations
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[4550, 7257, 1028, 2134, 4187, 874, 287, 2163, 800, 20968, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [10260, 24150, 329, 2034, 7015, 1352, 13877, 284, 2034, 7015, 1352, 11112, 220, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [16447, 649, 19449, 32815, 329, 26144, 1074, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [16447, 4935, 31458, 14161, 7873, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [3791, 27850, 4935, 16884, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['appceleratorstudio'], 'test': ['appceleratorstudio']} .....
>>> epoch  0
 Average training MAE loss: 29.58
-
 Average eval MAE loss: 11.26
MAE:  10.987148
MdAE:  11.080142
>>> epoch  1
 Average training MAE loss: 4.32
-
 Average eval MAE loss: 2.97
MAE:  2.7040088
MdAE:  2.5397143
>>> epoch  2
 Average training MAE loss: 3.61
-
 Average eval MAE loss: 2.85
MAE:  3.1570585
MdAE:  3.041329
>>> epoch  3
 Average training MAE loss: 3.25
-
 Average eval MAE loss: 2.52
MAE:  2.3036551
MdAE:  1.8241332
>>> epoch  4
 Average training MAE loss: 2.70
-
 Average eval MAE loss: 2.75
MAE:  2.4968967
MdAE:  2.2525737
>>> epoch  5
 Average training MAE loss: 2.85
-
 Average eval MAE loss: 1.57
MAE:  1.4605458
MdAE:  1.1322608
>>> epoch  6
 Average training MAE loss: 2.61
-
 Average eval MAE loss: 2.34
MAE:  2.087717
MdAE:  1.9854324
>>> epoch  7
 Average training MAE loss: 2.89
-
 Average eval MAE loss: 1.98
MAE:  1.7706888
MdAE:  1.6635478
>>> epoch  8
 Average traini

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Add Copy URL actions to right-click context menu of Remote view for S3 files
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[4550, 17393, 10289, 4028, 284, 826, 12, 12976, 4732, 6859, 286, 21520, 1570, 329, 311, 18, 3696, 50256, 50256, 50256], [32, 457, 2271, 8404, 284, 1280, 257, 649, 4554, 286, 2346, 618, 4756, 3696, 2884, 3964, 19142, 50256, 50256, 50256], [19746, 3342, 46207, 318, 3402, 2354, 262, 3159, 13215, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [11909, 47, 5231, 42829, 24547, 329, 2560, 5050, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [6187, 16406, 281, 2939, 656, 262, 11532, 5464, 815, 2251, 281, 2939, 7621, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['aptanastudio'], 'test': ['aptanastudio']} .....
>>> epoch  0
 Average training MAE loss: 12.39
-
 Average eval MAE loss: 8.68
MAE:  6.8016005
MdAE:  4.7819023
>>> epoch  1
 Average training MAE loss: 7.47
-
 Average eval MAE loss: 5.06
MAE:  3.7691936
MdAE:  2.853842
>>> epoch  2
 Average training MAE loss: 4.44
-
 Average eval MAE loss: 3.56
MAE:  3.752133
MdAE:  3.5974903
>>> epoch  3
 Average training MAE loss: 4.24
-
 Average eval MAE loss: 3.67
MAE:  3.9397628
MdAE:  3.60608
>>> epoch  4
 Average training MAE loss: 3.89
-
 Average eval MAE loss: 5.38
MAE:  4.108749
MdAE:  2.9844902
>>> epoch  5
 Average training MAE loss: 3.91
-
 Average eval MAE loss: 3.87
MAE:  4.214387
MdAE:  3.3009753
>>> epoch  6
 Average training MAE loss: 2.90
-
 Average eval MAE loss: 4.28
MAE:  4.4876394
MdAE:  3.350463
>>> epoch  7
 Average training MAE loss: 2.37
-
 Average eval MAE loss: 4.56
MAE:  3.76555
MdAE:  2.6710098
>>> epoch  8
 Average training MAE loss: 2.40
-


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Allows CVS repo to timeout and report on locking issues
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[34934, 327, 20304, 29924, 284, 26827, 290, 989, 319, 22656, 2428, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [12154, 257, 1382, 284, 307, 4624, 379, 262, 1182, 286, 262, 1382, 16834, 986, 357, 273, 4370, 262, 16834, 1502], [23004, 407, 7448, 618, 28006, 10143, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [25685, 1891, 422, 27468, 5797, 284, 37252, 4382, 284, 2291, 412, 4462, 6115, 3817, 2482, 50256, 50256, 50256, 50256, 50256], [14490, 460, 766, 262, 285, 4005, 8265, 357, 8094, 312, 11, 24127, 312, 11, 2196, 8, 1028, 1123, 1410, 13]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['bamboo'], 'test': ['bamboo']} .....
>>> epoch  0
 Average training MAE loss: 14.97
-
 Average eval MAE loss: 2.95
MAE:  2.9147785
MdAE:  3.0826058
>>> epoch  1
 Average training MAE loss: 2.72
-
 Average eval MAE loss: 3.47
MAE:  3.4716907
MdAE:  3.317344
>>> epoch  2
 Average training MAE loss: 1.97
-
 Average eval MAE loss: 1.06
MAE:  1.0819849
MdAE:  0.8899708
>>> epoch  3
 Average training MAE loss: 1.45
-
 Average eval MAE loss: 0.79
MAE:  0.90542656
MdAE:  0.93209827
>>> epoch  4
 Average training MAE loss: 1.44
-
 Average eval MAE loss: 0.67
MAE:  0.762667
MdAE:  0.6214129
>>> epoch  5
 Average training MAE loss: 1.35
-
 Average eval MAE loss: 1.05
MAE:  1.048762
MdAE:  0.9692991
>>> epoch  6
 Average training MAE loss: 1.29
-
 Average eval MAE loss: 1.05
MAE:  1.0059149
MdAE:  0.9754555
>>> epoch  7
 Average training MAE loss: 1.11
-
 Average eval MAE loss: 1.41
MAE:  1.2593452
MdAE:  1.1155174
>>> epoch  8
 Average training MAE loss: 1.18
-
 Ave

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Line coverage data is inconsistent
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[13949, 5197, 1366, 318, 18326, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [19457, 6495, 1398, 6978, 318, 11491, 618, 6906, 319, 257, 17379, 290, 257, 1332, 12, 9491, 422, 262, 976, 285], [34149, 2041, 8875, 341, 5072, 26672, 284, 257, 2723, 26672, 28128, 274, 477, 2723, 0, 50256, 50256, 50256, 50256, 50256], [4550, 2446, 14, 26090, 1241, 8875, 341, 4634, 287, 1628, 6608, 2443, 220, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [12050, 6208, 5660, 19142, 1620, 1365, 290, 2245, 22656, 262, 12454, 4704, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['clover'], 'test': ['clover']} .....
>>> epoch  0
 Average training MAE loss: 22.71
-
 Average eval MAE loss: 2.39
MAE:  4.009734
MdAE:  2.1224427
>>> epoch  1
 Average training MAE loss: 4.06
-
 Average eval MAE loss: 1.83
MAE:  3.6147738
MdAE:  1.1553788
>>> epoch  2
 Average training MAE loss: 3.85
-
 Average eval MAE loss: 1.77
MAE:  3.6172078
MdAE:  1.2707124
>>> epoch  3
 Average training MAE loss: 3.47
-
 Average eval MAE loss: 2.27
MAE:  4.109285
MdAE:  2.2994049
>>> epoch  4
 Average training MAE loss: 3.70
-
 Average eval MAE loss: 2.23
MAE:  3.9261103
MdAE:  2.2204595
>>> epoch  5
 Average training MAE loss: 3.21
-
 Average eval MAE loss: 1.85
MAE:  3.556704
MdAE:  1.0333395
>>> epoch  6
 Average training MAE loss: 2.86
-
 Average eval MAE loss: 3.18
MAE:  4.7024283
MdAE:  3.044798
>>> epoch  7
 Average training MAE loss: 3.19
-
 Average eval MAE loss: 2.04
MAE:  3.7563734
MdAE:  1.5892489
>>> epoch  8
 Average training MAE loss: 2.33
-
 Averag

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Transition git repositories to Stash
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[8291, 653, 17606, 38072, 284, 520, 1077, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [19006, 1096, 14848, 4365, 2643, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [11505, 510, 30948, 2257, 3788, 21898, 8341, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [8291, 653, 284, 7326, 23079, 20396, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [4550, 28486, 12, 3106, 6436, 7509, 284, 2212, 62, 16680, 361, 270, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['datamanagement'], 'test': ['datamanagement']} .....
>>> epoch  0
 Average training MAE loss: 19.43
-
 Average eval MAE loss: 6.01
MAE:  6.4052997
MdAE:  3.1574316
>>> epoch  1
 Average training MAE loss: 8.83
-
 Average eval MAE loss: 5.89
MAE:  6.198457
MdAE:  3.340217
>>> epoch  2
 Average training MAE loss: 8.18
-
 Average eval MAE loss: 5.36
MAE:  5.6791677
MdAE:  2.6097844
>>> epoch  3
 Average training MAE loss: 6.84
-
 Average eval MAE loss: 5.08
MAE:  5.4207644
MdAE:  2.1257296
>>> epoch  4
 Average training MAE loss: 6.33
-
 Average eval MAE loss: 6.17
MAE:  6.2639666
MdAE:  3.5367558
>>> epoch  5
 Average training MAE loss: 6.08
-
 Average eval MAE loss: 6.62
MAE:  6.960117
MdAE:  3.0643806
>>> epoch  6
 Average training MAE loss: 5.57
-
 Average eval MAE loss: 5.39
MAE:  5.5899777
MdAE:  2.3171682
>>> epoch  7
 Average training MAE loss: 5.11
-
 Average eval MAE loss: 5.73
MAE:  5.78188
MdAE:  2.3647475
>>> epoch  8
 Average training MAE loss:

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Document logging framework
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[24941, 18931, 9355, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [35265, 329, 2829, 19698, 286, 1628, 2196, 3146, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [33, 12171, 3440, 25, 49899, 4388, 360, 5330, 18839, 26151, 286, 838, 22737, 286, 347, 6581, 2695, 50256, 50256, 50256], [12889, 7156, 17, 42, 25, 7412, 11315, 2139, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [12889, 7156, 17, 42, 25, 7412, 4382, 2139, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['duracloud'], 'test': ['duracloud']} .....
>>> epoch  0
 Average training MAE loss: 24.09
-
 Average eval MAE loss: 1.52
MAE:  1.4586334
MdAE:  1.3957963
>>> epoch  1
 Average training MAE loss: 2.12
-
 Average eval MAE loss: 1.76
MAE:  1.6644847
MdAE:  1.3365394
>>> epoch  2
 Average training MAE loss: 1.68
-
 Average eval MAE loss: 1.15
MAE:  1.1322147
MdAE:  1.0121033
>>> epoch  3
 Average training MAE loss: 1.38
-
 Average eval MAE loss: 0.89
MAE:  0.8252006
MdAE:  0.5317217
>>> epoch  4
 Average training MAE loss: 1.23
-
 Average eval MAE loss: 0.85
MAE:  0.7955237
MdAE:  0.56172746
>>> epoch  5
 Average training MAE loss: 1.14
-
 Average eval MAE loss: 0.91
MAE:  0.8237945
MdAE:  0.6069233
>>> epoch  6
 Average training MAE loss: 1.09
-
 Average eval MAE loss: 0.85
MAE:  0.80624837
MdAE:  0.50868374
>>> epoch  7
 Average training MAE loss: 0.93
-
 Average eval MAE loss: 1.05
MAE:  0.89340746
MdAE:  0.5959791
>>> epoch  8
 Average training MAE loss: 

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  As a JIRA Administrator I would like to be able to change the trigger of the night service
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[1722, 257, 449, 40, 3861, 22998, 314, 561, 588, 284, 307, 1498, 284, 1487, 262, 7616, 286, 262, 1755, 2139], [1722, 257, 449, 40, 3861, 22998, 314, 561, 588, 284, 307, 1498, 284, 1487, 262, 7616, 286, 262, 1755, 2139], [46189, 3992, 1818, 47217, 743, 19122, 351, 584, 20652, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [46189, 3992, 1818, 47217, 743, 19122, 351, 584, 20652, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [4550, 2420, 284, 262, 2449, 576, 39266, 366, 44651, 4935, 1, 3275, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['jirasoftware'], 'test': ['jirasoftware']} .....
>>> epoch  0
 Average training MAE loss: 31.09
-
 Average eval MAE loss: 10.98
MAE:  10.867614
MdAE:  11.423271
>>> epoch  1
 Average training MAE loss: 4.01
-
 Average eval MAE loss: 1.61
MAE:  1.6372194
MdAE:  1.2372532
>>> epoch  2
 Average training MAE loss: 3.26
-
 Average eval MAE loss: 1.76
MAE:  1.6942437
MdAE:  1.425348
>>> epoch  3
 Average training MAE loss: 2.92
-
 Average eval MAE loss: 2.36
MAE:  2.1969442
MdAE:  1.8757067
>>> epoch  4
 Average training MAE loss: 3.64
-
 Average eval MAE loss: 2.10
MAE:  1.9119854
MdAE:  1.3133311
>>> epoch  5
 Average training MAE loss: 2.96
-
 Average eval MAE loss: 3.61
MAE:  3.4226427
MdAE:  3.2411823
>>> epoch  6
 Average training MAE loss: 2.15
-
 Average eval MAE loss: 1.91
MAE:  1.9502403
MdAE:  1.5461559
>>> epoch  7
 Average training MAE loss: 2.77
-
 Average eval MAE loss: 2.09
MAE:  1.9232363
MdAE:  1.513504
>>> epoch  8
 Average training MAE loss:

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Report executor terminations to framework schedulers.
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[19100, 3121, 273, 5651, 602, 284, 9355, 6038, 377, 364, 13, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [44, 274, 418, 11778, 815, 12940, 3121, 669, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [3109, 3455, 309, 1921, 42, 62, 7708, 4146, 1961, 1738, 284, 15183, 19653, 13, 50256, 50256, 50256, 50256, 50256, 50256], [23410, 2049, 9355, 10143, 284, 1057, 2233, 284, 2089, 9701, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [7583, 2198, 705, 18300, 814, 1377, 19509, 14269, 1377, 301, 1886, 6, 287, 1281, 12, 19023, 82, 13, 9078, 13]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['mesos'], 'test': ['mesos']} .....
>>> epoch  0
 Average training MAE loss: 18.26
-
 Average eval MAE loss: 2.69
MAE:  2.6786115
MdAE:  2.8393006
>>> epoch  1
 Average training MAE loss: 2.21
-
 Average eval MAE loss: 1.34
MAE:  1.203958
MdAE:  0.6717353
>>> epoch  2
 Average training MAE loss: 2.11
-
 Average eval MAE loss: 1.45
MAE:  1.3714885
MdAE:  0.9744022
>>> epoch  3
 Average training MAE loss: 1.83
-
 Average eval MAE loss: 1.27
MAE:  1.1652453
MdAE:  0.8726387
>>> epoch  4
 Average training MAE loss: 1.72
-
 Average eval MAE loss: 2.32
MAE:  2.1162887
MdAE:  2.1281857
>>> epoch  5
 Average training MAE loss: 1.83
-
 Average eval MAE loss: 1.34
MAE:  1.2206208
MdAE:  1.0420841
>>> epoch  6
 Average training MAE loss: 1.69
-
 Average eval MAE loss: 2.10
MAE:  1.9624155
MdAE:  1.7620015
>>> epoch  7
 Average training MAE loss: 1.48
-
 Average eval MAE loss: 1.58
MAE:  1.5695173
MdAE:  1.2160554
>>> epoch  8
 Average training MAE loss: 1.36
-
 Avera

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Forum: Per-discussion subscription
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[1890, 388, 25, 2448, 12, 15410, 11956, 14569, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [1890, 388, 25, 14883, 416, 304, 12, 4529, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [12154, 7799, 284, 11986, 1728, 7032, 287, 6831, 3842, 355, 2672, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [1890, 388, 25, 5120, 284, 1306, 4704, 2792, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [32, 779, 7034, 338, 1781, 1351, 318, 9277, 3093, 3614, 351, 645, 835, 286, 4379, 1844, 1351, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['moodle'], 'test': ['moodle']} .....
>>> epoch  0
 Average training MAE loss: 35.59
-
 Average eval MAE loss: 14.28
MAE:  8.938059
MdAE:  9.036295
>>> epoch  1
 Average training MAE loss: 12.90
-
 Average eval MAE loss: 15.02
MAE:  5.887364
MdAE:  4.4773145
>>> epoch  2
 Average training MAE loss: 13.48
-
 Average eval MAE loss: 14.33
MAE:  7.248261
MdAE:  7.0820026
>>> epoch  3
 Average training MAE loss: 12.30
-
 Average eval MAE loss: 14.44
MAE:  11.0118065
MdAE:  10.478514
>>> epoch  4
 Average training MAE loss: 10.98
-
 Average eval MAE loss: 14.23
MAE:  9.541613
MdAE:  8.350607
>>> epoch  5
 Average training MAE loss: 9.48
-
 Average eval MAE loss: 13.91
MAE:  8.729458
MdAE:  6.6272516
>>> epoch  6
 Average training MAE loss: 8.71
-
 Average eval MAE loss: 14.19
MAE:  11.955379
MdAE:  10.179546
>>> epoch  7
 Average training MAE loss: 8.46
-
 Average eval MAE loss: 13.93
MAE:  9.985378
MdAE:  7.129906
>>> epoch  8
 Average training MAE loss: 8.50
-

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Implement true multicast functionality for <all> processor
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[3546, 26908, 2081, 47368, 459, 11244, 329, 1279, 439, 29, 12649, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [13921, 337, 2261, 1104, 1395, 32, 8611, 319, 1395, 32, 4133, 973, 416, 257, 8225, 2134, 7515, 5633, 50256, 50256], [818, 9152, 21201, 8265, 287, 337, 2261, 4755, 6082, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [28446, 12, 17212, 815, 900, 6631, 21437, 351, 938, 6631, 2722, 878, 7216, 284, 23641, 48, 50256, 50256, 50256, 50256], [28446, 12, 17212, 815, 1104, 18305, 516, 779, 2663, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['mule'], 'test': ['mule']} .....
>>> epoch  0
 Average training MAE loss: 23.88
-
 Average eval MAE loss: 7.08
MAE:  7.265451
MdAE:  7.4112415
>>> epoch  1
 Average training MAE loss: 4.07
-
 Average eval MAE loss: 4.16
MAE:  4.2881823
MdAE:  4.134663
>>> epoch  2
 Average training MAE loss: 3.38
-
 Average eval MAE loss: 2.91
MAE:  3.0736413
MdAE:  3.046388
>>> epoch  3
 Average training MAE loss: 2.95
-
 Average eval MAE loss: 3.36
MAE:  3.3327854
MdAE:  2.5674357
>>> epoch  4
 Average training MAE loss: 3.53
-
 Average eval MAE loss: 2.87
MAE:  2.8163564
MdAE:  2.0070612
>>> epoch  5
 Average training MAE loss: 3.22
-
 Average eval MAE loss: 2.51
MAE:  2.6810136
MdAE:  2.632815
>>> epoch  6
 Average training MAE loss: 2.38
-
 Average eval MAE loss: 2.73
MAE:  2.9150789
MdAE:  2.330892
>>> epoch  7
 Average training MAE loss: 2.16
-
 Average eval MAE loss: 2.54
MAE:  2.593792
MdAE:  2.005897
>>> epoch  8
 Average training MAE loss: 1.96
-
 Average eval 

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Support for request/reply
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[15514, 329, 2581, 14, 47768, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [34, 34574, 1330, 257, 11733, 1628, 422, 15151, 1231, 8563, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [29238, 284, 7349, 2438, 466, 407, 651, 3024, 12380, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [3118, 540, 284, 751, 257, 2882, 618, 4441, 257, 1218, 5202, 287, 262, 976, 285, 11125, 50256, 50256, 50256, 50256], [28531, 43076, 287, 262, 23735, 1570, 389, 407, 852, 4615, 618, 345, 4781, 477, 262, 4847, 286, 257, 1611, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['mulestudio'], 'test': ['mulestudio']} .....
>>> epoch  0
 Average training MAE loss: 23.79
-
 Average eval MAE loss: 5.02
MAE:  4.024889
MdAE:  3.929449
>>> epoch  1
 Average training MAE loss: 4.12
-
 Average eval MAE loss: 4.69
MAE:  3.6797862
MdAE:  2.322072
>>> epoch  2
 Average training MAE loss: 3.02
-
 Average eval MAE loss: 4.91
MAE:  3.9395378
MdAE:  3.0281963
>>> epoch  3
 Average training MAE loss: 3.40
-
 Average eval MAE loss: 4.65
MAE:  3.6537077
MdAE:  3.0081406
>>> epoch  4
 Average training MAE loss: 2.85
-
 Average eval MAE loss: 4.62
MAE:  3.6351418
MdAE:  2.5594716
>>> epoch  5
 Average training MAE loss: 3.06
-
 Average eval MAE loss: 4.69
MAE:  3.7014039
MdAE:  2.552209
>>> epoch  6
 Average training MAE loss: 2.59
-
 Average eval MAE loss: 4.82
MAE:  3.7711344
MdAE:  2.8268561
>>> epoch  7
 Average training MAE loss: 2.54
-
 Average eval MAE loss: 4.85
MAE:  3.7334583
MdAE:  3.2661743
>>> epoch  8
 Average training MAE loss: 2.73
-

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  HDFS ItemWriter
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[39, 8068, 50, 9097, 34379, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [39, 8068, 50, 7231, 3597, 31904, 6097, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [29239, 33432, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [51, 29291, 1366, 4645, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [44387, 6404, 554, 3495, 295, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['springxd'], 'test': ['springxd']} .....
>>> epoch  0
 Average training MAE loss: 13.94
-
 Average eval MAE loss: 2.64
MAE:  2.7572246
MdAE:  2.9196756
>>> epoch  1
 Average training MAE loss: 3.26
-
 Average eval MAE loss: 1.82
MAE:  1.7190495
MdAE:  1.7989675
>>> epoch  2
 Average training MAE loss: 2.64
-
 Average eval MAE loss: 2.14
MAE:  2.1704345
MdAE:  2.139295
>>> epoch  3
 Average training MAE loss: 2.18
-
 Average eval MAE loss: 2.03
MAE:  2.0475707
MdAE:  1.8719149
>>> epoch  4
 Average training MAE loss: 2.19
-
 Average eval MAE loss: 2.00
MAE:  1.9996293
MdAE:  1.7158365
>>> epoch  5
 Average training MAE loss: 2.03
-
 Average eval MAE loss: 2.25
MAE:  2.3316114
MdAE:  2.3409543
>>> epoch  6
 Average training MAE loss: 2.21
-
 Average eval MAE loss: 1.78
MAE:  1.7102009
MdAE:  1.294445
>>> epoch  7
 Average training MAE loss: 1.98
-
 Average eval MAE loss: 1.82
MAE:  1.7023258
MdAE:  1.1309569
>>> epoch  8
 Average training MAE loss: 1.83
-
 

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  SQL Server Single Sign On Support doesn't work in data profiler repository connections
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[17861, 9652, 14206, 5865, 1550, 7929, 1595, 470, 670, 287, 1366, 1534, 5329, 16099, 8787, 50256, 50256, 50256, 50256, 50256], [27914, 734, 15180, 287, 8373, 8893, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [32048, 3781, 1058, 1321, 62, 15952, 2611, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [48101, 3781, 21337, 389, 366, 2164, 16548, 503, 1, 290, 2314, 307, 973, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [1, 7680, 12515, 15274, 1, 6859, 857, 407, 3359, 319, 257, 3084, 3781, 357, 4480, 360, 48, 14330, 8, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['talenddataquality'], 'test': ['talenddataquality']} .....
>>> epoch  0
 Average training MAE loss: 27.47
-
 Average eval MAE loss: 10.40
MAE:  12.592496
MdAE:  13.384028
>>> epoch  1
 Average training MAE loss: 4.96
-
 Average eval MAE loss: 3.66
MAE:  3.189951
MdAE:  3.0844078
>>> epoch  2
 Average training MAE loss: 3.69
-
 Average eval MAE loss: 3.66
MAE:  3.4299228
MdAE:  3.3514829
>>> epoch  3
 Average training MAE loss: 3.64
-
 Average eval MAE loss: 3.87
MAE:  2.5538354
MdAE:  1.8881094
>>> epoch  4
 Average training MAE loss: 3.49
-
 Average eval MAE loss: 3.59
MAE:  3.1884162
MdAE:  2.7527297
>>> epoch  5
 Average training MAE loss: 3.10
-
 Average eval MAE loss: 4.14
MAE:  4.761494
MdAE:  4.660549
>>> epoch  6
 Average training MAE loss: 3.08
-
 Average eval MAE loss: 3.50
MAE:  2.6498954
MdAE:  2.0023987
>>> epoch  7
 Average training MAE loss: 2.75
-
 Average eval MAE loss: 3.69
MAE:  3.398387
MdAE:  2.7549253
>>> epoch  8
 Average training M

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Investigation: S1 Improved user experience with TOS/TIS/ESB Studio
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[19070, 7065, 25, 311, 16, 24125, 2836, 1998, 351, 309, 2640, 14, 51, 1797, 14, 1546, 33, 11733, 50256, 50256], [19070, 10055, 25, 311, 17, 7320, 4809, 46333, 290, 10131, 9352, 287, 309, 2640, 14, 51, 1797, 14, 41501, 50256], [19070, 10055, 25, 311, 18, 24125, 23735, 6060, 49500, 36109, 287, 309, 2640, 14, 51, 1797, 14, 41501, 50256, 50256], [19070, 10055, 25, 311, 22, 309, 1797, 11923, 87, 1912, 319, 262, 4809, 19239, 50256, 50256, 50256, 50256, 50256, 50256], [19070, 10055, 25, 33084, 13472, 649, 6443, 290, 6459, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['talendesb'], 'test': ['talendesb']} .....
>>> epoch  0
 Average training MAE loss: 23.50
-
 Average eval MAE loss: 8.62
MAE:  8.415205
MdAE:  8.689943
>>> epoch  1
 Average training MAE loss: 2.79
-
 Average eval MAE loss: 1.87
MAE:  1.8743789
MdAE:  1.529362
>>> epoch  2
 Average training MAE loss: 1.29
-
 Average eval MAE loss: 0.95
MAE:  0.94719523
MdAE:  0.64888406
>>> epoch  3
 Average training MAE loss: 1.12
-
 Average eval MAE loss: 0.95
MAE:  0.9285971
MdAE:  0.641299
>>> epoch  4
 Average training MAE loss: 1.29
-
 Average eval MAE loss: 0.88
MAE:  0.88481265
MdAE:  0.6023309
>>> epoch  5
 Average training MAE loss: 1.00
-
 Average eval MAE loss: 0.87
MAE:  0.89930934
MdAE:  0.6265737
>>> epoch  6
 Average training MAE loss: 0.76
-
 Average eval MAE loss: 1.04
MAE:  0.9560489
MdAE:  0.6310651
>>> epoch  7
 Average training MAE loss: 0.72
-
 Average eval MAE loss: 0.88
MAE:  0.87928915
MdAE:  0.5157157
>>> epoch  8
 Average training MAE loss: 0.6

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Android: While debugger is running, cannot back out and go back into an app
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[25934, 25, 2893, 49518, 318, 2491, 11, 2314, 736, 503, 290, 467, 736, 656, 281, 598, 50256, 50256, 50256, 50256], [25934, 25, 2034, 9641, 1239, 2077, 422, 256, 544, 381, 13, 19875, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [25934, 25, 15443, 6608, 389, 5445, 329, 7412, 7680, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [25934, 25, 11851, 5657, 318, 9066, 618, 1336, 9612, 22870, 3159, 318, 973, 13, 50256, 50256, 50256, 50256, 50256, 50256], [35742, 25, 12697, 290, 4268, 3975, 6757, 37647, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['titanium'], 'test': ['titanium']} .....
>>> epoch  0
 Average training MAE loss: 27.16
-
 Average eval MAE loss: 5.68
MAE:  5.756569
MdAE:  5.542074
>>> epoch  1
 Average training MAE loss: 5.64
-
 Average eval MAE loss: 5.80
MAE:  5.460118
MdAE:  5.171556
>>> epoch  2
 Average training MAE loss: 4.62
-
 Average eval MAE loss: 2.43
MAE:  2.2119377
MdAE:  2.1355476
>>> epoch  3
 Average training MAE loss: 3.72
-
 Average eval MAE loss: 2.71
MAE:  2.430834
MdAE:  1.6455433
>>> epoch  4
 Average training MAE loss: 3.49
-
 Average eval MAE loss: 2.54
MAE:  2.4116826
MdAE:  1.6284752
>>> epoch  5
 Average training MAE loss: 3.22
-
 Average eval MAE loss: 2.74
MAE:  2.496672
MdAE:  2.13734
>>> epoch  6
 Average training MAE loss: 2.96
-
 Average eval MAE loss: 3.03
MAE:  2.76503
MdAE:  2.0329833
>>> epoch  7
 Average training MAE loss: 2.78
-
 Average eval MAE loss: 3.05
MAE:  2.8152552
MdAE:  2.092863
>>> epoch  8
 Average training MAE loss: 2.55
-
 Average e

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['dense1.weight', 'dense2.weight', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_data = train_data.append(df)


### text : title
Input data feed :::  Asset data does not correctly obey contextual ownership like the entity
within project split!
using pretrained gpt-2 tokenizer
using pretrained gpt-2 tokenizer
[[45869, 1366, 857, 407, 9380, 22389, 38356, 9238, 588, 262, 9312, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [3109, 3455, 14976, 11241, 379, 262, 30617, 14249, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [22069, 40087, 12405, 5860, 2104, 4947, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], [40613, 4731, 1595, 470, 4781, 281, 9312, 3119, 357, 8423, 1595, 470, 670, 2035, 8, 50256, 50256, 50256, 50256, 50256], [35857, 32053, 2836, 11241, 1839, 470, 670, 319, 1220, 27604, 14, 18417, 14, 1326, 50256, 50256, 50256, 50256, 50256, 50256]]
using pretrained gpt-2 tokenizer




Start training for  {'train': ['usergrid'], 'test': ['usergrid']} .....
>>> epoch  0
 Average training MAE loss: 21.67
-
 Average eval MAE loss: 1.72
MAE:  1.7347909
MdAE:  1.4411049
>>> epoch  1
 Average training MAE loss: 1.47
-
 Average eval MAE loss: 1.32
MAE:  1.4216458
MdAE:  1.0833669
>>> epoch  2
 Average training MAE loss: 1.26
-
 Average eval MAE loss: 0.92
MAE:  1.2185626
MdAE:  0.725698
>>> epoch  3
 Average training MAE loss: 1.24
-
 Average eval MAE loss: 2.69
MAE:  2.7547362
MdAE:  2.757624
>>> epoch  4
 Average training MAE loss: 1.23
-
 Average eval MAE loss: 1.09
MAE:  1.3646202
MdAE:  1.3319106
>>> epoch  5
 Average training MAE loss: 1.01
-
 Average eval MAE loss: 0.83
MAE:  1.211044
MdAE:  0.9309807
>>> epoch  6
 Average training MAE loss: 0.85
-
 Average eval MAE loss: 0.95
MAE:  1.296095
MdAE:  1.0008197
>>> epoch  7
 Average training MAE loss: 0.83
-
 Average eval MAE loss: 0.91
MAE:  1.2604004
MdAE:  1.1898375
>>> epoch  8
 Average training MAE loss: 0.90
-
 Av

### Cross Project Training Script - Within Repository

In [None]:
# Cross-project mode: data_processing() only uses the within-project split
# when this flag is truthy.
WITHIN_PROJECT = False

# Within-repository experiments: each (train, test) tuple is expanded into the
# {'train': [...], 'test': [...]} dict shape that main() iterates over.
TRAIN_TEST_FILE_PAIRS = [
    {'train': [train_project], 'test': [test_project]}
    for train_project, test_project in (
        ('mesos', 'usergrid'),
        ('usergrid', 'mesos'),
        ('appceleratorstudio', 'aptanastudio'),
        ('appceleratorstudio', 'titanium'),
        ('titanium', 'appceleratorstudio'),
        ('aptanastudio', 'titanium'),
        ('mule', 'mulestudio'),
        ('mulestudio', 'mule'),
    )
]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME
    for file in TRAIN_TEST_FILE_PAIRS:
        if TOKENIZER == 'bbpe':
            config = GPT2Config(num_labels=1, pad_token_id=50257)
        elif TOKENIZER == 'gpt2':
            config = GPT2Config(num_labels=1, pad_token_id=50256)
        elif TOKENIZER == 'wordpiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'sentencepiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'wordlevel':
            config = GPT2Config(num_labels=1, pad_token_id=3)           
        if MODEL_NAME == 'gpt2':
            MODEL = LinearGPT2.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        elif MODEL_NAME == 'gpt2sp':
            MODEL = GPT2SP.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        torch.cuda.empty_cache()            
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""

                
if __name__ == "__main__":
    main()

### Cross Project Training Script - Cross Repository

In [None]:
# Cross-project mode: data_processing() only uses the within-project split
# when this flag is truthy.
WITHIN_PROJECT = False

# Cross-repository experiments: each (train, test) tuple is expanded into the
# {'train': [...], 'test': [...]} dict shape that main() iterates over.
TRAIN_TEST_FILE_PAIRS = [
    {'train': [train_project], 'test': [test_project]}
    for train_project, test_project in (
        ('clover', 'usergrid'),
        ('talendesb', 'mesos'),
        ('talenddataquality', 'aptanastudio'),
        ('mule', 'titanium'),
        ('talenddataquality', 'appceleratorstudio'),
        ('mulestudio', 'titanium'),
        ('appceleratorstudio', 'mulestudio'),
        ('appceleratorstudio', 'mule'),
    )
]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME
    for file in TRAIN_TEST_FILE_PAIRS:
        if TOKENIZER == 'gpt2':
            config = GPT2Config(num_labels=1, pad_token_id=50256)
        elif TOKENIZER == 'wordpiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'sentencepiece':
            config = GPT2Config(num_labels=1, pad_token_id=0)
        elif TOKENIZER == 'wordlevel':
            config = GPT2Config(num_labels=1, pad_token_id=3)           
        if MODEL_NAME == 'gpt2':
            MODEL = LinearGPT2.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        elif MODEL_NAME == 'gpt2sp':
            MODEL = GPT2SP.from_pretrained('gpt2', config=config)
            MODEL.cuda()
        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        torch.cuda.empty_cache()            
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""

                
if __name__ == "__main__":
    main()