In [1]:
import comet_ml

In [2]:
import os
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/mnt/Research/peter-research/peter_devine_nlp_models"
os.environ["PYTORCH_TRANSFORMERS_CACHE"] = "/mnt/Research/peter-research/peter_devine_nlp_models"
os.environ['COMET_API_KEY'] = "cQ08Sxaq3jBSiwFrzNdIuOCv5"

In [3]:
from downloader import download_datasets
from dataset_preparation import prepare_datasets

In [4]:
# 'chen_2014', 'ciurumelea_2017', 'di_sorbo_2016', 'guzman_2015', 'maalej_2016', 'scalabrino_2017', 'tizard_2019', 'williams_2017'

dataset_list = ['chen_2014', 'ciurumelea_2017', 'di_sorbo_2016', 'guzman_2015', 'maalej_2016', 'scalabrino_2017', 'tizard_2019', 'williams_2017']
label_granularity = "requirements_relevance"
is_multiclass = not (label_granularity == "requirements_relevance")
problem_type = "multi_label_classification" if is_multiclass else "single_label_classification"
model_name = "distilbert-base-uncased"

In [5]:
# download_datasets(dataset_list, label_granularity=label_granularity)

In [6]:
train_dataset, val_dataset, test_dataset, label_order = prepare_datasets(dataset_list, label_granularity, model_name, is_multiclass)

chen_2014
ciurumelea_2017
di_sorbo_2016
guzman_2015
maalej_2016
scalabrino_2017
tizard_2019
williams_2017


In [7]:
label_order = list(label_order)
label_order.remove("text")
label_order

['irrelevant', 'relevant']

In [8]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoConfig
import torch
from sklearn.metrics import brier_score_loss, accuracy_score, precision_recall_fscore_support, ndcg_score, roc_auc_score
import numpy as np

def get_aprf1(labels, pred_label):
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred_label, average='binary', zero_division=0)
    acc = accuracy_score(labels, pred_label)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

def get_cutoff_metrics(labels, preds, cutoff):
    pred_label = preds > cutoff
    metrics = get_aprf1(labels, pred_label)
    return {f"{k}_{cutoff}": v for k, v in metrics.items()}

def compute_metrics(eval_pred):
    
    logits, labels = eval_pred
    
    results = {}
    
    if is_multiclass:
        sig = torch.nn.Sigmoid()
        perc_preds = sig(torch.Tensor(logits)).numpy()
        num_pred_classes = logits.shape[1]
        for class_num in range(num_pred_classes):
            class_name = label_order[class_num]

            class_perc_preds = perc_preds[:, class_num]
            class_labels = labels[:, class_num]

            results[f"{class_name}_roc_auc"] = roc_auc_score(class_labels, class_perc_preds)
    #         for i in range(1,10):
    #             cutoff_metrics = get_cutoff_metrics(class_labels, class_perc_preds, i / 10)
    #             cutoff_metrics = {f"{class_name}_{k}":v for k, v in cutoff_metrics.items()}
    #             results.update(cutoff_metrics)

            cutoff_metrics = get_cutoff_metrics(class_labels, class_perc_preds, 0.5)
            cutoff_metrics = {f"{class_name}_{k}":v for k, v in cutoff_metrics.items()}
            results.update(cutoff_metrics)
    else:
        
        predictions = np.argmax(logits, axis=-1)
        results = get_aprf1(labels, predictions)
        
    return results

training_args = TrainingArguments(
    output_dir=f'./results_{label_granularity}',          # output directory
    num_train_epochs=6,              # total number of training epochs
    per_device_train_batch_size=32,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    evaluation_strategy="epoch",
    warmup_steps=0,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir=f'./logs_{label_granularity}',            # directory for storing logs
    metric_for_best_model="loss",
    logging_steps=10,
    greater_is_better=False,
    load_best_model_at_end=True,
    fp16=True,
)

config = AutoConfig.from_pretrained(model_name, cache_dir="/mnt/Research/peter-research/peter_devine_nlp_models")
num_labels = 3 if label_granularity == "bug_feature_other" else 2
config.num_labels = num_labels
config.problem_type = problem_type
model = AutoModelForSequenceClassification.from_config(config)

trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,             # evaluation dataset
    compute_metrics=compute_metrics,
)

trainer.train()
trainer.save_model()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/peter-devine/huggingface/23b0dba97257416ab0325ee61a8e2b91



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4591,0.425488,0.798138,0.760093,0.748757,0.771778
2,0.3454,0.411059,0.81398,0.777929,0.769676,0.786362
3,0.3021,0.429159,0.811857,0.762768,0.79862,0.729996
4,0.2982,0.427335,0.820186,0.787411,0.771764,0.803705
5,0.2751,0.460208,0.818226,0.778596,0.785944,0.771384
6,0.2006,0.492387,0.816593,0.783163,0.7676,0.799369




In [None]:
model.config.problem_type

In [None]:
train_dataset[:10]

In [None]:
import torch            
    
loss_fct = torch.nn.BCEWithLogitsLoss()

In [None]:
logits = model(train_dataset[:10]['input_ids'].to(torch.device("cuda")), train_dataset[:10]['attention_mask'].to(torch.device("cuda")))

In [None]:
loss = loss_fct(logits.logits, train_dataset[:10]["labels"].type(torch.float).to(torch.device("cuda")))

In [None]:
logits

In [9]:
from dataset_downloaders.label_mappings import relevance_dataset_mappings

relevance_dataset_mappings.keys()

dict_keys(['chen_2014', 'ciurumelea_2017', 'di_sorbo_2016', 'guzman_2015', 'maalej_2016', 'scalabrino_2017', 'tizard_2019', 'williams_2017'])

In [None]:
sig(logits.logits)

In [12]:
import pandas as pd
import numpy as np

In [14]:
np.argmax(pd.read_csv(os.path.join("./data", f"guzman_2015_requirements_relevance.csv")).drop("text", axis=1).values, axis=1)

array([1, 0, 1, ..., 0, 0, 1])