<a href="https://colab.research.google.com/github/NaziaToma/BugType-and-BugFix-Predictor/blob/main/BugFix_BERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torch



In [None]:
# Mount Google Drive at /content/drive; the dataset CSVs used by this notebook
# are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

# Read the training and test splits from the mounted Drive folder.
train_df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/BugFix_TrainingSet.csv',
)
test_df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/BugFix_Testset.csv',
)


In [None]:
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset

In [None]:
# Replace every missing cell with an empty string, in place, in both frames,
# so the string concatenation further down never encounters NaN.
train_df.fillna(value='', inplace=True)
test_df.fillna(value='', inplace=True)

In [None]:
def build_combined_text(df, first_extra_col=4, exclude=('combined_text', 'Resolution')):
    """Build the single text feature used as model input.

    Concatenates 'Short Description', 'Bug Report' and every column from
    position ``first_extra_col`` onwards, separated by single spaces.

    Args:
        df: Frame containing at least 'Short Description' and 'Bug Report'.
        first_extra_col: Positional index of the first additional column
            to append.
        exclude: Column names that must never be appended. 'combined_text'
            is skipped so re-running the cell does not fold the previous
            result into itself; 'Resolution' is skipped because it is the
            target read later, so it must not end up inside the features
            if it happens to sit at position >= ``first_extra_col``.

    Returns:
        A Series of strings aligned with ``df.index``.
    """
    extra_cols = [col for col in df.columns[first_extra_col:] if col not in exclude]
    # astype(str) lets numeric columns be joined instead of raising TypeError;
    # it is a no-op for columns that already hold strings.
    extra_values = df[extra_cols].astype(str).to_numpy()
    extras = pd.Series(
        [' '.join(row) for row in extra_values],
        index=df.index,
        dtype=object,
    )
    return (
        df['Short Description'].astype(str)
        + ' '
        + df['Bug Report'].astype(str)
        + ' '
        + extras
    )


# Combine Short Description, bug report, and comments into a single text feature for each dataset
train_df['combined_text'] = build_combined_text(train_df)
test_df['combined_text'] = build_combined_text(test_df)

In [None]:
# Resolution classes; a class's position in this list is its integer label id.
class_names = [
    'FIXED',
    'WONTFIX',
    'INVALID',
]

In [None]:
# Load the pretrained tokenizer for the 'bert-base-uncased' checkpoint, the
# same checkpoint name the classification model is loaded from later on.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class TextDataset(Dataset):
    """Map-style dataset that pairs pre-tokenized texts with their labels."""

    def __init__(self, tokenizer, texts, labels):
        """Tokenize ``texts`` once up front and keep ``labels`` next to them.

        The tokenizer is called with truncation and padding switched on and a
        cap of 128 tokens; the mapping it returns is stored unchanged.
        """
        self.labels = labels
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=128)

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors.

        The dict has one tensor per tokenizer output field plus a 'labels'
        tensor holding the label at the same position.
        """
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [None]:
# Map each class name to its integer id (its position in class_names).
label_to_id = {label: idx for idx, label in enumerate(class_names)}


def encode_labels(resolutions):
    """Convert a Series of resolution names into a list of integer label ids.

    Args:
        resolutions: Series whose values are expected to be keys of
            ``label_to_id``.

    Returns:
        A list of Python ints, one per row, in the Series' order.

    Raises:
        ValueError: If any value has no entry in ``label_to_id``. Without this
            check such rows map to NaN, which silently turns the whole label
            list into floats.
    """
    ids = resolutions.map(label_to_id)
    unmapped = resolutions[ids.isna()]
    if not unmapped.empty:
        unknown_values = sorted(unmapped.astype(str).unique())
        raise ValueError(
            f"Resolution values without a class id: {unknown_values}; "
            f"expected one of {list(label_to_id)}"
        )
    return ids.astype(int).tolist()


train_labels = encode_labels(train_df['Resolution'])
test_labels = encode_labels(test_df['Resolution'])

In [None]:
# Tokenize each split once and wrap it together with its integer labels.
train_dataset = TextDataset(
    tokenizer=tokenizer,
    texts=train_df['combined_text'].tolist(),
    labels=train_labels,
)
test_dataset = TextDataset(
    tokenizer=tokenizer,
    texts=test_df['combined_text'].tolist(),
    labels=test_labels,
)

In [None]:
# Seed torch before building the model: the classification head is newly
# initialised rather than loaded from the checkpoint, so without a fixed seed
# every run would start from different head weights.
torch.manual_seed(42)

# One output logit per entry in class_names, so the head size cannot drift
# out of sync with the label list.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(class_names),
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
pip install accelerate -U



In [None]:
pip install transformers[torch]



In [None]:
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score

def compute_metrics(pred):
    """Compute accuracy and weighted recall/precision/F1 for one evaluation run.

    Args:
        pred: Object exposing ``label_ids`` (the true class ids) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        Dict with the keys 'accuracy', 'recall', 'precision' and 'f1'.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    # zero_division=0 scores a class that is never predicted (or never present)
    # as 0, the same value scikit-learn's default produces, but without
    # emitting an UndefinedMetricWarning on every evaluation step.
    weighted = {'average': 'weighted', 'zero_division': 0}

    return {
        'accuracy': accuracy_score(labels, preds),
        'recall': recall_score(labels, preds, **weighted),
        'precision': precision_score(labels, preds, **weighted),
        'f1': f1_score(labels, preds, **weighted),
    }


In [None]:
import dataclasses

from transformers import TrainingArguments

# Newer transformers releases name this argument `eval_strategy`; older ones
# only accept `evaluation_strategy`. Use whichever the installed version
# declares so the cell runs regardless of the version pip resolved.
_training_arg_names = {f.name for f in dataclasses.fields(TrainingArguments)}
_eval_strategy_key = (
    'eval_strategy' if 'eval_strategy' in _training_arg_names else 'evaluation_strategy'
)

training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    save_strategy="steps",
    logging_dir='./logs',
    logging_steps=50,  # Logging every 50 steps
    load_best_model_at_end=True,
    eval_steps=50,
    save_steps=50,
    # A checkpoint is written every 50 steps straight into the Drive root;
    # cap how many are kept so Drive does not fill up. The best checkpoint is
    # still retained because load_best_model_at_end is enabled.
    save_total_limit=2,
    **{_eval_strategy_key: "steps"}
)

# NOTE(review): eval_dataset is the test split, and load_best_model_at_end
# selects the checkpoint by its score on that same split, so the reported test
# metrics are optimistically biased. A separate validation split would fix it.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()


Step,Training Loss,Validation Loss,Accuracy,Recall,Precision,F1
50,1.0817,1.152682,0.368056,0.368056,0.135465,0.19804
100,1.0509,1.14297,0.368056,0.368056,0.135465,0.19804
150,1.0053,1.121218,0.368056,0.368056,0.135465,0.19804
200,0.9258,1.337974,0.368056,0.368056,0.135465,0.19804
250,0.9955,1.098873,0.430556,0.430556,0.304045,0.329267
300,1.0002,1.059372,0.472222,0.472222,0.340195,0.390179
350,0.8543,1.139174,0.4375,0.4375,0.295409,0.351428
400,0.9127,1.036343,0.493056,0.493056,0.358455,0.408141
450,0.845,1.03592,0.5,0.5,0.649901,0.444144


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=462, training_loss=0.9625172387985956, metrics={'train_runtime': 6736.3882, 'train_samples_per_second': 0.548, 'train_steps_per_second': 0.069, 'total_flos': 242722127854080.0, 'train_loss': 0.9625172387985956, 'epoch': 3.0})

In [None]:
# Run a final evaluation on the Trainer's eval_dataset and print the metrics
# dict it returns (loss plus the values produced by compute_metrics).
results = trainer.evaluate()
print(results)

{'eval_loss': 1.0359196662902832, 'eval_accuracy': 0.5, 'eval_recall': 0.5, 'eval_precision': 0.6499007936507937, 'eval_f1': 0.44414371439632766, 'eval_runtime': 96.689, 'eval_samples_per_second': 1.489, 'eval_steps_per_second': 0.186, 'epoch': 3.0}
