<a href="https://colab.research.google.com/github/FahadEbrahim/AdaptIRC/blob/main/NLBSE2024_AdaptIRC_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# How to run

AdaptIRC_V2:


* This notebook implements the adapter approach (AdaptIRC) on the NLBSE 2024 Issue Report Classification task.

* To run the notebook in Colab, just change the environment to GPU through: Runtime >> Change runtime type >> Hardware Accelerator >> GPU.

* You may require WANDB token if using newer versions of transformers lib

# Install Dependencies

In [1]:
!pip install -Uq adapters
!pip install -q datasets
!pip install -Uq accelerate


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m252.8/252.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h

# Import Libraires

Here, we are importing libraries that would be used throughout the notebook. (Pandas, Json, OS, Sklearn, numpy, collections, transformers, adapters, random, torch, re [regular expression] ).

In [2]:
from collections import defaultdict

from transformers import TrainingArguments, EvalPrediction, TrainerCallback, DataCollatorWithPadding

from sklearn.metrics import classification_report, recall_score, f1_score, precision_score

# Setting Seed

These lines set the seed for reproducability for several libraries ( torch, random, numpy, transformers)

In [3]:
import torch
import random
from transformers import set_seed
import numpy as np

# Selecting a random seed
RANDOM_SEED = 42

# Setting seed for reproducability
set_seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Dataset

Reading the dataset cloned from NLBSE Github repository:

In [4]:
import pandas as pd

# Reading the CSV files from the NLBSE github
train_set = pd.read_csv("https://raw.githubusercontent.com/nlbse2024/issue-report-classification/main/data/issues_train.csv")
test_set = pd.read_csv("https://raw.githubusercontent.com/nlbse2024/issue-report-classification/main/data/issues_test.csv")

# Dataset Processing

In [5]:
# There were some Nan values that causes some issues, so, they are replaced with a single space
train_set=train_set.fillna(' ')
test_set=test_set.fillna(' ')

This function is used to pre-process the issues with various steps (
  
  * removing strings between triple Quotes
  * Remove new lines
  * Remove Links
  * Remove digits
  * Remove special characters except the question mark
  * Remove multiple spaces


In [6]:
import re

def preprocess(issues):
    processed_issues = []

    for issue in issues:

        # Remove strings between triple quotes
        issue = re.sub(r'```.*?```', ' ', issue, flags=re.DOTALL)

        # Remove new lines
        issue = re.sub(r'\n', ' ', issue)

        # Remove links
        issue = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', ' ', issue)

        # Remove digits
        issue = re.sub(r'\d+', ' ', issue)

        # Remove special characters except the question marks
        issue = re.sub(r'[^a-zA-Z0-9?\s]', ' ', issue)
        issue = re.sub(r'\s+', ' ', issue)

        processed_issues.append(issue)

    return processed_issues

In [7]:
# Apply the pre-process function for both train and testing sets on both the title and body.
train_set['title'] = preprocess(train_set['title'])
train_set['body'] = preprocess(train_set['body'])

test_set['title'] = preprocess(train_set['title'])
test_set['body'] = preprocess(train_set['body'])

In [8]:
# This code is taken from NLBSE
# creating the dataset with grouping it via repositry (repo)

from datasets import Dataset

repos = list(set(train_set["repo"].unique()))

train_set.groupby(["repo", "label"]).size().unstack(fill_value=0)

# Combining the title and body for a new field called text.
def process_dataset(dataset):
    dataset['text'] = dataset['title'] + " " + str(dataset['body'])
    dataset = dataset[['text', 'label', 'repo']]
    return dataset

train_set = process_dataset(train_set)
test_set = process_dataset(test_set)

group_by_repo = lambda dataset: {
    repo: Dataset.from_pandas(dataset[dataset["repo"] == repo]).class_encode_column("label")
    for repo in dataset["repo"].unique()
}

train_sets = group_by_repo(train_set)
test_sets = group_by_repo(test_set)

datasets = {
    repo: {'train': train_sets[repo], 'test': test_sets[repo]} for repo in train_sets.keys()
}

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/300 [00:00<?, ? examples/s]

# Model Configuration

Here is the new important code: Setting the configurations of the adapters and transformer model.

In [9]:
from transformers import RobertaTokenizer, RobertaConfig, TextClassificationPipeline
from adapters import RobertaAdapterModel

def create_model(model_name="roberta-base", max_length=256, truncation=True, padding="max_length", device="cuda"):
  # The tokenizer is based on Roberta. The configurations are: Max_length = 256, truncation = true, padding = max_length.
  tokenizer = RobertaTokenizer.from_pretrained(model_name, device=device, max_length=max_length, truncation=truncation, padding=padding)

  # Configuration: We have 3 labels: Bug, Enhancment, Question.
  config = RobertaConfig.from_pretrained(model_name, device=device, num_labels=3)

  # Configuration of the Adapter model.
  model = RobertaAdapterModel.from_pretrained(model_name, config=config)

  # This part is for inferencing
  classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, device=device, max_length=max_length, padding=padding, truncation=truncation)

  return tokenizer, model, classifier

# Creating Training and Infering Adapters

The training occurs at every repository:
* A classfication head is attached to the model defining the number of labels to be 3 and defining the labels.
* Initilaising the training of the Adapter
* Using Adapter Droput Trainer as the Callback.
* Configuring the adapter configuarion.
* Configure the trainer
* Adding the callback.
* Start training the adapter
* Evalauting the adapter

In [10]:
from adapters import AdapterTrainer

references = {}
predictions = {}

# Parameter used for Training
learning_rate=1e-4
epochs=200
batch_size=32

for repo in datasets.keys():

  dataset = datasets[repo]
  tokenizer, model, classifier = create_model()

  # Extracting the training and testing sets from the dataset per repo
  train_set = dataset['train']

  id2label = {x: train_set.features["label"].int2str(x) for x in range(train_set.features["label"].num_classes)}

  # Tokenizing the training set
  train_set = train_set.shuffle(seed=RANDOM_SEED)
  train_set = train_set.map(lambda batch: tokenizer(batch["text"]), batched=True)
  train_set.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

  test_set = dataset['test']
  test_set = test_set.shuffle(seed=RANDOM_SEED)
  test_set = test_set.map(lambda batch: tokenizer(batch["text"]), batched=True)
  test_set.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

  # Adapter Name and Saving Direc
  adapter_name = f"irc-{repo.replace('/','-')}"

  # Add a new adapter
  model.add_adapter(adapter_name, overwrite_ok=True)

  # Add a matching classification head
  model.add_classification_head(
    adapter_name,
    num_labels=3,
    id2label=id2label,
    overwrite_ok=True
  )

  # Initilaize the adapter training
  model.train_adapter(adapter_name)

  # Metrics used for evaluation (accuracy, precision, recall and F1)
  def compute_metrics(p: EvalPrediction):
    labels = p.label_ids
    preds = np.argmax(p.predictions, axis=1)
    recall = recall_score(y_true=labels, y_pred=preds,average="weighted")
    precision = precision_score(y_true=labels, y_pred=preds,average="weighted")
    f1 = f1_score(y_true=labels, y_pred=preds,average="weighted")
    return {"precision": precision, "recall": recall, "f1": f1}

  # Configure Training arguements
  training_args = TrainingArguments(
    learning_rate=learning_rate,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    logging_steps=100,
    output_dir=f"training_output/{adapter_name}",
    overwrite_output_dir=True,
    remove_unused_columns=False,
    save_strategy="no",
    seed=RANDOM_SEED
  )

  # Having a data Collator
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

  # Configure the Adapter Trainer
  trainer = AdapterTrainer(
    model=model,
    args=training_args,
    eval_dataset=test_set,
    train_dataset=train_set,
    compute_metrics=compute_metrics,
    data_collator=data_collator
  )

  # Create an Adapter Callback
  class AdapterDropTrainerCallback(TrainerCallback):
      def on_step_begin(self, args, state, control, **kwargs):
        skip_layers = list(range(np.random.randint(0, 11)))
        kwargs['model'].set_active_adapters(adapter_name, skip_layers=skip_layers)

      def on_evaluate(self, args, state, control, **kwargs):
        kwargs['model'].set_active_adapters(adapter_name, skip_layers=None)


  # Add the callback to the trainer
  trainer.add_callback(AdapterDropTrainerCallback())

  # Start training the adapter
  trainer.train()

  evaluation = trainer.evaluate()
  display(evaluation)

  # Save the adapter
  model.save_adapter(f"training_output/{adapter_name}", adapter_name)

  # Merging the Repo
  model.merge_adapter(adapter_name)

  test_set = dataset['test']

  # Calcualting and Adding the metrics
  references[repo] = [model.config.id2label[id] for id in test_set['label']]
  predictions[repo] = [prediction['label'] for prediction in classifier(test_set['text'])]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model 'RobertaAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceCl

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Step,Training Loss
100,1.0435
200,0.6134
300,0.435
400,0.3335
500,0.2358
600,0.172
700,0.1531
800,0.105
900,0.1024
1000,0.0791


{'eval_loss': 9.795919140742626e-06,
 'eval_precision': 1.0,
 'eval_recall': 1.0,
 'eval_f1': 1.0,
 'eval_runtime': 6.1541,
 'eval_samples_per_second': 48.748,
 'eval_steps_per_second': 6.175,
 'epoch': 200.0}

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model 'RobertaAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceCl

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Step,Training Loss
100,1.1029
200,1.0625
300,0.8465
400,0.6595
500,0.5082
600,0.4059
700,0.3219
800,0.2702
900,0.2626
1000,0.1945


{'eval_loss': 0.00539412023499608,
 'eval_precision': 0.9966996699669968,
 'eval_recall': 0.9966666666666667,
 'eval_f1': 0.99666658333125,
 'eval_runtime': 6.1577,
 'eval_samples_per_second': 48.72,
 'eval_steps_per_second': 6.171,
 'epoch': 200.0}

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model 'RobertaAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceCl

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Step,Training Loss
100,1.103
200,1.096
300,0.8204
400,0.5087
500,0.3063
600,0.2387
700,0.1761
800,0.116
900,0.1281
1000,0.0989


{'eval_loss': 1.0803990335261915e-05,
 'eval_precision': 1.0,
 'eval_recall': 1.0,
 'eval_f1': 1.0,
 'eval_runtime': 6.1269,
 'eval_samples_per_second': 48.964,
 'eval_steps_per_second': 6.202,
 'epoch': 200.0}

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model 'RobertaAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceCl

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Step,Training Loss
100,1.1026
200,1.0948
300,0.926
400,0.6505
500,0.4342
600,0.3133
700,0.2444
800,0.1766
900,0.1951
1000,0.1458


{'eval_loss': 1.167988739325665e-05,
 'eval_precision': 1.0,
 'eval_recall': 1.0,
 'eval_f1': 1.0,
 'eval_runtime': 6.1043,
 'eval_samples_per_second': 49.145,
 'eval_steps_per_second': 6.225,
 'epoch': 200.0}

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model 'RobertaAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceCl

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Step,Training Loss
100,1.1031
200,1.096
300,0.8453
400,0.5509
500,0.3582
600,0.2465
700,0.1984
800,0.1457
900,0.1415
1000,0.1091


{'eval_loss': 1.0959015526168514e-05,
 'eval_precision': 1.0,
 'eval_recall': 1.0,
 'eval_f1': 1.0,
 'eval_runtime': 6.1662,
 'eval_samples_per_second': 48.652,
 'eval_steps_per_second': 6.163,
 'epoch': 200.0}

# Metrics

In [11]:
# Setting the metrics and labels
metrics = ['precision', 'recall', 'f1-score']
labels = ['bug', 'feature', 'question']

In [12]:
# A function to get the metric results
def get_results (repos):
  results = defaultdict(dict)

  for repo in repos:
    results[repo] = classification_report(references[repo], predictions[repo], output_dict=True)
    results[repo]['average'] = results[repo]['weighted avg']
    results[repo] = {label: {metric: results[repo][label][metric] for metric in metrics} for label in labels + ['average']}

  results['overall'] = {label: {metric: np.mean([results[repo][label][metric] for repo in repos]) for metric in metrics} for label in labels + ['average']}

  return results

In [13]:
# A function to write to a json file
import json

def write_json_file (results):
  #The output json file would be created containing the results.
  output_file_name = 'results.json'
  with open(output_file_name, 'w') as fp:
    json.dump(results, fp, indent=2)

In [14]:
# A function to print the results

def print_results (results):

  print(f"Repository{' '*15}Label     Precision  Recall     F1")
  for repo in repos + ['overall']:
    print("-"*63)
    for label in labels + ['average']:
      out = f"{repo:<25}{label:<10}"
      for metric in metrics:
        out += f"{results[repo][label][metric]:<10.4f} "
      print(out)

In [15]:

# Call the function to get the results
results = get_results (repos)

# This function call create the Json file with the results
write_json_file(results)

# This function prints the results
print_results(results)

Repository               Label     Precision  Recall     F1
---------------------------------------------------------------
facebook/react           bug       1.0000     1.0000     1.0000     
facebook/react           feature   1.0000     1.0000     1.0000     
facebook/react           question  1.0000     1.0000     1.0000     
facebook/react           average   1.0000     1.0000     1.0000     
---------------------------------------------------------------
microsoft/vscode         bug       1.0000     1.0000     1.0000     
microsoft/vscode         feature   1.0000     1.0000     1.0000     
microsoft/vscode         question  1.0000     1.0000     1.0000     
microsoft/vscode         average   1.0000     1.0000     1.0000     
---------------------------------------------------------------
tensorflow/tensorflow    bug       0.9901     1.0000     0.9950     
tensorflow/tensorflow    feature   1.0000     0.9900     0.9950     
tensorflow/tensorflow    question  1.0000     1.0000     1

# References & Ack

This notebook uses codes from:
* https://github.com/adapter-hub/adapters/blob/main/notebooks/01_Adapter_Training.ipynb
* https://github.com/adapter-hub/adapters/blob/main/notebooks/05_Adapter_Drop_Training.ipynb
* https://huggingface.co/docs/transformers/tasks/sequence_classification
* https://github.com/nlbse2024/issue-report-classification/blob/main/2-Template-SetFit.ipynb
