# Overview: ZSL-Embedding Approach

In [None]:
# Unpack the BERT4RE archive; presumably it provides the local 'BERT4RE/BERT4RE/'
# model directory that is listed among the candidate language models further down.
!unzip BERT4RE.zip

### Step 1: Import necessary packages

In [1]:
# for utilizing the language models
!pip install transformers
from transformers import AutoTokenizer, AutoModel 
from torch.nn import functional as F # to use cosine_similarity
#for processing output (reading and writing to CSV files)
import pandas as pd 
import csv 
#evaluation 
from sklearn.metrics import classification_report, confusion_matrix # for evaluation report 
import matplotlib.pyplot as plt # for vis. F1-scores result
from google.colab import data_table # to display dataset headers
from sklearn.metrics import matthews_corrcoef
 

Collecting transformers
  Downloading transformers-4.35.2-py3-none-any.whl.metadata (123 kB)
     ---------------------------------------- 0.0/123.5 kB ? eta -:--:--
     -------------------------------------- 123.5/123.5 kB 3.7 MB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.19.4-py3-none-any.whl.metadata (14 kB)
Collecting pyyaml>=5.1 (from transformers)
  Downloading PyYAML-6.0.1-cp310-cp310-win_amd64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2023.10.3-cp310-cp310-win_amd64.whl.metadata (41 kB)
     ---------------------------------------- 0.0/42.0 kB ? eta -:--:--
     ---------------------------------------- 42.0/42.0 kB 2.1 MB/s eta 0:00:00
Collecting tokenizers<0.19,>=0.14 (from transformers)
  Downloading tokenizers-0.15.0-cp310-none-win_amd64.whl.metadata (6.8 kB)
Collecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-none-win_amd64.whl

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


ModuleNotFoundError: No module named 'torch'

### Step 2: Import datasets

#### **Dataset(A)** PROMISE Requirements dataset 

A frequently-used dataset from PROMISE repository by Cleland-Huang et al. (2007) 

##### **Dataset A-1: PROMISE dataset as Functional and Non-functional Req.**

Classification of the PROMISE dataset into functional and non-functional requirements, including all 10 classes of non-functional requirements. Used in Hey et al. (2020) and Kurtanović & Maalej (2017).

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1yzn8oGgk7ovYkYrF7yKyJEfLJcf6Dwk3/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-US: NFR dataset "Usability"**
370 requirements -- 67 as US requirements

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1Dju0hL_ScNUAiupba187we6697l0gtOT/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_US = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_US, include_index=False, num_rows_per_page=10)

249 requirements -- Top 4 NFR classes 

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/10lGfHu0NkOUeOf2vMNUk0l6_xLUimTK_/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_TopUS = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_TopUS, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-SE: NFR dataset "Security"**
370 requirements -- 66 as SE requirements

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1BnippAdBsd8UihN5mN0EPYDS5XnzHfSd/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_SE = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_SE, include_index=False, num_rows_per_page=10)

249 requirements -- Top 4 NFR classes 

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1E2A18mU0ptQaHRgGZ_mkUUi9nt2qqRbp/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_TopSE = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_TopSE, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-O: NFR dataset "Operational"**
370 requirements -- 62 as O requirements

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1VU23jZpmb1GTT9auv4Ws7mXEzMF0JzVB/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_O = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_O, include_index=False, num_rows_per_page=10)

249 requirements -- Top 4 NFR classes 

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1usBg9X4E1e_Bqi_OX_NxohuG_7ec9sdv/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_TopO = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_TopO, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-PE: NFR dataset "Performance"**
370 requirements -- 54 as PE requirements

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1VzhbFPECg8YWd_J_C-OCfE5GHz6A_ySk/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_PE = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_PE, include_index=False, num_rows_per_page=10)

249 requirements -- Top 4 NFR classes 

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1tZmWl3RLUA9kZTbJfA_0OAsyjyCDkCe9/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_TopPE = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_TopPE, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-A: NFR dataset "Availability"**

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1GO93zdgxDe07ES9zvs4Frb0itst3LNfx/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_A = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_A, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-FT: NFR dataset "Fault Tolerance"**

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1io6QvNy74iQHWNYvU3fW75OsPBuqeYrX/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_FT = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_FT, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-L: NFR dataset "Legal"**

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1O1ncvpd8_7smCOj_ZvteYQ0gi7IUG0jQ/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_L = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_L, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-LF: NFR dataset "Look & Feel"**

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1ZUOEuPOXBni216xOzlhy6AJg4AmdEcKq/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_LF = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_LF, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-SC: NFR dataset "Scalability"**

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1OrbwWKDBFtrO7gbTF4F_pCjU1C0_oXqZ/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_SC = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_SC, include_index=False, num_rows_per_page=10)

###### **Dataset A-1-MN: NFR dataset "Maintainability"**

In [None]:
# Source: the dataset can be downloaded from https://github.com/tobhey/NoRBERT
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1YJJUIXO7dfceaUxfrE1uUN_d-EwU7M2P/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_1_MN = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_1_MN, include_index=False, num_rows_per_page=10)

##### **Dataset A-2: A re-labelled version of PROMISE dataset by Hey et al. (2020)**

Reclassification of PROMISE requirements as behavior, data, and function – relabeled by Hey et al. (2020)

In [None]:
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1r9g0ba0KNPvygEVB01i7ih6h_yjjAxg2/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_2 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_2, include_index=False, num_rows_per_page=10)

##### **Dataset A-3: A re-labelled version of PROMISE NFR dataset by Dalpiaz et al. (2019) and Hey et al. (2020)**

Reclassification of PROMISE requirements as functional and quality requirements

In [None]:
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/14W9k4z5unlho7y-4PGB_NjYtJHDsMex5/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_a_3 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_a_3, include_index=False, num_rows_per_page=10)

#### **Dataset(B)** Security Requirements dataset by Knauss et al. (2011)

The dataset was selected by Alessio. It consists of shall-requirements that are split into security and non-security requirements.

Dataset reference: https://doi.org/10.5281/zenodo.4530183

Knauss, Eric, et al. "Supporting requirements engineers in recognising security issues." International Working Conference on Requirements Engineering: Foundation for Software Quality. Springer, Berlin, Heidelberg, 2011.


In [None]:
# Dataset B: SeqReq (510 security requirements)
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1_6nnYhP_GpqrL_Cl3e8RIMRiXVbHBrxN/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_b = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_b, include_index=False, num_rows_per_page=10)

##### Dataset B-1: CPN dataset
210 requirements 

In [None]:
# CPN dataset
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/139FlvzPQoWZ05CH-PeRFuFgUtUpUhluN/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_b_1 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_b_1, include_index=False, num_rows_per_page=10)

##### Dataset B-2: GPS dataset
176 requirements 

In [None]:
# GPS dataset
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1xT0pbgoORixk49pzQTlliHZVN1H7-LpM/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_b_2 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_b_2, include_index=False, num_rows_per_page=10)

##### Dataset B-3: ePurse dataset
124 requirements 

In [None]:
# ePurse dataset
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/122ySinJj1MJpMb0tgGGxsQInVM5UTUJA/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_b_3 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_b_3, include_index=False, num_rows_per_page=10)

##### Dataset B-4: CPN + GPS dataset
386 requirements 

In [None]:
# CPN + GPS dataset
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/12XCNLzChbqFowkAtHyH1-GcM8QMCx_uk/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_b_4 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_b_4, include_index=False, num_rows_per_page=10)

##### Dataset B-5: CPN + ePurse dataset
334 requirements 

In [None]:
# CPN + ePurse dataset
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1j4upD1tqwo_t2IgdBzwJ6iEevfT80aEu/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_b_5 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_b_5, include_index=False, num_rows_per_page=10)

##### Dataset B-6: GPS + ePurse dataset
300 requirements 

In [None]:
# GPS + ePurse dataset
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1YZnESRUIE0JfOhhcjTGmdpfFAtYd3fqx/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_b_6 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_b_6, include_index=False, num_rows_per_page=10)

#### **Dataset(C)** Functional/Quality/NotR Requirements dataset by Dalpiaz et al. (2019)


This dataset is a collection of functional and quality requirements from different sources (including industrial sources).

The dataset was selected by Alessio, and reported in the following study

Dalpiaz, Fabiano, et al. "Requirements classification with interpretable machine learning and dependency parsing." 2019 IEEE 27th International Requirements Engineering Conference (RE). IEEE, 2019. 



##### **Dataset(C-1)** Functional vs. Quality Requirements
956 requirements 

In [None]:
# Original location: https://zenodo.org/record/3309669#.YodcQ6hBxD9
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/1_1o-r2eMa0XHNxsiwiFzq_qJpVF1nwBx/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
# Unlike the other datasets, this file is read with a tab separator.
dataset_c_1 = pd.read_csv(dwn_url, sep='\t')
data_table.DataTable(dataset_c_1, include_index=False, num_rows_per_page=10)

##### **Dataset(C-2)** Requirements Detection
Dataset on detecting requirement or not requirement from PROMISE dataset

In [None]:
# Original location: https://zenodo.org/record/3309669#.YodcQ6hBxD9
# The Google Drive share link is converted into a direct-download URL for pandas.
url = 'https://drive.google.com/file/d/10H-7GSDVHUjnMPDE8pCMc7cOPEjlIr5k/view?usp=sharing'
file_id = url.rsplit('/', 2)[1]  # second-to-last path segment is the Drive file id
dwn_url = f'https://drive.google.com/uc?id={file_id}'
dataset_c_2 = pd.read_csv(dwn_url, sep=';')
data_table.DataTable(dataset_c_2, include_index=False, num_rows_per_page=10)

### Step 3: Dataset wrapping classes

These classes wrap and prepare a classification dataset as either a pairwise (binary) or a multi-class set.

##### **Dataset Wrapping for Binary Classification Tasks**

*The Wrapping class has three methods:*
1. BinaryClass_Task_OvR to compare one class vs. all remaining classes in the dataset 
2. BinaryClass_Task_OvR_ExceptOne to compare one class vs. all remaining classes in the dataset except one named class
3. BinaryClass_Task_OvO which applies pairwise classification to two classes only; the dataset support equals the number of rows of class_1 plus the number of rows of class_2.


In [None]:
class Binary_Classification:
  """Wrap a requirements DataFrame as a two-label classification task.

  Every task method returns ``[requirements, gold_label, labels_long]``:
  ``requirements`` holds the 'RequirementText' value of each selected row,
  ``gold_label`` holds the long label assigned to that row, and
  ``labels_long`` is ``[label_1, label_2]``.
  """

  def __init__(self, dataset, label_1, label_2, abber_1, abber_2):
    # label_* are the long label texts returned as gold labels;
    # abber_* are the short codes compared against the dataset column.
    self.dataset, self.label_1, self.label_2 = dataset, label_1, label_2
    self.abber_1, self.abber_2 = abber_1, abber_2

  def _collect(self, col, labels_for):
    """Walk all rows; for every label yielded by ``labels_for(row[col])`` record (text, label)."""
    texts, golds = [], []
    for _, record in self.dataset.iterrows():
      for gold in labels_for(record[col]):
        golds.append(gold)
        texts.append(record['RequirementText'])
    return [texts, golds, [self.label_1, self.label_2]]

  # ****One vs. Rest****
  def BinaryClass_Task_OvR(self, col):
    """Label rows whose ``col`` equals abber_1 as label_1 and ALL other rows as label_2."""
    def pick(value):
      return (self.label_1,) if value == self.abber_1 else (self.label_2,)
    return self._collect(col, pick)

  # ****One vs. Rest, excluding one class****
  def BinaryClass_Task_OvR_ExceptOne(self, not_this_class, col):
    """Like One-vs-Rest, but rows whose ``col`` equals ``not_this_class`` are dropped."""
    def pick(value):
      if value == not_this_class:
        return ()
      return (self.label_1,) if value == self.abber_1 else (self.label_2,)
    return self._collect(col, pick)

  # ****One vs. One****
  def BinaryClass_Task_OvO(self, col):
    """Keep only rows of the two classes (support: rows of class_1 + rows of class_2)."""
    pairs = ((self.abber_1, self.label_1), (self.abber_2, self.label_2))
    def pick(value):
      # Both codes are tested independently, exactly like two consecutive `if`s.
      return tuple(label for code, label in pairs if value == code)
    return self._collect(col, pick)
  
 

##### **Dataset Wrapping for Multiclass Classification Tasks**

*The Wrapping class has one method:*
1. `MutliClass_Task` (spelled this way in the code) to compare between all the classes in the dataset




In [None]:
class Multi_Classification_RE:
  """Wrap a requirements DataFrame as a multi-class classification task.

  ``labels_short`` holds the class codes found in the dataset column and
  ``labels_long`` the label text for each code at the same list position.
  """

  def __init__(self, dataset, labels_long, labels_short):
    self.dataset, self.labels_long, self.labels_short = dataset, labels_long, labels_short

  def MutliClass_Task(self, col):
    """Return ``[requirements, gold_label, labels_long]`` for rows whose ``col`` is a known code.

    Rows whose class code is not in ``labels_short`` are skipped.
    """
    texts, golds = [], []
    for _, record in self.dataset.iterrows():
      short_label = record[col]
      if short_label in self.labels_short:
        # The short and long label lists are assumed to be aligned by position.
        position = self.labels_short.index(short_label)
        golds.append(self.labels_long[position])
        texts.append(record['RequirementText'])
    return [texts, golds, self.labels_long]


### Step 4: Specify language models for embedding


TODO: think about using the WE models from https://github.com/isti-fmt-nemis/Domain-specific-ambiguity/tree/master/NLP-DomainComparison/MODELS

In [None]:
# Token-level embedding models, grouped by family.
BERT_models = [
    'bert-base-uncased',
    'bert-large-uncased',
    'bert-base-cased',
    'bert-large-cased',
]
DistilBERT_models = ['typeform/distilbert-base-uncased-mnli']
RoBERTa_models = ['roberta-base', 'xlm-roberta-base']  # 'microsoft/deberta-xlarge-mnli'
XLNet_models = ['xlnet-base-cased']

# Sentence-level embedding models.
sentenceTransformers = [
    'deepset/sentence_bert',
    'sentence-transformers/all-MiniLM-L12-v2',
    'jeniya/BERTOverflow_stackoverflow_github',
    'lanwuwei/BERTOverflow_stackoverflow_github',
    'ivanlau/distil-bert-uncased-finetuned-github-issues',
    'huggingface/CodeBERTa-small-v1',
]

# To run a single model, assign one of these to candidate_LMs instead:
#   ['deepset/sentence_bert']                    -> LM1
#   ['sentence-transformers/all-MiniLM-L12-v2']  -> LM2
#   ['BERT4RE/BERT4RE/']                         -> LM3
#   ['jeniya/BERTOverflow']                      -> LM4
candidate_LMs = [
    'deepset/sentence_bert',
    'sentence-transformers/all-MiniLM-L12-v2',
    'jeniya/BERTOverflow',
    'BERT4RE/BERT4RE/',
]
print(f'The total candidate LMs is: {len(candidate_LMs)}')

### Step 5: Setting ZSL pipeline by 🤗

#### A) Multi-class classification settings: One label

In [None]:

def ZSL_Req_classifier(requirement, tokenizer, model, labels_):
    """Pick the candidate label whose embedding is closest to the requirement.

    The requirement and all candidate labels are encoded as one padded batch.
    Each sequence is represented by the mean of the model's first output over
    dimension 1, and candidates are ranked by cosine similarity to the
    requirement representation.

    Args:
        requirement: requirement text to classify.
        tokenizer: object exposing ``batch_encode_plus`` that returns
            'input_ids' and 'attention_mask' tensors.
        model: callable ``model(input_ids, attention_mask=...)`` whose result
            is indexable, item 0 being the tensor that is pooled.
        labels_: list of candidate label texts.

    Returns:
        ``[label, score]``: the best-matching entry of ``labels_`` and its
        cosine similarity formatted as a string.

    Raises:
        ValueError: if ``labels_`` is empty.
    """
    import torch  # local import: only needed to switch off gradient tracking

    if not labels_:
        raise ValueError("labels_ must contain at least one candidate label")

    inputs = tokenizer.batch_encode_plus([requirement] + labels_, return_tensors='pt', padding='longest')
    # Inference only: no gradients are needed for the similarity scores.
    with torch.no_grad():
        output = model(inputs['input_ids'], attention_mask=inputs['attention_mask'])[0]
    # NOTE(review): the mean is taken over every position, padding included;
    # a mask-weighted mean may be preferable but would change the scores.
    sentence_rep = output[:1].mean(dim=1)
    label_reps = output[1:].mean(dim=1)
    similarities = F.cosine_similarity(sentence_rep, label_reps)
    best = int(similarities.argsort(descending=True)[0])
    # Index the labels that were actually passed in, not a notebook-level global.
    return [labels_[best], str(similarities[best].item())]

def ZSL_Req_results(requirements, gold_classes, tags_name, tags_definition, model_title, task_name):
  """Classify every requirement with one language model and save the evaluation.

  Three files are written, all prefixed with ``task_name`` and the model title
  (with '/' replaced by '-'): a CSV of per-requirement predictions, a text file
  with the Matthews correlation coefficient, and a CSV classification report.

  Args:
    requirements: list of requirement texts.
    gold_classes: gold label per requirement, expressed with the same label
      texts as ``tags_definition``.
    tags_name: display names for the report rows, in the same order as
      ``tags_definition``.
    tags_definition: candidate label texts handed to the classifier.
    model_title: model name or path for ``AutoTokenizer``/``AutoModel``.
    task_name: prefix (may include a directory) of the output files.

  Returns:
    ``[model_title, df, data]`` where ``df`` is the classification report as a
    DataFrame and ``data`` is the list of
    (requirement, gold, predicted, score) rows written to the CSV.
  """
  tokenizer = AutoTokenizer.from_pretrained(model_title)
  model = AutoModel.from_pretrained(model_title)
  predicated_classes = []
  similarity_scores = []
  for req in requirements:
    label, score = ZSL_Req_classifier(req, tokenizer, model, tags_definition)
    print(req)
    print(label)  # predicted class
    print(score)  # similarity score
    predicated_classes.append(label)
    similarity_scores.append(score)

  file_stem = task_name + '_' + model_title.replace('/', '-')
  filename_scores = file_stem + '.csv'
  filename_txt = file_stem + 'MCC.txt'
  filename_report = file_stem + '_ClassificationReport.csv'

  MCC_score = matthews_corrcoef(gold_classes, predicated_classes)
  print(f'the result of matthews correlation cofficient is:{MCC_score}')
  with open(filename_txt, 'w') as f:
    f.write("The result " + str(MCC_score))

  # Pass `labels` explicitly: without it scikit-learn sorts the labels found in
  # the data and attaches `target_names` to that sorted order, which can swap
  # the names of the report rows (or fail when a class never occurs).
  report = classification_report(gold_classes, predicated_classes, labels=tags_definition,
                                 target_names=tags_name, output_dict=True)
  print(report)
  df = pd.DataFrame(report).transpose()
  df.to_csv(filename_report)

  # Materialise the rows: a bare zip would be exhausted by writerows() and the
  # caller would receive an empty iterator. Use the gold_classes argument, not
  # a notebook-level global.
  data = list(zip(requirements, gold_classes, predicated_classes, similarity_scores))
  with open(filename_scores, "w", newline='', encoding='utf-8') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerow(['Requirement text', 'Gold label' , 'Predicated label', 'ZSL score'])
    writer.writerows(data)
  return [model_title, df, data]

#### B) Multilabelling Settings: Top-n labels

In [None]:

def ZSL_Req_classifier_MultiL(requirement, tokenizer, model, labels_, gold_label, top):
  """Rank all candidate labels for a requirement and locate the gold label.

  The requirement and the candidate labels are encoded as one padded batch,
  each sequence is represented by the mean of the model's first output over
  dimension 1, and labels are ranked by cosine similarity to the requirement.

  Args:
    requirement: requirement text to classify.
    tokenizer: object exposing ``batch_encode_plus``.
    model: callable ``model(input_ids, attention_mask=...)`` whose result is
      indexable, item 0 being the tensor that is pooled.
    labels_: list of candidate label texts.
    gold_label: the correct label text; must be one of ``labels_``.
    top: number of best-ranked labels to keep (the best label is always kept).

  Returns:
    ``[ordered_labels, best_label, best_score, hit, gold_label, gold_score,
    gold_position, actual_diff]`` where ``ordered_labels`` are the top-ranked
    labels, ``hit`` tells whether the gold label is among them,
    ``gold_position`` is its 1-based rank among all labels, and
    ``actual_diff`` is -1 when the gold label is ranked first, -2 when it is
    outside the top labels, otherwise best score minus gold score.

  Raises:
    ValueError: if ``gold_label`` is not one of ``labels_``.
  """
  import torch  # local import: only needed to switch off gradient tracking

  # padding='longest' replaces the deprecated pad_to_max_length=True flag.
  inputs = tokenizer.batch_encode_plus([requirement] + labels_,
                                       return_tensors='pt',
                                       padding='longest')
  with torch.no_grad():
    output = model(inputs['input_ids'], attention_mask=inputs['attention_mask'])[0]
  sentence_rep = output[:1].mean(dim=1)
  label_reps = output[1:].mean(dim=1)

  # Rank every label by cosine similarity to the requirement, best first.
  similarities = F.cosine_similarity(sentence_rep, label_reps)
  ranking = [int(i) for i in similarities.argsort(descending=True)]
  recalled_labels = [labels_[i] for i in ranking]
  scores = [similarities[i].item() for i in ranking]

  if gold_label not in recalled_labels:
    raise ValueError(f"gold label {gold_label!r} is not among the candidate labels")

  # The best label is always kept; further labels are kept while rank < top.
  ordered_labels = [label for rank, label in enumerate(recalled_labels) if rank == 0 or rank < top]
  similarity_scores = scores[:len(ordered_labels)]

  index = recalled_labels.index(gold_label)
  yes = index < len(ordered_labels)
  score = scores[index]
  position = index + 1
  if not yes:
    actual_diff = -2
  elif index == 0:
    actual_diff = -1
  else:
    actual_diff = similarity_scores[0] - score

  print(requirement)
  return_result = [ordered_labels, ordered_labels[0], similarity_scores[0],yes, gold_label, score, position, actual_diff]
  print(return_result)
  return return_result

def ZSL_Req_results_MultiL(requirements, gold_classes, tags_name, tags_definition, model_title, task_name, top):
  """Run the top-n evaluation for one language model and save the results.

  A requirement counts as correctly classified when its gold label is among
  the ``top`` best-ranked labels; in that case the gold label and its score
  are recorded as the prediction, otherwise the best-ranked label is.

  Three files are written, all prefixed with ``task_name`` and the model title
  (with '/' replaced by '-'): a CSV of per-requirement results, a text file
  with the Matthews correlation coefficient, and a CSV classification report.

  Args:
    requirements: list of requirement texts.
    gold_classes: gold label per requirement, expressed with the same label
      texts as ``tags_definition``.
    tags_name: display names for the report rows, in the same order as
      ``tags_definition``.
    tags_definition: candidate label texts handed to the classifier.
    model_title: model name or path for ``AutoTokenizer``/``AutoModel``.
    task_name: prefix (may include a directory) of the output files.
    top: number of best-ranked labels in which the gold label may appear.

  Returns:
    ``[model_title, df, data]`` where ``df`` is the classification report as a
    DataFrame and ``data`` is the list of rows written to the results CSV.
  """
  tokenizer = AutoTokenizer.from_pretrained(model_title)
  model = AutoModel.from_pretrained(model_title)
  predicated_classes = []
  similarity_scores = []
  calc_differences = []
  gold_label_positions = []
  recalled_labels = []
  # Pair each requirement with its own gold label by position. Looking the
  # position up with list.index() is quadratic and returns the first match,
  # i.e. the wrong gold label when the same requirement text occurs twice.
  for req, gold in zip(requirements, gold_classes):
    results = ZSL_Req_classifier_MultiL(req, tokenizer, model, tags_definition, gold, top)
    hit = results[3]
    predicated_classes.append(results[4] if hit else results[1])
    similarity_scores.append(results[5] if hit else results[2])
    recalled_labels.append(results[0])
    calc_differences.append(results[7])
    gold_label_positions.append(results[6])

  file_stem = task_name + '_' + model_title.replace('/', '-')
  filename_scores = file_stem + 'MultiLabelling.csv'
  filename_txt = file_stem + 'MultiLabelling_MCC.txt'
  filename_report = file_stem + 'MultiLabelling_ClassificationReport.csv'

  MCC_score = matthews_corrcoef(gold_classes, predicated_classes)
  print(f'the result of matthews correlation cofficient for MultiLabelling task is:{MCC_score}')
  with open(filename_txt, 'w') as f:
    f.write("The result of MCC for MultiLabelling task: " + str(MCC_score))

  # Pass `labels` explicitly so that `target_names` line up with the intended
  # classes rather than with scikit-learn's sorted label order.
  report = classification_report(gold_classes, predicated_classes, labels=tags_definition,
                                 target_names=tags_name, output_dict=True)
  df = pd.DataFrame(report).transpose()
  df.to_csv(filename_report)

  # Materialise the rows so the returned data is not an exhausted iterator, and
  # use the gold_classes argument instead of a notebook-level global.
  data = list(zip(requirements, gold_classes, predicated_classes, similarity_scores,
                  recalled_labels, gold_label_positions, calc_differences))
  with open(filename_scores, "w", newline='', encoding='utf-8') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerow(['Requirement text', 'Gold label' , 'Predicated label', 'ZSL score', 'Top-n recalled labels', 'Gold label position', 'Diff. sim scores' ])
    writer.writerows(data)
  return [model_title, df, data]

### Step 6: Apply the ZSL classification Tasks

Apply the ZSL classification pipeline in different classification tasks in binary or multi classification settings



#### **Dataset_A_1 Experiments**

##### **Experiment 1**: Pairwise - Binary classification 

OvO (One vs. One) (Binary-classification) task

*One vs. One evaluation method:* each class is compared with every other (non-identical) class, one pair at a time, for the categories in the dataset.

Number of result records = (#classes) x (#classes - 1) / 2

In [None]:
# Output directory for the One-vs-One experiment results.
destination = 'Bi_class_OVO'
import os
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError once the directory exists.
os.makedirs(destination, exist_ok=True)

In [None]:
import itertools

labelClass = ["US", "SE", "PE" , "O", "LF", "L", "FT", "MN","SC", "A", "PO"]
fullClass =  ['Usability', 'Security', 'Performance', 'Operational','Look-feel' , 'Legal', 'Fault & tolerance', 'Maintainability' , 'Scalability', 'Availability', 'Portability']

# Every unordered pair of classes is evaluated exactly once per language model,
# i.e. (#classes) x (#classes - 1) / 2 tasks per model. The previous
# visited-list bookkeeping ran each pair in both orders for the first model and
# then skipped every remaining model, because the list was never reset.
class_pairs = list(itertools.combinations(zip(fullClass, labelClass), 2))

for LM in candidate_LMs:
  for (class_1, label_1), (class_2, label_2) in class_pairs:
    DS = Binary_Classification(dataset_a_1, class_1, class_2, label_1, label_2)
    goldDataset = DS.BinaryClass_Task_OvO('class')
    requirements = goldDataset[0]
    gold_label = goldDataset[1]
    labels_long = goldDataset[2]
    print(len(requirements))
    print(gold_label)
    print(labels_long)
    result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, destination+"//"+label_1+"and"+label_2 )

##### **Experiment 2: (Binary-classification) task**
OvR (One vs. Rest) 

*One vs. Rest or All evaluation method* which splits the dataset into positive (the desired class) and negative (all the other classes in the dataset ).

Number of result records = number of selected classes. Each selected class has its own result, comparing it to the other non-selected classes.

In [None]:
# Output directory for the maintainability One-vs-Rest results.
destination = 'Maintainability_REQ_Results'
import os
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError once the directory exists.
os.makedirs(destination, exist_ok=True)

In [None]:
# Suffix appended to the result file names of the experiment cell that follows;
# presumably it identifies the label-wording variant under test.
labels_title = "MS_labels_group_E"

In [None]:
# The label descriptions and the wrapped dataset do not depend on the language
# model, so they are built once instead of on every loop iteration.
label_1 = "adaptability, effectiveness, agility, preventive, dependability, correcting, reuse, defect, mitigation, validated, resilience, achievable, remedy, assessing, or maintaining"
label_2 = "usability, security, operational, performance, look & feel, legal, fault & tolerance, scalability, availability, or portability"
abber_1 = "MN"
abber_2 = "Non-MN"  # only used in the output file name; One-vs-Rest matches on abber_1
DS = Binary_Classification(dataset_a_1_MN, label_1, label_2, abber_1, abber_2)
goldDataset = DS.BinaryClass_Task_OvR('class')
requirements = goldDataset[0]
gold_label = goldDataset[1]
labels_long = goldDataset[2]
print(len(requirements))
print(gold_label)
print(labels_long)

for LM in candidate_LMs:
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, destination+"//"+abber_1+"and"+abber_2+labels_title )

In [None]:
# Archive the result directory of the experiment above into a single zip file.
!zip -r Maintainability_REQ_Results.zip Maintainability_REQ_Results

In [None]:
# Delete the result directory; run this only after the zip cell above has succeeded.
!rm -rf Maintainability_REQ_Results

##### **Experiment 3: OvR (One vs. Rest) '*Exclude one class*' (Binary-classification) task**

*One vs. Rest or All evaluation method* which splits the dataset into positive (the desired class) and negative (all the other classes in the dataset EXCLUDING one class).

Number of result records = number of selected classes. Each selected class has its own result, comparing it to the other non-selected classes.

In [None]:
# Output directory for the One-vs-Rest (excluding one class) results.
destination = 'Bi_class_OVR_NonFuncationl'
import os
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError once the directory exists.
os.makedirs(destination, exist_ok=True)

In [None]:
# Full class set, kept for reference (the four most frequent classes are left out below):
#abberClass = ["US", "SE", "PE" , "O", "LF", "L", "FT", "MN","SC", "A", "PO"]
#fullClass =  ['Usability', 'Security', 'Performance', 'Operational','Look-feel' , 'Legal', 'Fault & Tolerance', 'Maintainability' , 'Scalability', 'Availability', 'Portability']

abberClass = ["O", "LF", "L", "FT", "MN", "SC", "A", "PO"]
fullClass = ['Operational', 'Look-feel', 'Legal', 'Fault & Tolerance', 'Maintainability', 'Scalability', 'Availability', 'Portability']

for LM in candidate_LMs:
  for class_1 in fullClass:
    label_1 = class_1
    label_2 = f"not about {label_1.lower()}"
    # label_1 is always a member of fullClass, so its abbreviation is looked up
    # directly; label_2 never is, so it always gets the "non-" prefix.
    abber_1 = abberClass[fullClass.index(label_1)]
    abber_2 = f"non-{label_2}"
    DS = Binary_Classification(dataset_a_1, label_1, label_2, abber_1, abber_2)
    # Rows of class 'F' are excluded from the task.
    goldDataset = DS.BinaryClass_Task_OvR_ExceptOne('F', 'class')
    requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
    print(len(requirements), gold_label, labels_long, sep='\n')
    result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}")

For the most frequent classes in NFR dataset

In [None]:
# Output directory for the One-vs-Rest results of the most frequent classes.
destination = 'Bi_class_OVR_Top_NonFuncationl'
import os
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError once the directory exists.
os.makedirs(destination, exist_ok=True)

In [None]:
abberClass = ["US", "SE", "PE" , "O"]
fullClass =  ['Usability', 'Security', 'Performance', 'Operational']
for LM in candidate_LMs:
  for class_1 in fullClass:
    label_1 = class_1
    label_2 = "not about "+label_1.lower()
    abber_1 = "non-"+label_1 if label_1 not in fullClass else abberClass[fullClass.index(label_1)]
    abber_2 = "non-"+label_2 if label_2 not in fullClass else abberClass[fullClass.index(label_2)]
    # Binary_Classification takes exactly (dataset, label_1, label_2, abber_1,
    # abber_2); passing abberClass as a sixth argument raised a TypeError.
    # NOTE(review): the extra argument suggests the "rest" was meant to be
    # limited to the four classes above - confirm the intended dataset.
    DS = Binary_Classification(dataset_a_1, label_1, label_2, abber_1, abber_2)
    # The column name is a required argument; 'class' matches the other experiments.
    goldDataset = DS.BinaryClass_Task_OvR_ExceptOne('F', 'class')
    requirements = goldDataset[0]
    gold_label = goldDataset[1]
    labels_long = goldDataset[2]
    print(len(requirements))
    print(gold_label)
    print(labels_long)
    result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, destination+"//"+abber_1+"and"+abber_2 )

##### **Experiment 4: MT (Multi-classification) task**

*Multi-classification evaluation method* which classifies the dataset by considering all the classes in the dataset, as a multi-class classification task. The result is always a single record presenting the performance of the classifier.

In [None]:
# Output directory for the multi-class experiment results.
destination = "All_NFR_Multi"
import os
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError once the directory exists.
os.makedirs(destination, exist_ok=True)

In [None]:
# Tag appended to the output file prefix that the multi-class cell passes to ZSL_Req_results.
labels_title = "Multi_labels_group_B"

In [None]:
# Index-aligned class abbreviations and the descriptive label phrase used for each class.
abberClass = ["US", "SE", "PE" , "O", "LF", "L", "FT", "MN","SC", "A"]
# NOTE(review): "avaliable" in the last phrase looks like a typo for "available"; it is
# kept as-is because the phrase is passed to ZSL_Req_results and may affect results.
fullClass = [
    "instructive, easy, helpful, useful, learnable, explainable, affordable, intuitive, or understandable",
    "security, authorization, or protection",
    "periodic execution or efficacy performance",
    "working, running, connecting, interfacing, or operative environment",
    "appearance, interface, look & feel",
    "legal, law, or rules",
    "system recovery & fault tolerance",
    "maintaining, fixing, running or updating",
    "scalable, increasable or developable",
    "avaliable or timely achievable",
]

# One ZSL_Req_results call per model in candidate_LMs, all classes at once.
for LM in candidate_LMs:
  DS_M = Multi_Classification_RE(dataset_a_1, fullClass, abberClass)
  goldDataset_M = DS_M.MutliClass_Task('class')
  requirements, gold_label = goldDataset_M[0], goldDataset_M[1]
  # The candidate labels are the descriptive phrases themselves.
  labels_long = fullClass
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//Top_NFR{labels_title}")

In [14]:
# Recursively pack the All_NFR_Multi result folder into All_NFR_Multi.zip.
!zip -r All_NFR_Multi.zip All_NFR_Multi

  adding: All_NFR_Multi/ (stored 0%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_deepset-sentence_bertMCC.txt (stored 0%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_deepset-sentence_bert.csv (deflated 78%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_jeniya-BERTOverflow.csv (deflated 81%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_BERT4RE-BERT4RE-_ClassificationReport.csv (deflated 49%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_sentence-transformers-all-MiniLM-L12-v2.csv (deflated 78%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_BERT4RE-BERT4RE-MCC.txt (stored 0%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_jeniya-BERTOverflow_ClassificationReport.csv (deflated 51%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_jeniya-BERTOverflowMCC.txt (stored 0%)
  adding: All_NFR_Multi/Top_NFRMulti_labels_group_B_sentence-transformers-all-MiniLM-L12-v2_ClassificationReport.csv (deflated 52%)
  adding: All_NFR_Multi/Top_NFRMulti_labels

In [None]:
# Delete the All_NFR_Multi result folder and everything in it (irreversible).
!rm -rf All_NFR_Multi

##### **Experiment 5: MultiL (Multi-labelling) task**

In [None]:
# Output folder for the multi-label run.
import os

destination = "Top_NFR_MultiLabel"
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from an earlier run.
os.makedirs(destination, exist_ok=True)

In [None]:
# Tag appended to the output file prefix that the multi-label cell passes to ZSL_Req_results_MultiL.
labels_title = "MultiLabel_labels_group_A"

In [None]:
# Index-aligned abbreviations and label words for the four classes used in this run.
abberClass = ["US", "SE", "PE" , "O"]
fullClass =  ["usability", "security", "performance", "operational"]

# One ZSL_Req_results_MultiL call per model in candidate_LMs.
for LM in candidate_LMs:
  DS_M = Multi_Classification_RE(dataset_a_1, fullClass, abberClass)
  goldDataset_M = DS_M.MutliClass_Task('class')
  requirements, gold_label = goldDataset_M[0], goldDataset_M[1]
  labels_long = fullClass
  print(len(requirements), gold_label, labels_long, sep="\n")
  # NOTE(review): the prefix says "Top3" while the trailing argument is 2 - confirm
  # what that argument means in ZSL_Req_results_MultiL.
  result = ZSL_Req_results_MultiL(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//MultiL_Top3_All_NFR{labels_title}", 2)

In [None]:
# Recursively pack the Top_NFR_MultiLabel result folder into Top_NFR_MultiLabel.zip.
!zip -r Top_NFR_MultiLabel.zip Top_NFR_MultiLabel

In [None]:
# Delete the Top_NFR_MultiLabel result folder and everything in it (irreversible).
!rm -rf Top_NFR_MultiLabel

#### **Dataset_A_2 Experiments**

##### **Experiment 1: OvR (One vs. Rest) (Binary-classification) task**

*One vs. Rest (or One vs. All) evaluation method*, which splits the dataset into positive (the desired class) and negative (all the other classes in the dataset).

Number of result records = number of selected classes. Each selected class has its own result, compared against the other non-selected classes.

In [None]:
# Output folder for the one-vs-rest runs on dataset_a_2.
import os

destination = 'Bi_class_OVR_DA2_SourceLabels'
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from an earlier run.
os.makedirs(destination, exist_ok=True)

In [None]:
# One-vs-rest run on dataset_a_2 with gold column 'Label_F': "function" against its negation.
# ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1 = "function"
  label_2 = "not about " + label_1
  abber_1, abber_2 = "F", "NF"
  DS = Binary_Classification(dataset_a_2, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('Label_F')
  # The task result is indexed positionally: [0], [1], [2].
  requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}")

In [None]:
# One-vs-rest run on dataset_a_2 with gold column 'Label_B': "behavior" against its negation.
# ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1 = "behavior"
  label_2 = "not about behavior"
  # NOTE(review): the abbreviations are still "F"/"NF", as in the 'Label_F' cell. They are
  # left untouched because their role inside Binary_Classification is not visible here.
  abber_1 = "F"
  abber_2 = "NF"
  DS = Binary_Classification(dataset_a_2, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('Label_B')
  requirements = goldDataset[0]
  gold_label = goldDataset[1]
  labels_long = goldDataset[2]
  print(len(requirements))
  print(gold_label)
  print(labels_long)
  # The "_Label_B" suffix gives this run its own output prefix. Without it the prefix is
  # "<destination>//FandNF", identical to the 'Label_F' and 'Label_D' cells, so the three
  # runs would presumably write to the same result files and overwrite one another.
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, destination+"//"+abber_1+"and"+abber_2+"_Label_B" )

In [None]:
# One-vs-rest run on dataset_a_2 with gold column 'Label_D': "data" against its negation.
# ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1 = "data"
  label_2 = "not about data"
  # NOTE(review): the abbreviations are still "F"/"NF", as in the 'Label_F' cell. They are
  # left untouched because their role inside Binary_Classification is not visible here.
  abber_1 = "F"
  abber_2 = "NF"
  DS = Binary_Classification(dataset_a_2, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('Label_D')
  requirements = goldDataset[0]
  gold_label = goldDataset[1]
  labels_long = goldDataset[2]
  print(len(requirements))
  print(gold_label)
  print(labels_long)
  # The "_Label_D" suffix gives this run its own output prefix. Without it the prefix is
  # "<destination>//FandNF", identical to the 'Label_F' and 'Label_B' cells, so the three
  # runs would presumably write to the same result files and overwrite one another.
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, destination+"//"+abber_1+"and"+abber_2+"_Label_D" )

#### **Dataset_A_3 Experiments**

##### **Experiment 1: OvR (One vs. Rest) (Binary-classification) task**

*One vs. Rest (or One vs. All) evaluation method*, which splits the dataset into positive (the desired class) and negative (all the other classes in the dataset).

Number of result records = number of selected classes. Each selected class has its own result, compared against the other non-selected classes.

In [None]:
# Output folder for the one-vs-rest runs on dataset_a_3.
import os

destination = 'Bi_class_OVR_DA3'
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from an earlier run.
os.makedirs(destination, exist_ok=True)

In [None]:
# One-vs-rest run on dataset_a_3 with gold column 'Functional': "function" against its negation.
# ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1 = "function"
  label_2 = "not about " + label_1
  abber_1, abber_2 = "F", "NF"
  DS = Binary_Classification(dataset_a_3, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('Functional')
  # The task result is indexed positionally: [0], [1], [2].
  requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}")

In [None]:
# One-vs-rest run on dataset_a_3 with gold column 'Quality': "quality" against its negation.
# ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1 = "quality"
  label_2 = "not about " + label_1
  abber_1, abber_2 = "Q", "NQ"
  DS = Binary_Classification(dataset_a_3, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('Quality')
  # The task result is indexed positionally: [0], [1], [2].
  requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}")

#### **Dataset_B Experiments**

##### **Experiment 1: OvR (One vs. Rest) (Binary-classification) task**

*One vs. Rest (or One vs. All) evaluation method*, which splits the dataset into positive (the desired class) and negative (all the other classes in the dataset).

Number of result records = number of selected classes. Each selected class has its own result, compared against the other non-selected classes.

In [None]:
# Output folder for the Dataset_B security experiments.
import os

destination = 'ePurse_Security_REQ_Results_C'
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from an earlier run.
os.makedirs(destination, exist_ok=True)


In [None]:
# Tag appended to the output file prefix that the security one-vs-rest cell passes to ZSL_Req_results.
labels_title = "ePurse_labels_Sec_C"

In [None]:
# One-vs-rest run on dataset_b_3 with gold column 'Label': a security-related phrase
# against its negation. ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  # NOTE(review): "securing. protecting" uses a period where the rest of the list uses
  # commas; kept verbatim because the phrase is passed on as the label text.
  label_1 = "vulnerability, securing. protecting, protection, cybersecurity, assurance, cyber, countermeasure, threat, privacy, authentication, prevention, or confidentiality"
  # The negative label is the positive phrase prefixed with "not about ".
  label_2 = "not about " + label_1
  abber_1, abber_2 = "sec", "nonsec"
  DS = Binary_Classification(dataset_b_3, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('Label')
  # The task result is indexed positionally: [0], [1], [2].
  requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}{labels_title}")

In [None]:
!zip -r GPS_Security_REQ_Results_C.zip GPS_Security_REQ_Results_C

In [None]:
# Delete the GPS_Security_REQ_Results_C folder and everything in it (irreversible).
# NOTE(review): this section writes to 'ePurse_Security_REQ_Results_C', so this name looks
# stale. The target is deliberately not changed here: the next experiment still writes
# into `destination`, so that folder must keep existing.
!rm -rf  GPS_Security_REQ_Results_C

##### **Experiment 2: MT (Multi-classification) task**

Multi-classification method, which classifies the dataset by considering all the classes in the dataset as a single multi-classification task. The result is always one record that presents the performance of the classifier.

In [None]:
# Index-aligned abbreviations and label phrases for the two classes of this run.
abberClass = ["sec", "nonsec"]
# Alternative, more descriptive label phrases (currently disabled):
#fullClass =  ['security, authorization, or protection', 'not about security, authorization, or protection']
fullClass =  ['security', 'not about security']

# One ZSL_Req_results call per model in candidate_LMs; results use the "MT" prefix inside
# the folder named by `destination`.
for LM in candidate_LMs:
  DS_M = Multi_Classification_RE(dataset_b, fullClass, abberClass)
  goldDataset_M = DS_M.MutliClass_Task('Label')
  requirements, gold_label = goldDataset_M[0], goldDataset_M[1]
  labels_long = fullClass
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//MT")

In [None]:
!zip -r Bi_class_OVR_DB.zip Bi_class_OVR_DB

#### **Dataset_C Experiments**

##### **Experiment 1: OvR (One vs. Rest) (Binary-classification) task**

*One vs. Rest (or One vs. All) evaluation method*, which splits the dataset into positive (the desired class) and negative (all the other classes in the dataset).

Number of result records = number of selected classes. Each selected class has its own result, compared against the other non-selected classes.

In [None]:
# Output folder for the one-vs-rest runs on dataset_c_1.
import os

destination = 'Bi_class_OVR_DC'
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from an earlier run.
os.makedirs(destination, exist_ok=True)

In [None]:
# One-vs-rest run on dataset_c_1 with gold column 'IsFunctional': "function" against its negation.
# ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1 = "function"
  label_2 = "not about " + label_1
  abber_1, abber_2 = "F", "NF"
  DS = Binary_Classification(dataset_c_1, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('IsFunctional')
  # The task result is indexed positionally: [0], [1], [2].
  requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}")

In [None]:
# One-vs-rest run on dataset_c_1 with gold column 'IsQuality': "quality" against its negation.
# ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1 = "quality"
  label_2 = "not about " + label_1
  abber_1, abber_2 = "Q", "NQ"
  DS = Binary_Classification(dataset_c_1, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('IsQuality')
  # The task result is indexed positionally: [0], [1], [2].
  requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}")

In [None]:
# Output folder for the requirement / not-a-requirement run on dataset_c_2.
import os

destination = 'Bi_class_OVR_NotR'
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from an earlier run.
os.makedirs(destination, exist_ok=True)

In [None]:
# One-vs-rest run on dataset_c_2 with gold column 'NotR': "requirement" against
# "not a requirement". ZSL_Req_results is called once per model in candidate_LMs.
for LM in candidate_LMs:
  label_1, label_2 = "requirement", "not a requirement"
  abber_1, abber_2 = "R", "NotR"
  DS = Binary_Classification(dataset_c_2, label_1, label_2, abber_1, abber_2)
  goldDataset = DS.BinaryClass_Task_OvR('NotR')
  # The task result is indexed positionally: [0], [1], [2].
  requirements, gold_label, labels_long = goldDataset[0], goldDataset[1], goldDataset[2]
  print(len(requirements), gold_label, labels_long, sep="\n")
  result = ZSL_Req_results(requirements, gold_label, labels_long, labels_long, LM, f"{destination}//{abber_1}and{abber_2}")

In [None]:
# Recursively pack the Bi_class_OVR_NotR result folder into Bi_class_OVR_NotR.zip.
!zip -r Bi_class_OVR_NotR.zip Bi_class_OVR_NotR

In [None]:
# Delete the ePurse_Security_REQ_Results folder and everything in it (irreversible).
# NOTE(review): no visible cell creates a folder with exactly this name (the Dataset_B
# folder is 'ePurse_Security_REQ_Results_C', and the folder zipped just before this cell is
# 'Bi_class_OVR_NotR'). Confirm the intended target before relying on this cleanup.
!rm -rf ePurse_Security_REQ_Results