# **Install Related Libraries**

In [None]:
!pip install transformers==4.17

In [2]:
from transformers import Trainer, TrainingArguments
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch
import pandas as pd
import nltk
import re
import ast
import numpy as np
from torch.utils.data import Dataset
from sklearn.metrics import precision_score, recall_score, f1_score, precision_recall_fscore_support,accuracy_score
from sklearn.model_selection import train_test_split

# **Import Datasets**

In [3]:
# Read the raw issue tables; both paths are relative to the working directory.
trainingData, testingData = (
    pd.read_csv(csv_path)
    for csv_path in ("issues_train.csv", "issues_test.csv")
)

# **Text Preprocessing**

In [4]:
# Build the model input text: issue title and body joined by a single space.
trainingData["summary"] = trainingData["title"] + " " + trainingData["body"]
# The test summary must use the test set's own bodies. Using the training
# bodies here would pair each test title with an unrelated training issue
# (matched only by row index).
testingData["summary"] = testingData["title"] + " " + testingData["body"]

# Cap every summary at 256 characters. Non-string cells (NaN, produced when
# the title or body is missing) are passed through untouched.
trainingData["summary"] = trainingData["summary"].apply(
    lambda x: x[:256] if isinstance(x, str) else x
)
testingData["summary"] = testingData["summary"].apply(
    lambda x: x[:256] if isinstance(x, str) else x
)

In [5]:
def checkLink(text):
    """Return ``str(text)`` with every http(s) URL replaced by the token "LINK".

    The argument is stringified first, so non-string cells are accepted.
    """
    url_regex = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    as_text = str(text)
    return url_regex.sub("LINK", as_text)

# Replace URLs with the LINK placeholder in both splits.
for frame in (trainingData, testingData):
    frame['summary'] = frame['summary'].apply(checkLink)

In [6]:
def _lower_or_stringify(value):
    """Lower-case string cells; convert any other cell to its ``str()`` form."""
    if isinstance(value, str):
        return value.lower()
    return str(value)


# Applied to every cell of every column, so non-string values (NaN included)
# end up as strings as well.
trainingData = trainingData.applymap(_lower_or_stringify)
testingData = testingData.applymap(_lower_or_stringify)

In [7]:
def removeSpecialCharacters(text):
    """Keep only runs of ASCII letters from *text*, joined by single spaces.

    Digits, punctuation and whitespace all act as separators, so the result
    has no leading, trailing or repeated spaces.
    """
    letter_runs = re.findall(r'[a-zA-Z]+', text)
    return ' '.join(letter_runs)

# Strip everything except letters from the summaries of both splits.
for frame in (trainingData, testingData):
    frame['summary'] = frame['summary'].apply(str).apply(removeSpecialCharacters)

In [None]:
def dropNan(x):
    """Return True when ``str(x)`` has at least 5 characters, else False.

    Used as a row filter: short leftovers such as "nan" are rejected.
    """
    return len(str(x)) >= 5

# Keep only rows whose summary passes dropNan (at least 5 characters).
trainingData = trainingData[trainingData["summary"].apply(dropNan)]
testingData = testingData[testingData["summary"].apply(dropNan)]

# Rows with an identical summary are collapsed to their first occurrence.
trainingData = trainingData.drop_duplicates(subset='summary')
testingData = testingData.drop_duplicates(subset='summary')

print(trainingData)

In [9]:
# Persist only the columns the later cells read back.
kept_columns = ['repo', 'summary', 'label']
trainingData = trainingData[kept_columns]
testingData = testingData[kept_columns]

trainingData.to_csv("training_Dataset.csv", index=False)
testingData.to_csv("testing_Dataset.csv", index=False)

# **Dataset Preparation**

In [10]:
# Load the cleaned training data written by the preprocessing cells.
data = pd.read_csv("training_Dataset.csv")

# Encode the textual label as a class id: "bug" -> 0, "feature" -> 1,
# every other value -> 2.
label_ids = {"bug": 0, "feature": 1}
label = [label_ids.get(name, 2) for name in data['label']]
data['label'] = label

X = list(data["summary"])
y = list(data["label"])

# Hold out 30% of the rows for validation; the split is not seeded.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
df_train = pd.DataFrame({"summary": X_train, "label": y_train})

In [None]:
# Tokenizer and classifier are loaded from the same checkpoint; the
# classification head is sized for the three issue classes.
checkpoint = 'roberta-large'
tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
model = RobertaForSequenceClassification.from_pretrained(checkpoint, num_labels=3)

In [12]:
df_valid = pd.DataFrame({"summary": X_val, "label": y_val})

# Plain NumPy arrays of texts and class ids for each split.
train_text, train_label = df_train["summary"].values, df_train["label"].values
val_text, val_label = df_valid["summary"].values, df_valid["label"].values

In [13]:
# define custom dataset
class TextClassificationDataset(Dataset):
    """Dataset that tokenizes one text per item for sequence classification.

    Args:
        texts: Sequence of input strings (list, NumPy array or pandas Series).
        labels: Sequence of integer class ids, parallel to ``texts``.
        tokenizer: Object exposing ``encode_plus``; the notebook passes a
            ``RobertaTokenizer``.
        max_length: Number of tokens every example is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=256):
        # Materialise both sequences as lists so __getitem__ always indexes by
        # position. A pandas Series whose index has gaps (for example after
        # dropna) would otherwise be looked up by label and raise KeyError.
        self.texts = list(texts)
        self.labels = list(labels)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, index):
        """Tokenize example ``index``.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` tensors for the
            text and a ``labels`` tensor holding its class id.
        """
        encoded_text = self.tokenizer.encode_plus(
            self.texts[index],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_token_type_ids=False,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # return_tensors='pt' adds a leading batch dimension; drop only that
        # dimension so a length-1 sequence is not collapsed to a scalar.
        return {
            'input_ids': encoded_text['input_ids'].squeeze(0),
            'attention_mask': encoded_text['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[index])
        }

# Wrap the training and validation arrays in tokenizing datasets.
train_dataset, eval_dataset = (
    TextClassificationDataset(split_texts, split_targets, tokenizer)
    for split_texts, split_targets in (
        (train_text, train_label),
        (val_text, val_label),
    )
)

In [14]:
# Metrics callback handed to the Trainer for validation runs.
def compute_metrics(pred):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation.

    Args:
        pred: Object with ``label_ids`` (true class ids) and ``predictions``
            (per-class scores; the arg-max over the last axis is taken as the
            predicted class).

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    weighted = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': weighted[0],
        'recall': weighted[1],
        'f1': weighted[2],
    }

# **Setting Hyperparameters**

In [15]:
# Hyperparameters are gathered in a dict first so the full configuration can
# be inspected or logged before the TrainingArguments object is built.
hyperparameters = {
    'output_dir': './results',
    'num_train_epochs': 32,
    'per_device_train_batch_size': 4,
    'per_device_eval_batch_size': 8,
    'gradient_accumulation_steps': 2,
    'learning_rate': 7e-6,
    'warmup_steps': 5,
    # NOTE(review): 32 is unusually large for a weight-decay coefficient;
    # confirm this value is intended.
    'weight_decay': 32,
    'fp16': True,
    'evaluation_strategy': 'epoch',
    # NOTE(review): presumably has no effect while evaluation_strategy is
    # 'epoch' - confirm against the TrainingArguments documentation.
    'eval_steps': 10,
    'greater_is_better': True,
}
training_args = TrainingArguments(**hyperparameters)

In [None]:
# Wire the model, its arguments, both datasets and the metrics callback
# into a single Trainer.
trainer_kwargs = {
    'model': model,
    'args': training_args,
    'train_dataset': train_dataset,
    'eval_dataset': eval_dataset,
    'compute_metrics': compute_metrics,
}
trainer = Trainer(**trainer_kwargs)

# **Model Training**

In [None]:
# Fine-tune the model with the Trainer configured earlier in the notebook.
trainer.train()

# **Model Evaluation**

In [None]:
# Score the model on the Trainer's eval_dataset (the validation split),
# using the compute_metrics callback.
trainer.evaluate()

# **Model Testing**

In [None]:
# Load the cleaned test data and discard rows with any missing field.
test_data = pd.read_csv("testing_Dataset.csv")
test_data.dropna(inplace=True)
X_test = list(test_data["summary"])

# Same encoding as for training: "bug" -> 0, "feature" -> 1, otherwise 2.
test_label_ids = {"bug": 0, "feature": 1}
label = [test_label_ids.get(name, 2) for name in test_data['label']]
test_data['label'] = label


In [None]:
# Pass plain NumPy arrays, as is done for the training and validation
# datasets. After dropna the Series index can have gaps, and integer indexing
# on such a Series is a label lookup that raises KeyError for the missing
# positions.
test_dataset = TextClassificationDataset(
    test_data['summary'].values,
    test_data['label'].values,
    tokenizer,
)
predictions = trainer.predict(test_dataset=test_dataset).predictions

In [None]:
# Class names, positioned so that labels[class_id] is the readable name.
labels = ["bug", "feature", "question"]
i = 0

# The predicted class of each example is the position of its highest score.
predicted_labels = [np.argmax(prediction) for prediction in predictions]
test_data["predicted_label"] = predicted_labels

# Per-row correctness flag, stored as the strings "True" / "False".
trueFalse = [
    "True" if true_id == predicted_id else "False"
    for true_id, predicted_id in zip(test_data['label'], test_data['predicted_label'])
]
test_data["True/False"] = trueFalse

In [None]:
# Overall test-set scores; precision, recall and F1 are support-weighted
# averages over the classes.
actual, predicted = test_data['label'], test_data['predicted_label']
accuracy = accuracy_score(actual, predicted)
precision, recall, f1 = (
    scorer(actual, predicted, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")

# **Results**

In [None]:
# Per-repository breakdown of the test-set results.
# groupby yields the groups in sorted key order, so the report order is the
# same on every run (iterating a set of repo names is not).
grouped = test_data.groupby("repo")
smaller_dataframes = {
    group_name: group_data.copy() for group_name, group_data in grouped
}
repos = list(smaller_dataframes)

# Always score every class id. Without an explicit `labels=` the metric
# arrays only cover classes present in a repository, so position k would no
# longer correspond to labels[k] and the printed class names would be wrong.
class_ids = list(range(len(labels)))

for repo in repos:
    repo_frame = smaller_dataframes[repo]
    # zero_division=0 reports a class absent from this repository as 0
    # without emitting a warning for each such class.
    precision, recall, f1, support = precision_recall_fscore_support(
        repo_frame['label'],
        repo_frame["predicted_label"],
        labels=class_ids,
        zero_division=0,
    )
    print("---------",repo,"------------")
    for class_id in class_ids:
        print(f"Label {labels[class_id]}:")
        print(f"Precision: {precision[class_id]}")
        print(f"Recall: {recall[class_id]}")
        print(f"F1 Score: {f1[class_id]}")
        print(f"support: {support[class_id]}")
    # Unweighted (macro) averages over all classes, absent ones included.
    print(f"Average Precision: {np.average(precision)}")
    print(f"Average Recall: {np.average(recall)}")
    print(f"Average F1 Score: {np.average(f1)}")
    print(f"Average Support: {np.average(support)}")
    print("---------------------")
    print("_______________________________________")
    # Running count of reported repositories; `i` is initialised in the
    # prediction cell.
    i = i + 1