In [1]:
import pandas as pd
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoConfig, AutoTokenizer

comet_ml is installed but `COMET_API_KEY` is not set.


In [2]:
# Load the labelled feedback export; the path is resolved relative to the
# notebook's working directory.
feedback_csv_path = "final_data_BACKUP (4).csv"
df = pd.read_csv(feedback_csv_path)

In [3]:
# Keep only the columns used below: the text to classify, its date,
# the platform it came from, and the annotation tags.
kept_columns = ['feedback_text', 'feedback_date', 'feedback_platform', 'tags']
df = df[kept_columns]

In [4]:
# Preview the first rows to sanity-check the selected columns.
df.head()

Unnamed: 0,feedback_text,feedback_date,feedback_platform,tags
0,Must have app!. My MUST have app!!! My favorit...,2020-09-06 18:01:30+00:00,Apple App Store,Requirements irrelevant
1,Spotify review. I lt is the best music app I h...,2020-09-11 20:47:30+00:00,Apple App Store,Requirements irrelevant
2,Gaona music. I like Gaona music so much.,2020-09-11 20:51:19+00:00,Apple App Store,Requirements irrelevant
3,Spotify.. Fantastic app for old & brand new wo...,2020-09-08 08:03:31+00:00,Apple App Store,Requirements irrelevant
4,"Can’t see lyrics. Don’t know why, iPad doesn’t...",2020-09-10 17:20:43+00:00,Apple App Store,Unexpected behaviour (View lyrics)


In [5]:
# Derive boolean relevance flags from the free-text `tags` column.
# - `regex=False` matches the phrase literally rather than as a pattern.
# - `na=False` makes a missing tag yield False instead of NaN; a NaN here would
#   leave the column non-boolean and break the `~` negation below. A row with
#   no tag is therefore treated as "not irrelevant".
df["is_irrel"] = df.tags.str.lower().str.contains("requirements irrelevant", regex=False, na=False)
# Relevance is defined as the exact complement of the irrelevant flag.
df["is_rel"] = ~df["is_irrel"]

In [6]:
# Evaluation configuration.
# `label_granularity` selects the labelling scheme; it is also embedded in the
# output/log directory names and decides how many labels the model head has.
label_granularity = "requirements_relevance"
# Hugging Face model id used for both the tokenizer and the model config.
model_name = "distilbert-base-uncased"
# Every scheme other than "requirements_relevance" is handled as multi-label.
is_multiclass = label_granularity != "requirements_relevance"
problem_type = "multi_label_classification" if is_multiclass else "single_label_classification"

In [12]:
# Class names by label index: 0 -> "irrelevant", 1 -> "relevant".
# The metric code indexes this list with the class number to name its outputs.
label_order = ["irrelevant", "relevant"]

In [13]:
from dataset_preparation import FeedbackDataset
import numpy as np
# Tokenise every feedback text: padded to a common length, truncated to at most
# 256 tokens, returned as NumPy arrays.
tokenizer = AutoTokenizer.from_pretrained(model_name)
feedback_texts = df.feedback_text.tolist()
tokenized_text = tokenizer(
    text=feedback_texts,
    padding=True,
    truncation=True,
    max_length=256,
    return_tensors="np",
)

# Integer targets: argmax over the [is_irrel, is_rel] flag pair, i.e. the
# position of the flag that is set (0 = irrelevant, 1 = relevant).
relevance_flags = df[["is_irrel", "is_rel"]].values
target_ids = np.argmax(relevance_flags, axis=1)
dataset = FeedbackDataset(tokenized_text, target_ids, is_multiclass=False)

In [16]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoConfig, AutoTokenizer
import torch
from sklearn.metrics import brier_score_loss, accuracy_score, precision_recall_fscore_support, ndcg_score, roc_auc_score

def get_aprf1(labels, pred_label):
    """Return accuracy, F1, precision and recall for binary predictions.

    Args:
        labels: Ground-truth class labels.
        pred_label: Predicted class labels, aligned with ``labels``.

    Returns:
        dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``. Precision/recall/F1 are computed with
        ``average='binary'`` and ``zero_division=0``.
    """
    prf_scores = precision_recall_fscore_support(
        labels, pred_label, average='binary', zero_division=0
    )
    # The fourth element (support) is not reported.
    precision, recall, f1 = prf_scores[:3]
    return {
        'accuracy': accuracy_score(labels, pred_label),
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }

def get_cutoff_metrics(labels, preds, cutoff):
    """Binarise scores at ``cutoff`` and report the resulting metrics.

    Args:
        labels: Ground-truth binary labels.
        preds: Predicted scores; a score strictly greater than ``cutoff``
            counts as a positive prediction.
        cutoff: Decision threshold applied to ``preds``.

    Returns:
        The ``get_aprf1`` metrics with ``_<cutoff>`` appended to every key.
    """
    thresholded = preds > cutoff
    suffixed = {}
    for metric_name, value in get_aprf1(labels, thresholded).items():
        suffixed[f"{metric_name}_{cutoff}"] = value
    return suffixed

def multiclass_compute_metrics(eval_pred):
    """Compute per-class ROC AUC and 0.5-cutoff metrics for multi-label output.

    Args:
        eval_pred: A ``(logits, labels)`` pair. Both are indexed as 2-D arrays
            with one column per class; column ``i`` is reported under the name
            ``label_order[i]``.

    Returns:
        dict with, for every class, ``"<class>_roc_auc"`` plus the
        ``get_cutoff_metrics`` entries prefixed with ``"<class>_"``.
        ``"<class>_roc_auc"`` is NaN when that class column contains fewer
        than two distinct label values, because ROC AUC is undefined there.
    """
    logits, labels = eval_pred

    # Element-wise sigmoid turns each logit into an independent per-class score.
    perc_preds = torch.sigmoid(torch.Tensor(logits)).numpy()

    results = {}

    num_pred_classes = logits.shape[1]
    for class_num in range(num_pred_classes):
        class_name = label_order[class_num]

        class_perc_preds = perc_preds[:, class_num]
        class_labels = labels[:, class_num]

        # ROC AUC needs both outcomes present; on a small or skewed evaluation
        # subset a class may be entirely absent (or always present). Record NaN
        # for that class instead of aborting the whole evaluation.
        if np.unique(class_labels).size < 2:
            results[f"{class_name}_roc_auc"] = float("nan")
        else:
            results[f"{class_name}_roc_auc"] = roc_auc_score(class_labels, class_perc_preds)

        cutoff_metrics = get_cutoff_metrics(class_labels, class_perc_preds, 0.5)
        results.update({f"{class_name}_{k}": v for k, v in cutoff_metrics.items()})

    return results

def singleclass_compute_metrics(eval_pred):
    """Compute accuracy/precision/recall/F1 for single-label predictions.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class of each
            example is the argmax of its logits along the last axis.

    Returns:
        The metric dict produced by ``get_aprf1``.
    """
    logits, labels = eval_pred
    predicted_ids = np.argmax(logits, axis=-1)
    return get_aprf1(labels, predicted_ids)

# Pick the metric function that matches the configured problem type.
compute_metrics = multiclass_compute_metrics if is_multiclass else singleclass_compute_metrics

training_args = TrainingArguments(
    output_dir=f'./results_test_{label_granularity}',          # output directory
    per_device_eval_batch_size=64,   # batch size for evaluation
    logging_dir=f'./logs_test_{label_granularity}',            # directory for storing logs
    # Mixed precision is only usable on CUDA devices; fall back to full
    # precision so the notebook also runs on CPU-only machines.
    fp16=torch.cuda.is_available(),
)

# NOTE: machine-specific absolute paths -- adjust these when running elsewhere.
model_cache_dir = "/mnt/Research/peter-research/peter_devine_nlp_models"
checkpoint_path = "/home/pdev438/projects/user_feedback_labeller/results_requirements_relevance/pytorch_model.bin"

# Build the architecture from the base model's config, then size the
# classification head for the selected labelling scheme.
config = AutoConfig.from_pretrained(model_name, cache_dir=model_cache_dir)
num_labels = 3 if label_granularity == "bug_feature_other" else 2
config.num_labels = num_labels
config.problem_type = problem_type
model = AutoModelForSequenceClassification.from_config(config)

# SECURITY: torch.load unpickles the file, so only load checkpoints from a
# trusted source.
# map_location="cpu" lets a checkpoint saved on a GPU load on any machine; the
# weights are copied into `model`, whose device placement is handled by Trainer.
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))

# Only evaluation/prediction is run in this notebook; the same dataset is
# passed as both train and eval dataset.
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=dataset,         # training dataset
    eval_dataset=dataset,             # evaluation dataset
    compute_metrics=compute_metrics,
)

In [17]:
# Score the loaded checkpoint on the Trainer's eval_dataset (the full dataset
# built above) and display the resulting metric dict.
trainer.evaluate()

{'eval_loss': 0.5701955556869507,
 'eval_accuracy': 0.69,
 'eval_f1': 0.7113594040968343,
 'eval_precision': 0.7764227642276422,
 'eval_recall': 0.6563573883161512,
 'eval_runtime': 0.431,
 'eval_samples_per_second': 1160.013}

In [18]:
# Evaluate the same model separately on each feedback platform.
platform_metrics = {}

for platform_name in df.feedback_platform.unique():
    platform_df = df[df.feedback_platform == platform_name]

    # Use loop-local names on purpose: rebinding the notebook-level
    # `tokenized_text` / `dataset` here would leave them pointing at the last
    # platform's subset, silently changing what later cells operate on.
    platform_tokens = tokenizer(text=platform_df.feedback_text.tolist(), padding=True, truncation=True, max_length=256, return_tensors="np")
    platform_labels = np.argmax(platform_df[["is_irrel", "is_rel"]].values, axis=1)
    platform_dataset = FeedbackDataset(platform_tokens, platform_labels, is_multiclass=False)
    platform_metrics[platform_name] = trainer.predict(platform_dataset).metrics



In [19]:
# Tabulate the per-platform results: one column per platform, one row per metric.
pd.DataFrame(platform_metrics)

Unnamed: 0,Apple App Store,Spotify forum,Google Play Store,Reddit,Twitter
test_loss,0.504649,0.659293,0.339388,0.655921,0.691742
test_accuracy,0.72,0.65,0.83,0.64,0.61
test_f1,0.658537,0.771242,0.721311,0.73913,0.621359
test_precision,0.72973,0.983333,0.814815,0.910714,0.484848
test_recall,0.6,0.634409,0.647059,0.621951,0.864865
test_runtime,0.1141,0.0897,0.0649,0.0888,0.0473
test_samples_per_second,876.507,1115.346,1540.087,1126.68,2112.127


In [11]:
# Inspect ten consecutive items (indices 100-109) of `dataset`.
# NOTE(review): this cell's execution count is lower than that of the cells
# above, so the output shown was produced by an earlier run -- re-check it
# after Restart Kernel -> Run All.
dataset[100:110]

{'input_ids': tensor([[  101,  2190,  2189,  ...,     0,     0,     0],
         [  101,  1045,  2293,  ...,     0,     0,     0],
         [  101,  2190,  2377,  ...,     0,     0,     0],
         ...,
         [  101, 10439,  2562,  ...,     0,     0,     0],
         [  101,  3115,  2028,  ...,     0,     0,     0],
         [  101,  2006,  2026,  ...,     0,     0,     0]]),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 'labels': tensor([0, 0, 0, 0, 1, 1, 0, 1, 0, 1])}