# Introduction

This notebook runs and evaluates the traceability-recovery models (LSI, LDA, BM25 and Word2Vec) that link features to bug reports, once for each oracle strategy.

# Load Libraries and Datasets

In [1]:
from mod_finder_util import mod_finder_util
mod_finder_util.add_modules_origin_search_path()

import pandas as pd
import numpy as np

from modules.models_runner.feat_br_models_runner import Feat_BR_Models_Runner
from modules.utils import firefox_dataset_p2 as fd
from modules.utils import aux_functions
from modules.utils import model_evaluator as m_eval
from modules.utils import similarity_measures as sm

import warnings; warnings.simplefilter('ignore')

# Run All Models

## Volunteers Only Strategy

In [2]:
# Volunteers-Only strategy: run the LSI, LDA, BM25 and Word2Vec models with a
# fresh runner, then evaluate all four against the volunteers' oracle.
models_runner_4 = Feat_BR_Models_Runner()
lsi_model_4 = models_runner_4.run_lsi_model()
lda_model_4 = models_runner_4.run_lda_model()
bm25_model_4 = models_runner_4.run_bm25_model()
w2v_model_4 = models_runner_4.run_word2vec_model()

# The volunteers' oracle is transposed (.T) before being handed to the evaluator;
# `oracle` is reused by the precision-analysis cells further down.
oracle = fd.Feat_BR_Oracles.read_feat_br_volunteers_df().T

evaluator_4 = m_eval.ModelEvaluator(oracle)
# Every combination of top value and cosine threshold (0.0, 0.1, ..., 0.9) is evaluated.
# NOTE(review): the other strategies in this notebook also use top value 10 -
# confirm that leaving it out here is intentional.
evals_df_4 = evaluator_4.run_evaluator(
    models=[lsi_model_4, lda_model_4, bm25_model_4, w2v_model_4],
    top_values=[1, 3, 5],
    sim_thresholds=[(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)],
)

Features.shape: (19, 8)
SelectedBugReports.shape: (91, 18)
Running LSI model -----
Running LDA model -----
Running BM25 model -----
Running W2V model -----
Feat_BR Volunteers Matrix shape: (91, 19)
Evaluating LSI Model ----- 
Evaluating LDA Model ----- 
Evaluating BM25 Model ----- 
Evaluating WORDVECTOR Model ----- 


### Results

In [3]:
# Preview the first rows of the Volunteers-Only evaluation results.
evals_df_4.head()

Unnamed: 0,model,ref_name,perc_precision,perc_recall,perc_fscore,eval_df,sim_threshold,top,trace_links_df
0,lsi,top_1_cosine_0.0,28.57,26.18,26.62,precision recall fscore Bug_Nu...,0.0,1.0,Bug_Number 1248267 1248268 125...
1,lsi,top_1_cosine_0.1,28.57,26.18,26.62,precision recall fscore Bug_Nu...,0.1,1.0,Bug_Number 1248267 1248268 125...
2,lsi,top_1_cosine_0.2,28.57,26.18,26.62,precision recall fscore Bug_Nu...,0.2,1.0,Bug_Number 1248267 1248268 125...
3,lsi,top_1_cosine_0.3,28.57,26.18,26.62,precision recall fscore Bug_Nu...,0.3,1.0,Bug_Number 1248267 1248268 125...
4,lsi,top_1_cosine_0.4,28.57,26.18,26.62,precision recall fscore Bug_Nu...,0.4,1.0,Bug_Number 1248267 1248268 125...


### Analysis of Precision

In [4]:
# Best precision (in percent) reached by each model over all evaluated configurations.
evals_df_4.groupby('model')['perc_precision'].max()

model
bm25          25.27
lda           14.29
lsi           31.32
wordvector    28.57
Name: perc_precision, dtype: float64

In [24]:
# Optional manual inspection, left disabled: uncomment to pass the oracle matrix
# to aux_functions.highlight_df (presumably a highlighted rendering - confirm).
#aux_functions.highlight_df(oracle)

#### BM25

In [25]:
# Select the BM25 evaluation row whose ref_name is 'top_1_cosine_0.9' and whose
# precision equals the BM25 maximum reported by the groupby above (25.27).
# The precision is matched with a tolerance rather than exact float equality,
# so tiny representation differences cannot silently drop the row.
bm25_best_rows = evals_df_4[(evals_df_4.model == 'bm25')
                            & (evals_df_4.ref_name == 'top_1_cosine_0.9')
                            & np.isclose(evals_df_4.perc_precision, 25.27, atol=0.005)]

# Fail with an explicit message instead of an opaque IndexError from .iloc[0]
# when the hard-coded values no longer match the evaluation results.
assert len(bm25_best_rows) > 0, \
    "No BM25 row with ref_name 'top_1_cosine_0.9' and perc_precision ~= 25.27 in evals_df_4"

# `df` holds the trace-links matrix of the selected row; the following cells use it.
df = bm25_best_rows.iloc[0,:].trace_links_df
#aux_functions.highlight_df(df)

In [29]:
# Compare the selected BM25 trace links (`df`) against the oracle and collect
# the true positives, false positives and false negatives.
tp_list = aux_functions.get_true_positives(oracle, df)
fp_list = aux_functions.get_false_positives(oracle, df)
fn_list = aux_functions.get_false_negatives(oracle, df)

tp = len(tp_list)
fp = len(fp_list)
fn = len(fn_list)

# Guard against TP + FP == 0 (no links recovered), which would otherwise raise
# ZeroDivisionError; precision is reported as 0.0 in that case.
perc_precision = tp/(tp+fp) * 100 if (tp+fp) > 0 else 0.0

print("TP = {}".format(tp))
print("FP = {}".format(fp))
print("FN = {}".format(fn))
# Fixed-point with two decimals, matching the perc_precision column. The former
# "{:2.3}" spec (no presentation type) prints 100.0 as "1e+02".
print("Precision = TP/(TP+FP) = {}/{} = {:.2f}%".format(tp,tp+fp, perc_precision))

TP = 23
FP = 68
FN = 70
Precision = TP/(TP+FP) = 23/91 = 25.3%


In [43]:
# Load the selected bug reports; the next cell looks up each Summary by Bug_Number.
bugreports = fd.Datasets.read_selected_bugreports_df()

SelectedBugReports.shape: (91, 18)


In [42]:
# Build the Bug_Number -> Summary lookup once instead of filtering the whole
# bug-report frame for every false positive. drop_duplicates keeps the first
# occurrence, which matches the previous `.values[0]` choice.
summary_by_bug = (bugreports.drop_duplicates('Bug_Number')
                            .set_index('Bug_Number')
                            .Summary
                            .to_dict())

# Each false positive is a (feature, bug number) pair.
for idx,col in fp_list:
    # Use a placeholder rather than raising IndexError when a bug number has no
    # matching bug report.
    summary = str(summary_by_bug.get(col, '<summary not found>'))
    print('Feature: {:<24} <=> BR: {:>8} : {:>50}'.format(idx, col, summary))

Feature: new_awesome_bar          <=> BR:  1257087 :  Middle mouse click on history item would not open
Feature: new_awesome_bar          <=> BR:  1277937 : https has an underline instead of a strikethrough when mixed active content is loaded
Feature: new_awesome_bar          <=> BR:  1281190 : Keep collecting telemetry for Permissions Notifications until version 55
Feature: new_awesome_bar          <=> BR:  1282759 :         Support whole word search for find in page
Feature: new_awesome_bar          <=> BR:  1289240 : Services.search.getDefaultEngines() empty, search engine reset triggered when using a language pack
Feature: new_awesome_bar          <=> BR:  1290424 : Mouse-over (hover) on a dropdown suggestion (search, URL) causes the hovered-over item to be deleted on 'delete'-press
Feature: new_awesome_bar          <=> BR:  1297686 : When multiple desktop files support the same protocol scheme, only one of them is listed
Feature: new_awesome_bar          <=> BR:  1301784 :        

### Plots

In [None]:
# Plot the Volunteers-Only evaluation results with an explicit figure title.
evaluator_4.plot_evaluations_3(evals_df_4, title='BR-Feat Traceability Recovery - Volunteers-Only Strategy')

In [None]:
# Second plot variant of the Volunteers-Only results (what it draws is defined in model_evaluator, not shown here).
evaluator_4.plot_evaluations_4(evals_df_4)

In [None]:
# Third plot variant of the Volunteers-Only results (what it draws is defined in model_evaluator, not shown here).
evaluator_4.plot_evaluations_5(evals_df_4)

### Delete Variables

## Union Strategy

In [None]:
# Union strategy: run the LSI, LDA, BM25 and Word2Vec models with a fresh
# runner, then evaluate them against the expert/volunteers union oracle.
models_runner_1 = Feat_BR_Models_Runner()
lsi_model_1 = models_runner_1.run_lsi_model()
lda_model_1 = models_runner_1.run_lda_model()
bm25_model_1 = models_runner_1.run_bm25_model()
w2v_model_1 = models_runner_1.run_word2vec_model()

# The union oracle is transposed (.T) before being handed to the evaluator.
evaluator_1 = m_eval.ModelEvaluator(
    oracle=fd.Feat_BR_Oracles.read_feat_br_expert_volunteers_union_df().T
)
# Every combination of top value and cosine threshold (0.0, 0.1, ..., 0.9) is evaluated.
evals_df_1 = evaluator_1.run_evaluator(
    models=[lsi_model_1, lda_model_1, bm25_model_1, w2v_model_1],
    top_values=[1, 3, 5, 10],
    sim_thresholds=[(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)],
)

### Results

In [None]:
# Preview the first rows of the Union strategy evaluation results.
evals_df_1.head()

In [None]:
# Second plot variant of the Union results (what it draws is defined in model_evaluator, not shown here).
evaluator_1.plot_evaluations_4(evals_df_1)

In [None]:
# Third plot variant of the Union results (what it draws is defined in model_evaluator, not shown here).
evaluator_1.plot_evaluations_5(evals_df_1)

### Plots

In [None]:
# Plot the Union strategy evaluation results with an explicit figure title.
evaluator_1.plot_evaluations_3(evals_df_1, title='BR-Feat Traceability Recovery - Exp-Vol Union Strategy')

### Delete Variables

## Intersection Strategy

In [None]:
# Intersection strategy: run the LSI, LDA, BM25 and Word2Vec models with a fresh
# runner, then evaluate them against the expert/volunteers intersection oracle.
models_runner_2 = Feat_BR_Models_Runner()
lsi_model_2 = models_runner_2.run_lsi_model()
lda_model_2 = models_runner_2.run_lda_model()
bm25_model_2 = models_runner_2.run_bm25_model()
w2v_model_2 = models_runner_2.run_word2vec_model()

# The intersection oracle is transposed (.T) before being handed to the evaluator.
evaluator_2 = m_eval.ModelEvaluator(
    oracle=fd.Feat_BR_Oracles.read_feat_br_expert_volunteers_intersec_df().T
)
# Every combination of top value and cosine threshold (0.0, 0.1, ..., 0.9) is evaluated.
evals_df_2 = evaluator_2.run_evaluator(
    models=[lsi_model_2, lda_model_2, bm25_model_2, w2v_model_2],
    top_values=[1, 3, 5, 10],
    sim_thresholds=[(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)],
)

### Results

In [None]:
# Preview the first rows of the Intersection strategy evaluation results.
evals_df_2.head()

### Plots

In [None]:
# Plot the Intersection strategy evaluation results with an explicit figure title.
evaluator_2.plot_evaluations_3(evals_df_2, title='BR-Feat Traceability Recovery - Exp-Vol Intersec Strategy')

In [None]:
# Second plot variant of the Intersection results (what it draws is defined in model_evaluator, not shown here).
evaluator_2.plot_evaluations_4(evals_df_2)

In [None]:
# Third plot variant of the Intersection results (what it draws is defined in model_evaluator, not shown here).
evaluator_2.plot_evaluations_5(evals_df_2)

### Delete Variables

## Expert Only Strategy

In [None]:
# Expert-Only strategy: run the LSI, LDA, BM25 and Word2Vec models with a fresh
# runner, then evaluate them against the expert oracle.
models_runner_3 = Feat_BR_Models_Runner()
lsi_model_3 = models_runner_3.run_lsi_model()
lda_model_3 = models_runner_3.run_lda_model()
bm25_model_3 = models_runner_3.run_bm25_model()
w2v_model_3 = models_runner_3.run_word2vec_model()

# The expert oracle is transposed (.T) before being handed to the evaluator.
evaluator_3 = m_eval.ModelEvaluator(
    oracle=fd.Feat_BR_Oracles.read_feat_br_expert_df().T
)
# Every combination of top value and cosine threshold (0.0, 0.1, ..., 0.9) is evaluated.
evals_df_3 = evaluator_3.run_evaluator(
    models=[lsi_model_3, lda_model_3, bm25_model_3, w2v_model_3],
    top_values=[1, 3, 5, 10],
    sim_thresholds=[(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)],
)

### Results

In [None]:
# Preview the first rows of the Expert-Only evaluation results.
evals_df_3.head()

### Plots

In [None]:
# Plot the Expert-Only evaluation results with an explicit figure title.
evaluator_3.plot_evaluations_3(evals_df_3, title='BR-Feat Traceability Recovery - Expert-Only Strategy')

In [None]:
# Second plot variant of the Expert-Only results (what it draws is defined in model_evaluator, not shown here).
evaluator_3.plot_evaluations_4(evals_df_3)

In [None]:
# Third plot variant of the Expert-Only results (what it draws is defined in model_evaluator, not shown here).
evaluator_3.plot_evaluations_5(evals_df_3)

### Delete Variables

# Drop the Expert-Only strategy objects (models, evaluation results, runner and
# evaluator) from the kernel namespace, in the same order as before.
del (
    lsi_model_3, lda_model_3, bm25_model_3, w2v_model_3,
    evals_df_3,
    models_runner_3, evaluator_3,
)