# Introduction

In this notebook we demonstrate the use of a **Customized Word Embedding (Word2Vec)** deep learning (DL) technique to recover trace links between System Features and Bug Reports.

We model our study as follows:

* Each bug report title, summary and description compose a single query.
* We use the content of each feature as an entire document that must be returned for the query.

# Import Libraries

In [1]:
# NOTE(review): presumably this call extends the import search path so that the
# project-local `modules.*` imports further down resolve -- confirm in
# mod_finder_util. If so, it must stay ahead of those imports.
from mod_finder_util import mod_finder_util
mod_finder_util.add_modules_origin_search_path()

import pandas as pd
import numpy as np
import pprint

from modules.models_runner.feat_br_models_runner import Feat_BR_Models_Runner
from modules.utils import aux_functions
from modules.utils import model_evaluator as m_eval
from modules.utils import firefox_dataset_p2 as fd
from modules.utils import similarity_measures as sm
from modules.utils import br_feat_final_matrix_handler as b2f_handler

from matplotlib import pyplot as plt

import warnings; warnings.simplefilter('ignore')

# Load Datasets

In [2]:
# Selected bug reports; the loader prints the shape (91, 18 in the saved output).
bugreports = fd.Datasets.read_selected_bugreports_df()
# System features; the loader prints the shape (19, 8 in the saved output).
features = fd.Datasets.read_features_df()

SelectedBugReports.shape: (91, 18)
Features.shape: (19, 8)


# Running Customized Word2Vec Model - Exp-Vol UNION Oracle

In [3]:
# Build the customized Word2Vec model over the features / bug reports.
models_runner_1 = Feat_BR_Models_Runner()
wv_model_1 = models_runner_1.run_cust_word2vec_model()

# Evaluator backed by the (transposed) expert+volunteers UNION oracle.
union_oracle = fd.Feat_BR_Oracles.read_feat_br_expert_volunteers_union_df().T
evaluator_1 = m_eval.ModelEvaluator(oracle=union_oracle)

# No similarity cut-off: only the top-N ranking restricts the retrieved links.
sim_thresh = 0.0

# One evaluation per top-N value, in the order 1, 3, 5, 10.
wv_eval_t1, wv_eval_t3, wv_eval_t5, wv_eval_t10 = [
    evaluator_1.evaluate_model(model=wv_model_1, top_value=top_n, sim_threshold=sim_thresh)
    for top_n in (1, 3, 5, 10)
]

Features.shape: (19, 8)
SelectedBugReports.shape: (91, 18)
Running Customized W2V model -----
Expert and Volunteers Matrix UNION.shape: (91, 19)


# Similarity Matrix

In [4]:
# Show only a corner of the similarity matrix (first 23 rows x 14 columns at most).
sim_matrix_slice = wv_model_1.get_sim_matrix().iloc[0:23, 0:14]
aux_functions.highlight_df(sim_matrix_slice)

Bug_Number,1248267,1248268,1257087,1264988,1267480,1267501,1269348,1269485,1270274,1270983,1271607,1276120,1277937,1278388
feat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
new_awesome_bar,0.585232,0.683409,0.532926,0.517653,0.0943982,0.625739,0.405246,0.669941,-0.177764,0.130956,0.54347,0.615808,0.634834,0.111032
windows_child_mode,0.351096,0.685675,0.361876,0.406774,0.102669,0.484533,0.319093,0.432845,-0.0835177,-0.0084136,0.399081,0.39671,0.596024,0.217959
apz_async_scrolling,0.242721,0.618324,0.30411,0.536859,0.0760247,0.639625,0.139301,0.52741,-0.0646328,-0.00317222,0.366809,0.35776,0.651775,0.300734
browser_customization,0.279695,0.717728,0.309438,0.421188,0.0757264,0.483803,0.333826,0.405212,-0.118766,0.0494627,0.34888,0.441911,0.564583,0.224397
pdf_viewer,0.338891,0.447747,0.386952,0.516932,0.260488,0.188926,0.177389,0.223754,0.118525,0.163434,0.247223,0.331178,0.459912,0.23174
context_menu,0.845794,0.449137,0.751621,0.388953,0.151188,0.605191,0.377071,0.653261,-0.0805457,0.241041,0.588546,0.596338,0.402439,0.0905113
w10_comp,0.613961,0.409383,0.615794,0.344944,0.109871,0.670621,0.310443,0.668425,-0.00974115,0.204277,0.731716,0.604024,0.359895,0.122402
tts_in_desktop,0.343235,0.626938,0.423765,0.482641,0.104668,0.646733,0.228704,0.546295,-0.000345898,0.0894399,0.413101,0.419551,0.629204,0.351404
tts_in_rm,0.424592,0.6625,0.457473,0.586696,0.185424,0.632011,0.263545,0.565246,-0.0682375,0.0915254,0.400421,0.452249,0.697815,0.224421
webgl_comp,0.400215,0.515137,0.365655,0.298795,0.190286,0.36769,0.281783,0.423425,0.0626071,0.108393,0.480606,0.352146,0.403209,0.19412


# Analysis of BR x Feat Generated Traces

In [5]:
handler = b2f_handler.BR_Feat_Final_Matrix_Handler(mode=b2f_handler.HandlerMode.LOAD)

# Columns that receive the feature IDs: one per top-N evaluation, plus the oracle.
feature_id_columns = [
    'Features_IDs_wv_t1_m',
    'Features_IDs_wv_t3_m',
    'Features_IDs_wv_t5_m',
    'Features_IDs_wv_t10_m',
    'Features_IDs_orc',
]

# Columns that receive the feature names, positionally aligned with the ID columns.
feature_name_columns = [
    'f_names_wv_t1',
    'f_names_wv_t3',
    'f_names_wv_t5',
    'f_names_wv_t10',
    'f_names_orc',
]

# Trace matrices in the same order as `feature_id_columns`.
trace_link_sources = [
    wv_eval_t1['trace_links_df'],
    wv_eval_t3['trace_links_df'],
    wv_eval_t5['trace_links_df'],
    wv_eval_t10['trace_links_df'],
    evaluator_1.get_oracle_df(),
]

# Each call gets its own list object, as in the original literal-argument form.
br_2_features_matrix_final = handler.add_mappings_1(
    columns_names=list(feature_id_columns),
    trace_matrices=trace_link_sources,
    is_from_model=True,
)

br_2_features_matrix_final = handler.add_mappings_2(
    dest_columns_names=feature_name_columns,
    orig_columns_names=list(feature_id_columns),
)
br_2_features_matrix_final.head(10)

SelectedBugReports.shape: (91, 18)
Features.shape: (19, 8)
BR_2_Features Matrix Final.shape: (91, 5)


Unnamed: 0_level_0,bug_title,Features_IDs_exp_m,Features_IDs_vol_m,Features_IDs_exp_vol_union_m,Features_IDs_exp_vol_intersec_m,Features_IDs_wv_t1_m,Features_IDs_wv_t3_m,Features_IDs_wv_t5_m,Features_IDs_wv_t10_m,Features_IDs_orc,f_names_wv_t1,f_names_wv_t3,f_names_wv_t5,f_names_wv_t10,f_names_orc
Bug_Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1248267,"Right click on bookmark item of ""Recently Book...",6.0,6.0,6.0,6.0,6,1 6 7,1 6 7 12 14,1 2 6 7 8 9 10 12 14 19,6.0,[[context_menu]],"[[new_awesome_bar], [context_menu], [w10_comp]]","[[new_awesome_bar], [context_menu], [w10_comp]...","[[new_awesome_bar], [windows_child_mode], [con...",[[context_menu]]
1248268,"Unable to disable ""Recently bookmarked""",,4.0,4.0,,4,1 2 4,1 2 4 9 19,1 2 3 4 8 9 12 14 17 19,4.0,[[browser_customization]],"[[new_awesome_bar], [windows_child_mode], [bro...","[[new_awesome_bar], [windows_child_mode], [bro...","[[new_awesome_bar], [windows_child_mode], [apz...",[[browser_customization]]
1257087,Middle mouse click on history item would not open,,,,,6,1 6 7,1 6 7 12 14,1 5 6 7 8 9 10 12 14 19,,[[context_menu]],"[[new_awesome_bar], [context_menu], [w10_comp]]","[[new_awesome_bar], [context_menu], [w10_comp]...","[[new_awesome_bar], [pdf_viewer], [context_men...",[]
1264988,Scrollbar appears for a moment in the new Awes...,1.0,1.0,1.0,1.0,9,1 3 9,1 3 5 9 16,1 3 4 5 8 9 12 14 16 18,1.0,[[tts_in_rm]],"[[new_awesome_bar], [apz_async_scrolling], [tt...","[[new_awesome_bar], [apz_async_scrolling], [pd...","[[new_awesome_bar], [apz_async_scrolling], [br...",[[new_awesome_bar]]
1267480,Intermittent browser/components/preferences/in...,,,,,5,5 10 11,5 9 10 11 13,5 6 7 9 10 11 12 13 14 19,,[[pdf_viewer]],"[[pdf_viewer], [webgl_comp], [video_and_canvas...","[[pdf_viewer], [tts_in_rm], [webgl_comp], [vid...","[[pdf_viewer], [context_menu], [w10_comp], [tt...",[]
1267501,New Private Browsing start-page overflows off ...,3.0,,3.0,,12,7 8 12,3 7 8 9 12,1 3 6 7 8 9 12 14 18 19,3.0,[[pointer_lock_api]],"[[w10_comp], [tts_in_desktop], [pointer_lock_a...","[[apz_async_scrolling], [w10_comp], [tts_in_de...","[[new_awesome_bar], [apz_async_scrolling], [co...",[[apz_async_scrolling]]
1269348,Show last sync date tooltip on Synced Tabs sid...,,,,,1,1 4 6,1 2 4 6 12,1 2 4 6 7 8 9 10 12 14,,[[new_awesome_bar]],"[[new_awesome_bar], [browser_customization], [...","[[new_awesome_bar], [windows_child_mode], [bro...","[[new_awesome_bar], [windows_child_mode], [bro...",[]
1269485,New Private Browsing start-page has white/gray...,3.0,,3.0,,12,1 7 12,1 6 7 9 12,1 2 3 6 7 8 9 12 14 19,3.0,[[pointer_lock_api]],"[[new_awesome_bar], [w10_comp], [pointer_lock_...","[[new_awesome_bar], [context_menu], [w10_comp]...","[[new_awesome_bar], [windows_child_mode], [apz...",[[apz_async_scrolling]]
1270274,Intermittent browser_share.jsuncaught exceptio...,,,,,5,5 10 16,5 10 14 16 19,5 10 14 16 19,,[[pdf_viewer]],"[[pdf_viewer], [webgl_comp], [webgl2]]","[[pdf_viewer], [webgl_comp], [zoom_indicator],...","[[pdf_viewer], [webgl_comp], [zoom_indicator],...",[]
1270983,Intermittent browser_contextmenu.jsTest timed ...,6.0,6.0,6.0,6.0,6,5 6 7,1 5 6 7 10,1 5 6 7 8 9 10 11 12 14,6.0,[[context_menu]],"[[pdf_viewer], [context_menu], [w10_comp]]","[[new_awesome_bar], [pdf_viewer], [context_men...","[[new_awesome_bar], [pdf_viewer], [context_men...",[[context_menu]]


# Analysis of Precision and Recall

## Union Strategy

In [6]:
# Cosine-similarity cut-offs 0.0, 0.1, ..., 0.9, evaluated for top-1/3/5.
# NOTE(review): the saved output of this cell ends in
# "ZeroDivisionError: division by zero"; the failing expression is not visible
# from the notebook -- TODO locate it in the evaluator/plotting code.
cosine_thresholds = [(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)]
results_1 = evaluator_1.run_evaluator(
    models=[wv_model_1],
    top_values=[1, 3, 5],
    sim_thresholds=cosine_thresholds,
)
evaluator_1.plot_evaluations_2(title='WordVector Evaluation | Exp-Vol UNION', results=results_1)

Evaluating CUST_WORDVECTOR Model ----- 


ZeroDivisionError: division by zero

## Intersection Strategy

In [7]:
# Evaluator backed by the (transposed) expert+volunteers INTERSECTION oracle.
intersec_oracle = fd.Feat_BR_Oracles.read_feat_br_expert_volunteers_intersec_df().T
evaluator_2 = m_eval.ModelEvaluator(oracle=intersec_oracle)

# Cosine-similarity cut-offs 0.0, 0.1, ..., 0.9, evaluated for top-1/3/5.
# NOTE(review): the saved output of this cell ends in
# "ZeroDivisionError: division by zero"; the failing expression is not visible
# from the notebook -- TODO locate it in the evaluator/plotting code.
cosine_thresholds = [(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)]
results_2 = evaluator_2.run_evaluator(
    models=[wv_model_1],
    top_values=[1, 3, 5],
    sim_thresholds=cosine_thresholds,
)
evaluator_2.plot_evaluations_2(title='WordVector Evaluation | Exp-Vol INTERSECTION', results=results_2)

Expert and Volunteers Matrix INTERSEC.shape: (91, 19)
Evaluating CUST_WORDVECTOR Model ----- 


ZeroDivisionError: division by zero

## Expert-Only Strategy

In [None]:
# Evaluator backed by the (transposed) expert-only oracle.
expert_oracle = fd.Feat_BR_Oracles.read_feat_br_expert_df().T
evaluator_3 = m_eval.ModelEvaluator(oracle=expert_oracle)

# Cosine-similarity cut-offs 0.0, 0.1, ..., 0.9, evaluated for top-1/3/5.
cosine_thresholds = [(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)]
results_3 = evaluator_3.run_evaluator(
    models=[wv_model_1],
    top_values=[1, 3, 5],
    sim_thresholds=cosine_thresholds,
)
evaluator_3.plot_evaluations_2(title='WordVector Evaluation | Expert-Only', results=results_3)

## Volunteers-Only Strategy

In [None]:
# Evaluator backed by the (transposed) volunteers-only oracle.
volunteers_oracle = fd.Feat_BR_Oracles.read_feat_br_volunteers_df().T
evaluator_4 = m_eval.ModelEvaluator(oracle=volunteers_oracle)

# Cosine-similarity cut-offs 0.0, 0.1, ..., 0.9, evaluated for top-1/3/5.
cosine_thresholds = [(sm.SimilarityMeasure.COSINE, step / 10) for step in range(10)]
results_4 = evaluator_4.run_evaluator(
    models=[wv_model_1],
    top_values=[1, 3, 5],
    sim_thresholds=cosine_thresholds,
)
evaluator_4.plot_evaluations_2(title='WordVector Evaluation | Volunteers-Only', results=results_4)

### Means of Metrics

Considering **TOP** values (1, 3, 5) and **SIMILARITY THRESHOLD** values from 0.0 to 0.9 in steps of 0.1, for the Volunteers-Only results (`results_4`).

In [None]:
# Average each metric over every (top value, similarity threshold) run in results_4.
mean_precision = np.mean(results_4.perc_precision)
print("Mean Precision: {:2.3}".format(mean_precision))

mean_recall = np.mean(results_4.perc_recall)
print("Mean Recall: {:2.3}".format(mean_recall))

mean_fscore = np.mean(results_4.perc_fscore)
print("Mean F-Score: {:2.3}".format(mean_fscore))