# Introduction

In this notebook we demonstrate the use of the **Word Embeddings (Word2Vec)** technique in Information Retrieval to recover trace links between Features and Bug Reports.

We model our study as follows:

* Each bug report's title, summary and description together compose a single query.
* Each feature's content is treated as an entire document that must be returned in response to the query.

# Import Libraries

In [1]:
# Standard-library and third-party dependencies.
import warnings

import pandas as pd
from IPython.display import display

# Project bootstrap, kept ahead of the `modules.*` imports as in the original ordering.
# NOTE(review): presumably this extends the import search path so that the
# `modules` package can be resolved from the notebook directory -- confirm.
from mod_finder_util import mod_finder_util
mod_finder_util.add_modules_origin_search_path()

from modules.models_runner.feat_br_runner import Feat_BR_Runner
from modules.utils import aux_functions

# Ignore all subsequent warnings; installed last so that anything raised
# while importing the packages above is still reported.
warnings.simplefilter('ignore')

# Running WordVec_Based Model

In [2]:
%%time

# NOTE: `%%time` is a cell magic and has to stay on the first line of this cell.
# Instantiate the runner and execute its Word2Vec run. The call returns two
# values: the model object queried by the cells below, and (presumably, going
# by its name) the evaluation results for that model -- confirm.
runner = Feat_BR_Runner()
w2v_model, w2v_eval = runner.run_word2vec_model()

Features.shape: (21, 8)
SelectedBugReports2.shape: (93, 22)
Expert and Volunteers Matrix.shape: (21, 93)

Model Evaluation -------------------------------------------
{'Measures': {'Mean FScore of WordVec_Model_AllData': 0.039141278435196346,
              'Mean Precision of WordVec_Model_AllData': 0.021227996299153794,
              'Mean Recall of WordVec_Model_AllData': 0.27956989247311825},
 'Setup': [{'Name': 'WordVec_Model_AllData'},
           {'Similarity Measure and Minimum Threshold': ('cosine', 0.8)},
           {'Top Value': 100},
           {'Tokenizer': <modules.utils.tokenizers.PorterStemmerBased_Tokenizer object at 0x7f361f884320>}]}
CPU times: user 16.7 s, sys: 643 ms, total: 17.4 s
Wall time: 17.4 s


In [3]:
# Preview the top-left corner (at most 20 rows x 7 columns) of `runner.orc`.
# NOTE(review): `orc` is presumably the oracle (ground-truth) matrix -- confirm.
aux_functions.highlight_df(runner.orc.iloc[:20, :7])

Unnamed: 0_level_0,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
feat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
new_awesome_bar,0,0,0,1,0,0,0
windows_child_mode,0,0,0,0,0,0,0
apz_async_scrolling,0,0,0,0,0,0,0
browser_customization,0,0,0,0,0,0,0
pdf_viewer,0,0,0,0,0,0,0
context_menu,1,0,0,0,0,0,0
w10_comp,0,0,0,0,0,0,0
tts_in_desktop,0,0,0,0,0,0,0
tts_in_rm,0,0,0,0,0,0,0
webgl_comp,0,0,0,0,0,0,0


In [5]:
# Preview the top-left corner (at most 20 rows x 7 columns) of the model's
# trace-links frame.
aux_functions.highlight_df(w2v_model.get_trace_links_df().iloc[:20, :7])

br_name,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
feat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
new_awesome_bar,1,1,1,1,1,1,1
windows_child_mode,1,1,1,1,1,0,1
apz_async_scrolling,1,1,1,0,1,0,1
browser_customization,1,1,1,1,1,1,1
pdf_viewer,0,0,0,0,1,1,0
context_menu,0,1,1,1,1,0,1
w10_comp,1,1,1,1,1,1,1
tts_in_desktop,1,1,1,1,1,1,1
tts_in_rm,1,1,1,1,1,1,1
webgl_comp,0,0,0,0,0,0,0


In [6]:
# Preview the top-left corner (at most 20 rows x 7 columns) of the model's
# similarity matrix.
aux_functions.highlight_df(w2v_model.get_sim_matrix().iloc[:20, :7])

br_name,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
feat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
new_awesome_bar,0.88096,0.934949,0.929217,0.890804,0.875051,0.832119,0.93057
windows_child_mode,0.844881,0.843091,0.900504,0.82063,0.842726,0.789301,0.863257
apz_async_scrolling,0.847133,0.822531,0.863177,0.798017,0.846902,0.754566,0.893438
browser_customization,0.838664,0.835262,0.85205,0.827119,0.857033,0.882645,0.833811
pdf_viewer,0.765519,0.778693,0.779521,0.786017,0.829928,0.854068,0.792893
context_menu,0.777969,0.920058,0.88074,0.851112,0.803638,0.753273,0.859855
w10_comp,0.849039,0.880215,0.88458,0.85664,0.866774,0.846003,0.90572
tts_in_desktop,0.830101,0.831585,0.846309,0.817743,0.85782,0.896116,0.852287
tts_in_rm,0.819324,0.84053,0.841126,0.827004,0.856205,0.865305,0.852443
webgl_comp,0.590359,0.610047,0.648597,0.591309,0.635873,0.659053,0.598825
