# Introduction

In this notebook we demonstrate the use of the **LDA (Latent Dirichlet Allocation)** generative statistical model as an Information Retrieval technique for trace link recovery between Features and Bug Reports.

We model our study as follows:

* Each bug report title, summary and description compose a single query.
* Each feature's content is treated as an entire document that may be returned for a given query.

# Import Libraries

In [5]:
# The search-path helper is called before the `modules.*` imports below.
# NOTE(review): presumably it adds the project's `modules` package to sys.path,
# so it must stay ahead of those imports -- confirm in mod_finder_util.
from mod_finder_util import mod_finder_util
mod_finder_util.add_modules_origin_search_path()

import pandas as pd

# Project-local code: the Feature x Bug-Report model runner and display helpers.
from modules.models_runner.feat_br_runner import Feat_BR_Runner
from modules.utils import aux_functions

from IPython.display import display

# Silences every warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation/convergence warnings raised while
# the model runs; consider narrowing the filter when debugging.
import warnings; warnings.simplefilter('ignore')

# Running LDA Model

In [6]:
%%time

# `%%time` has to remain the first line of the cell; it reports the cell's run time.
# `runner` and `lda_model` are reused by the display cells further down.
runner = Feat_BR_Runner()
# run_lda_model() returns a pair: the model object and its evaluation object.
# NOTE(review): no seed is set in this notebook -- presumably the runner fixes
# the LDA random state internally; confirm so the printed measures reproduce.
lda_model, lda_eval = runner.run_lda_model()

Features.shape: (21, 8)
SelectedBugReports2.shape: (93, 22)
Expert and Volunteers Matrix.shape: (21, 93)

Model Evaluation -------------------------------------------
{'Measures': {'Mean FScore of LDA_Model_AllData': 0.05077658303464755,
              'Mean Precision of LDA_Model_AllData': 0.028993855606758826,
              'Mean Recall of LDA_Model_AllData': 0.20430107526881722},
 'Setup': [{'Name': 'LDA_Model_AllData'},
           {'Similarity Measure and Minimum Threshold': ('cosine', 0.75)},
           {'Top Value': 100},
           {'LDA Model': {'batch_size': 128,
                          'doc_topic_prior': None,
                          'evaluate_every': -1,
                          'learning_decay': 0.7,
                          'learning_method': 'batch',
                          'learning_offset': 10.0,
                          'max_doc_update_iter': 100,
                          'max_iter': 10,
                          'mean_change_tol': 0.001,
                     

In [7]:
# Preview of `runner.orc`: at most the first 20 rows (features) and first 7 columns (bug reports).
# NOTE(review): `orc` is presumably the oracle (ground-truth) link matrix -- confirm.
aux_functions.highlight_df(runner.orc.iloc[:20, :7])

Unnamed: 0_level_0,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
feat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
new_awesome_bar,0,0,0,1,0,0,0
windows_child_mode,0,0,0,0,0,0,0
apz_async_scrolling,0,0,0,0,0,0,0
browser_customization,0,0,0,0,0,0,0
pdf_viewer,0,0,0,0,0,0,0
context_menu,1,0,0,0,0,0,0
w10_comp,0,0,0,0,0,0,0
tts_in_desktop,0,0,0,0,0,0,0
tts_in_rm,0,0,0,0,0,0,0
webgl_comp,0,0,0,0,0,0,0


In [8]:
# Preview of the 0/1 trace-link frame produced by the LDA model:
# at most the first 20 rows (features) and first 7 columns (bug reports).
aux_functions.highlight_df(lda_model.get_trace_links_df().iloc[:20, :7])

br_name,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
feat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
new_awesome_bar,0,1,1,1,1,1,1
windows_child_mode,1,0,0,0,0,0,0
apz_async_scrolling,0,0,0,0,0,0,0
browser_customization,0,1,1,1,1,1,1
pdf_viewer,0,0,0,0,0,0,0
context_menu,0,1,1,0,0,0,0
w10_comp,0,1,1,1,1,1,1
tts_in_desktop,0,0,0,0,0,0,0
tts_in_rm,0,0,0,0,0,0,0
webgl_comp,0,1,1,1,1,1,1


In [9]:
# Preview of the model's similarity matrix for the same window
# (at most the first 20 rows and first 7 columns).
# NOTE(review): the run above reports a ('cosine', 0.75) similarity/threshold
# setup; presumably links are derived from these scores -- confirm.
aux_functions.highlight_df(lda_model.get_sim_matrix().iloc[:20, :7])

br_name,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
feat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
new_awesome_bar,0.562955,0.822464,0.84512,0.93454,0.809613,0.984298,0.849872
windows_child_mode,0.844236,0.278576,0.449097,0.302384,0.630708,0.379784,0.360606
apz_async_scrolling,0.567435,0.0608286,0.155969,0.207319,0.155284,0.0867827,0.482821
browser_customization,0.52273,0.817867,0.830518,0.929312,0.784044,0.974596,0.840958
pdf_viewer,0.140445,0.281083,0.208148,0.231773,0.199582,0.246156,0.247359
context_menu,0.270328,0.847533,0.777545,0.66724,0.681945,0.422576,0.595231
w10_comp,0.519749,0.815923,0.828604,0.927764,0.781865,0.973367,0.838794
tts_in_desktop,0.154436,0.230407,0.23238,0.259295,0.222186,0.275264,0.236096
tts_in_rm,0.495698,0.221529,0.275183,0.392815,0.213713,0.26457,0.628166
webgl_comp,0.521419,0.817024,0.829689,0.928648,0.783096,0.974074,0.840018
