# Introduction

In this notebook we demonstrate the use of the **LDA (Latent Dirichlet Allocation)** generative statistical model as an Information Retrieval technique for recovering trace links between Test Cases and Bug Reports.

We model our study as follows:

* Each bug report's title, summary and description together compose a single query.
* We treat the content of each test case as a single document to be retrieved in response to a query.

# Import Libraries

In [18]:
# Project bootstrap: this call has to run before the `modules.*` imports below.
# NOTE(review): presumably it adds the project's module directory to the import
# search path -- confirm in mod_finder_util.
from mod_finder_util import mod_finder_util
mod_finder_util.add_modules_origin_search_path()

import pandas as pd

# Project-local code: the runner that drives the models, and display helpers.
from modules.models_runner.tc_br_runner import TC_BR_Runner
from modules.utils import aux_functions

from IPython.display import display

# Installs a blanket 'ignore' filter: every warning raised after this point in
# the kernel session is suppressed, including deprecation notices from libraries.
import warnings; warnings.simplefilter('ignore')

# Running LDA Model

In [19]:
%%time

# Build the runner and execute the LDA experiment. The call yields two values:
# the fitted model object and its evaluation result. According to the recorded
# output of this cell, the run also prints the dataset shapes and an evaluation
# summary (mean precision / recall / F-score plus the model setup).
# NOTE(review): no random seed is set in this notebook -- confirm whether the
# runner fixes one internally, otherwise the reported scores may vary per run.
runner = TC_BR_Runner()
lda_model, lda_eval = runner.run_lda_model()

TestCases.shape: (207, 12)
SelectedBugReports2.shape: (93, 22)
OracleExpertVolunteers.shape: (207, 93)

Model Evaluation -------------------------------------------
{'Measures': {'Mean FScore of LDA_Model_TC_BR': 0.0573118195103464,
              'Mean Precision of LDA_Model_TC_BR': 0.032119649506966316,
              'Mean Recall of LDA_Model_TC_BR': 0.30864601107837325},
 'Setup': [{'Name': 'LDA_Model_TC_BR'},
           {'Similarity Measure and Minimum Threshold': ('cosine', 0.75)},
           {'Top Value': 100},
           {'LDA Model': {'batch_size': 128,
                          'doc_topic_prior': None,
                          'evaluate_every': -1,
                          'learning_decay': 0.7,
                          'learning_method': 'batch',
                          'learning_offset': 10.0,
                          'max_doc_update_iter': 100,
                          'max_iter': 10,
                          'mean_change_tol': 0.001,
                          'n_com

In [20]:
# Preview the top-left corner of `runner.orc` (first 20 rows x first 7 columns).
# NOTE(review): `orc` is presumably the oracle matrix (test cases x bug reports) -- confirm.
aux_functions.highlight_df(
    runner.orc.iloc[:20, :7]
)

Unnamed: 0_level_0,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
tc_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TC_1_TRG,0,0,0,0,0,0,0
TC_2_TRG,0,0,0,0,0,0,0
TC_3_TRG,0,0,0,0,0,0,0
TC_4_TRG,0,0,0,0,0,0,0
TC_5_TRG,0,0,0,0,0,0,0
TC_6_TRG,0,0,0,0,0,0,0
TC_7_TRG,0,0,0,0,0,0,0
TC_8_TRG,0,0,0,0,0,0,0
TC_9_TRG,0,0,0,0,0,0,0
TC_10_TRG,0,0,0,0,0,0,0


In [11]:
# Preview the top-left corner (first 20 rows x first 7 columns) of the
# trace-links frame produced by the LDA model.
aux_functions.highlight_df(
    lda_model.get_trace_links_df().iloc[:20, :7]
)

br_name,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
tc_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TC_1_TRG,0,0,0,0,0,0,0
TC_2_TRG,0,0,0,0,0,0,0
TC_3_TRG,0,0,0,0,0,0,0
TC_4_TRG,0,0,0,0,0,0,0
TC_5_TRG,0,0,0,0,0,0,0
TC_6_TRG,0,0,0,0,0,0,0
TC_7_TRG,0,0,0,0,0,0,0
TC_8_TRG,0,0,0,0,0,0,0
TC_9_TRG,0,0,0,0,0,0,0
TC_10_TRG,0,0,0,0,0,0,0


In [12]:
# Preview the top-left corner (first 20 rows x first 7 columns) of the
# similarity matrix computed by the LDA model.
aux_functions.highlight_df(
    lda_model.get_sim_matrix().iloc[:20, :7]
)

br_name,BR_1181835_SRC,BR_1248267_SRC,BR_1248268_SRC,BR_1257087_SRC,BR_1264988_SRC,BR_1267480_SRC,BR_1267501_SRC
tc_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TC_1_TRG,0.735391,0.0958185,0.53688,0.0191128,0.260451,0.0587384,0.117689
TC_2_TRG,0.17356,0.12779,0.308315,0.0190335,0.453637,0.0400894,0.214149
TC_3_TRG,0.277369,0.202878,0.439871,0.0269861,0.590235,0.0772977,0.273356
TC_4_TRG,0.232793,0.169423,0.380118,0.0204375,0.531765,0.0338435,0.254616
TC_5_TRG,0.220353,0.162407,0.347632,0.0762809,0.478246,0.0238655,0.236417
TC_6_TRG,0.0743547,0.0581149,0.2404,0.11366,0.201757,0.0163476,0.0603366
TC_7_TRG,0.0737377,0.0246198,0.319523,0.104888,0.380877,0.407198,0.0260132
TC_8_TRG,0.185305,0.0198325,0.166307,0.0914372,0.0838001,0.0204156,0.0177672
TC_9_TRG,0.310611,0.0524759,0.180474,0.140152,0.0723752,0.0709194,0.0897314
TC_10_TRG,0.286829,0.0709037,0.426678,0.0912263,0.189636,0.0246268,0.0727596
