# Introduction

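This notebook runs all of the traceability models (LSI, LDA, BM25, and Word2Vec) between features and bug reports, and aggregates their evaluation results (mean precision, recall, and F-score) into a single table.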

# Load Libraries and Datasets

In [1]:
# Register the project's module location on the import search path.
# NOTE(review): presumably this has to run before the `modules.*` import
# below can resolve -- confirm in mod_finder_util.
from mod_finder_util import mod_finder_util
mod_finder_util.add_modules_origin_search_path()

import pandas as pd

# Runner that executes each feature <-> bug-report traceability model.
from modules.models_runner.feat_br_runner import Feat_BR_Runner

# Suppress every Python warning for the rest of the session so the model
# output below stays readable.
import warnings; warnings.simplefilter('ignore')

# Run All Models

In [2]:
# A single runner instance drives all four models below.
runner = Feat_BR_Runner()

# Each run_* call returns a (model, evaluation) pair. Both halves are kept:
# the Results cell reads the model's name and the evaluation's mean
# precision / recall / F-score.
lsi_model, lsi_eval = runner.run_lsi_model()
lda_model, lda_eval = runner.run_lda_model()
bm25_model, bm25_eval = runner.run_bm25_model()
w2v_model, w2v_eval = runner.run_word2vec_model()

Features.shape: (21, 8)
SelectedBugReports2.shape: (93, 22)
Expert and Volunteers Matrix.shape: (21, 93)

Model Evaluation -------------------------------------------
{'Measures': {'Mean FScore of LSI_Model_AllData': 0.053763440860215055,
              'Mean Precision of LSI_Model_AllData': 0.053763440860215055,
              'Mean Recall of LSI_Model_AllData': 0.053763440860215055},
 'Setup': [{'Name': 'LSI_Model_AllData'},
           {'Similarity Measure and Minimum Threshold': ('cosine', 0.8)},
           {'Top Value': 10},
           {'SVD Model': {'algorithm': 'randomized',
                          'n_components': 100,
                          'n_iter': 10,
                          'random_state': 42,
                          'tol': 0.0}},
           {'Vectorizer': {'analyzer': 'word',
                           'binary': False,
                           'decode_error': 'strict',
                           'dtype': <class 'numpy.float64'>,
                           'encoding

# Results

In [3]:
# Pair every model with its own evaluation so that each row's name and
# metrics always come from the same run and cannot drift out of order.
model_runs = [
    (lsi_model, lsi_eval),
    (lda_model, lda_eval),
    (bm25_model, bm25_eval),
    (w2v_model, w2v_eval),
]

# Build the summary table in a single pass: one row per model holding the
# mean precision, recall and F-score reported by its evaluation object.
# `columns=` pins the column order independently of dict ordering.
results = pd.DataFrame(
    [
        {
            'model': model.get_name(),
            'precision': evaluation.get_mean_precision(),
            'recall': evaluation.get_mean_recall(),
            'fscore': evaluation.get_mean_fscore(),
        }
        for model, evaluation in model_runs
    ],
    columns=['model', 'precision', 'recall', 'fscore'],
)

results.head()

Unnamed: 0,model,precision,recall,fscore
0,LSI_Model_AllData,0.053763,0.053763,0.053763
1,LDA_Model_AllData,0.028994,0.204301,0.050777
2,BM25_Model_AllData,0.016897,0.354839,0.032258
3,WordVec_Model_AllData,0.021228,0.27957,0.039141
