In [None]:
import pandas as pd
import pubmedake

## Load the sample data and set the top k evaluations

In [None]:
# Read the sample dataset from its JSON file.
total = pubmedake.read_pubmedake(
    '../data/sample_1000.json'
)
# Top-k cutoffs handed to every keyword-extraction run below.
top_k_list = [5, 10, 15]

## Illustration of the 3 types of evaluation supported in PubMedAKE

- Exact match: Performs an exact string comparison between two sets of keywords
- Porter match: First stems the two sets of keywords using PorterStemmer and then compares the two sets of keywords
- Partial match: Calculates matching based on words within the keyphrases

In [None]:
def print_all_results(ext_kwds):
    """Evaluate extracted keywords under all three matchers and print the scores.

    Calls ``pubmedake.evaluate_model`` on ``ext_kwds`` and the module-level
    ``total`` dataset once each with ``pubmedake.exact_match``,
    ``pubmedake.porter_match`` and ``pubmedake.partial_match``, always passing
    the ``"keywords_in"`` field name. Each result is then printed to stdout as
    a transposed DataFrame underneath a banner line.

    Args:
        ext_kwds: Extracted keywords to score, e.g. the value returned by
            ``pubmedake.run_pke_model``.

    Returns:
        None.
    """
    schemes = (
        ("-----Exact-----", pubmedake.exact_match),
        ("-----Stem-----", pubmedake.porter_match),
        ("-----Partial-----", pubmedake.partial_match),
    )
    # Finish every evaluation before printing anything, so the three tables
    # always appear together after any output the evaluations produce.
    scored = [
        (banner, pubmedake.evaluate_model(ext_kwds, total, matcher, "keywords_in"))
        for banner, matcher in schemes
    ]
    for banner, perf in scored:
        print(banner)
        print(pd.DataFrame.from_dict(perf).T)

## Illustration of model evaluation of Yake on the sample dataset

In [None]:
# Run YAKE through run_pke_model for each cutoff in top_k_list; the JSON path
# is the file the results are stored to. Then print all three evaluations.
yakekwds = pubmedake.run_pke_model(
    total,
    top_k_list,
    pubmedake.run_yake,
    "../results/sample_yakekwds.json",
)
print_all_results(yakekwds)

## Evaluation of other unsupervised methods in the pke package

In [None]:
# Run TextRank through run_pke_model for each cutoff in top_k_list; the JSON
# path is the file the results are stored to. Then print all three evaluations.
textkwds = pubmedake.run_pke_model(
    total,
    top_k_list,
    pubmedake.run_textrank,
    "../results/sample_textrankkwds.json",
)
print_all_results(textkwds)

In [None]:
# Run SingleRank through run_pke_model for each cutoff in top_k_list; the JSON
# path is the file the results are stored to. Then print all three evaluations.
singlerankkwds = pubmedake.run_pke_model(
    total,
    top_k_list,
    pubmedake.run_singlerank,
    "../results/sample_singlerankkwds.json",
)
print_all_results(singlerankkwds)

In [None]:
# Run TopicRank through run_pke_model for each cutoff in top_k_list; the JSON
# path is the file the results are stored to. Then print all three evaluations.
topicrankkwds = pubmedake.run_pke_model(
    total,
    top_k_list,
    pubmedake.run_topicrank,
    "../results/sample_topicrankkwds.json",
)
print_all_results(topicrankkwds)

In [None]:
# Run PositionRank through run_pke_model for each cutoff in top_k_list; the
# JSON path is the file the results are stored to. Then print all three
# evaluations.
positionrankkwds = pubmedake.run_pke_model(
    total,
    top_k_list,
    pubmedake.run_positionrank,
    "../results/sample_positionrankkwds.json",
)
print_all_results(positionrankkwds)

In [None]:
# Run MultiRank (not PositionRank) through run_pke_model for each cutoff in
# top_k_list; the JSON path is the file the results are stored to. Then print
# all three evaluations.
run_multirankkwds = pubmedake.run_pke_model(
    total,
    top_k_list,
    pubmedake.run_multirank,
    "../results/sample_multirankkwds.json",
)
print_all_results(run_multirankkwds)