# ScienceSearch NLP Keywords with Visualization and Saving Results Example


## Import modules
Import modules and set up logging

In [None]:
# imports
from pathlib import Path
from sciencesearch.nlp.hyper import Hyper, algorithms_from_results
from sciencesearch.nlp.sweep import Sweep
from sciencesearch.nlp.models import Rake, Yake, KPMiner, Ensemble
from sciencesearch.nlp.train import train_hyper, load_hyper, run_hyper
from sciencesearch.nlp.search import Searcher
from operator import attrgetter
from IPython.core.display import HTML

# logging
import logging

# Raise the root logger's threshold to ERROR so that pke warnings are silenced.
logging.getLogger().setLevel(logging.ERROR)

# The "sciencesearch" logger gets its own WARNING threshold.
slog = logging.getLogger("sciencesearch")
slog.setLevel(logging.WARNING)
from sciencesearch.nlp.visualize_kws import JsonView
from pathlib import Path

## Train and run models
In this example, we pick the 'best' result for each algorithm by training on two files with some user-provided keywords.
Then we extract keywords from a third file using the trained model.

Using a Searcher, which reads its training data from a search configuration, we select the best model's keywords.
We save the results of the hyperparameter training in a serialized Python "pickle" file so we don't need to repeat the training.
We could run the same hyperparameters on multiple files without retraining by using `run_hyper()`.

In [None]:
# TODO: Enter the filepath to your search configuration file
config_fp = "slac_config.json"

# TODO: If you would like to re-train the model, delete
# `private_data/slac_logs/slac_hyper.pkl` (the saved hyperparameter-training results)

# Create a Searcher object from the configuration file named in `config_fp`;
# the cells below use `slac_searcher` to search for and view keywords.
slac_searcher = Searcher.from_config(config_file=config_fp)

### With the Searcher object, search for all files that have a certain keyword

In [None]:
# Find all files that have the given keyword.
# Change `keyword` to the term you want to look up; the value returned by
# find() is the last expression in the cell, so it becomes the cell's output.
keyword = "test"
slac_searcher.find(keyword)

In [None]:
# see all file keywords (predicted and training)
# slac_searcher.file_keywords

# see all predicted keywords
# slac_searcher.predicted_keywords

# see training keywords
# slac_searcher.training_keywords

# see separated training keywords and predicted keywords
# slac_searcher.training_and_predicted_keywords()

In [None]:
# View keywords in the context of the text logs for a single file.
# Both the training keywords and the predicted keywords are shown.
filename = "test.txt"
single_file_markup = slac_searcher.view_keywords(
    textfilename=filename,
    show_training=True,
    show_predicted=True,
)
HTML(single_file_markup)

In [None]:
from IPython.core.display import HTML

# View keywords in the context of the text logs for all files:
# textfilename=None means no single file is selected.
all_files_markup = slac_searcher.view_keywords(
    textfilename=None,
    show_training=True,
    show_predicted=True,
)
HTML(all_files_markup)