## __Quickstart__

Start extracting keywords in 4 simple steps, after a one-time data setup (Step 0).

#### Step 0: Add Private Data Directory 
Add your private data to the repository before running keyword extraction. The private data folder includes data, training, and other SLAC-specific information.

1. Import and unzip `private_data` into the repository's root folder

In [None]:
from pathlib import Path
import zipfile as zf
import os

# Create the private data directory.
#
# Both paths are relative to the current working directory and assume this
# notebook is run from the `examples/` subdirectory.
p_zip = Path("../private_data_starter.zip")  # starter archive, if provided
p_folder = Path("../private_data")  # target private data folder

# An existing private data folder is always left untouched.
if not p_folder.exists():
    if p_zip.exists():
        # Unzip the starter archive into the folder that will contain
        # `private_data`, skipping the `__MACOSX` entries.
        with zf.ZipFile(p_zip, "r") as archive:
            members = [
                name
                for name in archive.namelist()
                if not name.startswith("__MACOSX")
            ]
            archive.extractall(p_folder.parent, members=members)
        print("Starter private data folder is unzipped and ready")
    else:
        # No starter archive: create an empty folder for the user to fill in.
        p_folder.mkdir(exist_ok=True)
        print(
            "Please populate private_data directory. For more details on creating this directory, see slac_search_demo.ipynb"
        )

#### Step 1: Initialize and Extract Keywords
The `KeywordExplorer` object trains the model and generates keywords.

In [None]:
from sciencesearch.nlp.search import KeywordExplorer
from sciencesearch.nlp.slac_data_extractor import SLACDatabaseDataExtractor

# TODO: Set configuration filepath
# The same configuration file is passed to both the data extractor and the
# keyword explorer below. The path is relative to the current working directory.
config_fp = "config_files/slac_config_descriptions.json"

# Generate preprocessed input data
# NOTE(review): presumably this must run before the explorer is created so
# that the preprocessed inputs exist — confirm against SLACDatabaseDataExtractor.

data_extractor = SLACDatabaseDataExtractor(config_file=config_fp)
data_extractor.process_experiment_descriptions()


# Load configuration and process documents
# `keyword_explorer` is reused by the later cells of this notebook.
keyword_explorer = KeywordExplorer.from_config(config_fp)

#### Step 2: View extracted and training keywords for each file

In [None]:
# Predicted keywords, keyed by input file
predicted_keywords = keyword_explorer.predicted_keywords

# Print one "<file> => <kw1, kw2, ...>" entry per file, each followed by a
# blank line
for doc_name, doc_keywords in predicted_keywords.items():
    keyword_list = ", ".join(doc_keywords)
    print(f"{doc_name} => {keyword_list}\n")

#### Step 3: Search for documents containing specific keywords 

In [None]:
# TODO: Define keyword to search for
search_term = "x-ray"

# Look up the term with the explorer and report what it returned
results = keyword_explorer.find(search_term)
print(f"Found in documents: {results}")

#### Step 4: Visualize keywords

Graph keywords 

In [None]:
# Export the keywords using the "graph" format. This is the last expression
# in the cell, so the notebook displays whatever the call returns.
keyword_explorer.export(format="graph")

Visualize keywords in the context of the input files

In [None]:
# `IPython.display` is the public import location for the HTML display class.
from IPython.display import HTML

# View keywords in context of text logs (single file)
# TODO: Set file name to {experiment_id}.txt
filename = "mfxl1015222.txt"

# Request only the predicted keywords (training keywords are switched off) for
# the chosen file.
# NOTE(review): the result is assumed to be HTML markup, since it is passed
# directly to HTML() — confirm against KeywordExplorer.view_keywords.
annotated = keyword_explorer.view_keywords(
    show_training=False, show_predicted=True, textfilename=filename
)

# Last expression in the cell, so the notebook renders the HTML object.
HTML(annotated)