In [1]:
# Install Dependencies
# NOTE(review): haystack is installed from the tip of the git repository and pandas is unpinned,
# so a fresh run may resolve different versions than the ones that produced the recorded outputs.
! pip install git+https://github.com/deepset-ai/haystack.git
! pip install urllib3==1.25.4
!pip install pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/deepset-ai/haystack.git
  Cloning https://github.com/deepset-ai/haystack.git to /tmp/pip-req-build-9bh9mejv
  Running command git clone -q https://github.com/deepset-ai/haystack.git /tmp/pip-req-build-9bh9mejv
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting python-docx
  Downloading python-docx-0.8.11.tar.gz (5.6 MB)
[K     |████████████████████████████████| 5.6 MB 30.7 MB/s 
[?25hCollecting huggingface-hub>=0.5.0
  Downloading huggingface_hub-0.11.0-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 59.8 MB/s 
[?25hCollecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[K     |████████████████████████████████| 43 kB 2.1 MB/s 
[?25hCollecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 

In [2]:
# Install Elastic Search for Document Retriver
! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
! chown -R daemon:daemon elasticsearch-7.9.2

import os
from subprocess import Popen, PIPE, STDOUT
es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],
                   stdout=PIPE, stderr=STDOUT,
                   preexec_fn=lambda: os.setuid(1)  # as daemon
                  )
# wait until ES has started
! sleep 30

In [3]:
%%bash
# Sanity check: query the root endpoint of the local Elasticsearch server on port 9200.
curl -sX GET "localhost:9200/"

{
  "name" : "1a86a422b42a",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "49n3phcsRjW574D80zQZZw",
  "version" : {
    "number" : "7.9.2",
    "build_flavor" : "default",
    "build_type" : "tar",
    "build_hash" : "d34da0ea4a966c4e49417f2da2f244e3e97b4e6e",
    "build_date" : "2020-09-23T00:45:33.626720Z",
    "build_snapshot" : false,
    "lucene_version" : "8.6.2",
    "minimum_wire_compatibility_version" : "6.8.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}


In [4]:
# Mount Google Drive at /content/drive; the CSV files read later live under /content/drive/MyDrive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


##### Successfully installed Haystack and Elasticsearch


In [5]:
# Import libraries and dependencies 
import logging
import os
import re
import pandas as pd
from haystack.document_stores import ElasticsearchDocumentStore

In [6]:
# Configure logging: WARNING as the global level, INFO for the "haystack" logger
logging.basicConfig(format="%(levelname)s - %(name)s -  %(message)s", level=logging.WARNING)
logging.getLogger("haystack").setLevel(logging.INFO)

In [7]:
# Get the host where Elasticsearch is running, default to localhost
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")
# NOTE(review): this store (index "document") is not referenced by any later cell shown in this
# notebook; the retrievers below use the separately created elastic_document_store* objects.
document_store = ElasticsearchDocumentStore(host=host, username="", password="", index="document")


INFO:haystack.telemetry:Haystack sends anonymous usage data to understand the actual usage and steer dev efforts towards features that are most meaningful to users. You can opt-out at anytime by calling disable_telemetry() or by manually setting the environment variable HAYSTACK_TELEMETRY_ENABLED as described for different operating systems on the documentation page. More information at https://docs.haystack.deepset.ai/docs/telemetry


In [8]:
# Load the extracted dataset from Google Drive.
# The text_data column holds the raw page text that is cleaned in a later cell.
data = pd.read_csv("/content/drive/MyDrive/data_with_text.csv")

In [9]:
data.head()

Unnamed: 0.1,Unnamed: 0,name,html_page_link,tags,text_data
0,0,What's New,set_N20140200.html,"[""What's New""]",JavaScript must be enabled to correctly display this content Sneak Peeks R...
1,1,Sneak Peeks,chapter_N3859207.html,"[""What's New"", 'Sneak Peeks']",Previous JavaScript must be enabled to correctly display this content What...
2,2,Release Notes,book_N3865324.html,"[""What's New"", 'Release Notes']",Previous JavaScript must be enabled to correctly display this content What...
3,3,NetSuite 2022.2 Release Notes,chapter_N3944673.html,"[""What's New"", 'Release Notes', 'NetSuite 2022.2 Release Notes']",Previous JavaScript must be enabled to correctly display this content What...
4,4,Accounting,section_4316104403.html,"[""What's New"", 'Release Notes', 'NetSuite 2022.2 Release Notes', 'Accounting']",Previous JavaScript must be enabled to correctly display this content What...


In [10]:
# Clean every document before indexing:
#   1. Most documents contain the banner 'Previous  JavaScript must be enabled to correctly
#      display this content', so it is removed.
#   2. Trailing/leading whitespace is trimmed and the text is lower-cased.
#   3. HTML tags are replaced by a single space.
BANNER_TEXT = "Previous  JavaScript must be enabled to correctly display this content"
HTML_TAG_PATTERN = re.compile("<.*?>")

processed_data = []
for item in data.text_data:
  # str.replace is a no-op when the banner is absent, so no membership check is needed.
  item = item.replace(BANNER_TEXT, "")
  item = item.strip().lower()
  # Store the tag-stripped text. Previously the re.sub result was assigned to an unused
  # variable and the text that still contained the HTML tags was appended instead.
  processed_data.append(HTML_TAG_PATTERN.sub(" ", item))

In [11]:
data["processed_data"] = processed_data

In [12]:
data.head()

Unnamed: 0.1,Unnamed: 0,name,html_page_link,tags,text_data,processed_data
0,0,What's New,set_N20140200.html,"[""What's New""]",JavaScript must be enabled to correctly display this content Sneak Peeks R...,javascript must be enabled to correctly display this content sneak peeks rel...
1,1,Sneak Peeks,chapter_N3859207.html,"[""What's New"", 'Sneak Peeks']",Previous JavaScript must be enabled to correctly display this content What...,what's new important: netsuite 2022.2 sneak peeks enhancements and changes ...
2,2,Release Notes,book_N3865324.html,"[""What's New"", 'Release Notes']",Previous JavaScript must be enabled to correctly display this content What...,"what's new netsuite 2022.2 release notes suitecommerce, suitecommerce myacco..."
3,3,NetSuite 2022.2 Release Notes,chapter_N3944673.html,"[""What's New"", 'Release Notes', 'NetSuite 2022.2 Release Notes']",Previous JavaScript must be enabled to correctly display this content What...,"what's new release notes important: note: note: revision date: october 17, ..."
4,4,Accounting,section_4316104403.html,"[""What's New"", 'Release Notes', 'NetSuite 2022.2 Release Notes', 'Accounting']",Previous JavaScript must be enabled to correctly display this content What...,what's new release notes netsuite 2022.2 release notes netsuite 2022.2 incl...


In [13]:
# data shape
data.shape

(17935, 6)

###### 17935 documents are available.

In [14]:
# Convert each row into the dict layout written to the haystack document store:
# the cleaned text goes under "content", the page name and tags under "meta".
docs = [
  {
    "content": record["processed_data"],
    "meta": {"name": record["name"], "tags": record["tags"]},
  }
  for _, record in data.iterrows()
]

##### Define the Elasticsearch document store with cosine similarity

In [15]:
# Define the Elasticsearch document store used by the BM25 retriever
import os
from haystack.document_stores import ElasticsearchDocumentStore

# Get the host where Elasticsearch is running, default to localhost
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")
# recreate_index=True: an existing index of the same name is deleted first (see the
# "Index ... deleted" log lines), so re-running this cell starts from an empty index.
elastic_document_store = ElasticsearchDocumentStore(host=host, username="", password="", index="document-dim", similarity="cosine", recreate_index=True)

INFO:haystack.document_stores.search_engine:Index 'label' deleted.


In [16]:
# Write the processed documents to the Elasticsearch document store
elastic_document_store.write_documents(docs)

In [17]:
print(f"Loaded {elastic_document_store.get_document_count()} documents")

Loaded 17918 documents


In [18]:
elastic_document_store.embedding_dim

768

In [19]:
# Define the BM25 Retriever on top of the documents written to elastic_document_store.
# No update_embeddings() call is made for this store in the notebook.
from haystack.nodes import BM25Retriever

bm25_retriever = BM25Retriever(elastic_document_store)

In [20]:
# Smoke test: print the metadata (name and tags) of the top 10 BM25 hits for a sample question
for item in bm25_retriever.retrieve("How to create Purchase Report by Vendor grouped by Class.", top_k=10):
  print(item.to_dict()["meta"])

{'tags': "['Payment Processing', 'Payment Processing Options', 'Electronic Bank Payments', 'Setting Up Electronic Bank Payments', 'Setting Up Payment Aggregation Methods']", 'name': 'Setting Up Payment Aggregation Methods'}
{'tags': "['Revenue and Expense Recognition', 'Advanced Revenue Management (Essentials) and (Revenue Allocation)', 'Reports for Advanced Revenue Management', 'Deferred Expense Rollforward Report', 'Deferred Expense Rollforward Transaction Details Report']", 'name': 'Deferred Expense Rollforward Transaction Details Report'}
{'tags': "['SCM (Supply Chain Management)', 'Vendors, Purchasing, and Receiving', 'Purchasing and Receiving', 'Procurement Dashboard', 'Using the Procurement Dashboard']", 'name': 'Using the Procurement Dashboard'}
{'tags': "['Revenue and Expense Recognition', 'Advanced Revenue Management (Essentials) and (Revenue Allocation)', 'Reports for Advanced Revenue Management', 'Deferred Expense Waterfall Detail Report']", 'name': 'Deferred Expense Waterf

In [21]:
# Load the test data for evaluating the retrievers.
# The evaluation below reads its "Question" and "Document Name" columns.
test_data = pd.read_csv("/content/drive/MyDrive/test_data_2.csv")

In [22]:
# Evaluating a retriever against the labelled test questions
def evaluate_retriever(retriever, test_data, no_of_documents):
  """Count how often the expected document is among a retriever's top results.

  Args:
    retriever: object exposing ``retrieve(query, top_k=...)`` whose results provide
      ``to_dict()`` with the document name under ``["meta"]["name"]``.
    test_data: DataFrame with a "Question" and a "Document Name" column.
    no_of_documents: number of documents to request per question (``top_k``).

  Returns:
    Tuple ``(pred_labels, true_label_count, false_label_count)``: the retrieved
    document names per question, the number of questions whose expected document
    was retrieved, and the number of questions where it was not.
  """
  hits = 0
  misses = 0
  pred_labels = []
  for _, example in test_data.iterrows():
    expected_name = example["Document Name"]
    retrieved = retriever.retrieve(example["Question"], top_k=no_of_documents)
    retrieved_names = [doc.to_dict()["meta"]["name"] for doc in retrieved]
    # A question counts as a hit when the expected name appears anywhere in the results.
    if expected_name in retrieved_names:
      hits += 1
    else:
      misses += 1
    pred_labels.append(retrieved_names)
  return pred_labels, hits, misses

In [28]:
# Evaluate the BM25 retriever on the test questions, requesting 10 documents per question
pred_labels, true_label_count, false_label_count = evaluate_retriever(bm25_retriever, test_data, 10)

In [29]:
# Share of test questions whose expected document was among the 10 retrieved documents
print("Recall of BM25 Reteriver is : ", (true_label_count/(true_label_count + false_label_count)))

Recall of BM25 Reteriver is :  0.423728813559322


## DensePassageRetriever

In [26]:
import os
from haystack.document_stores import ElasticsearchDocumentStore

# Get the host where Elasticsearch is running, default to localhost
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")
# Separate index ("document-dim-2") for the Dense Passage Retriever.
# recreate_index=True deletes any existing index of that name first (see the log output).
elastic_document_store_2 = ElasticsearchDocumentStore(host=host, username="", password="", index="document-dim-2", similarity="cosine", recreate_index=True)

INFO:haystack.document_stores.search_engine:Index 'document-dim-2' deleted.
INFO:haystack.document_stores.search_engine:Index 'label' deleted.


In [27]:
# Write the same processed documents to the store used by the Dense Passage Retriever
elastic_document_store_2.write_documents(docs)

In [30]:
from haystack.nodes import DensePassageRetriever

# Dense Passage Retriever with separate pretrained encoders for queries and for passages.
# NOTE(review): the variable is named "dense_parser_retriever" although the class is
# DensePassageRetriever; the name is kept because later cells reference it.
dense_parser_retriever = DensePassageRetriever(
    document_store=elastic_document_store_2,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base"
)

INFO:haystack.modeling.utils:Using devices: CUDA:0 - Number of GPUs: 1


Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/493 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/418M [00:00<?, ?B/s]

INFO:haystack.modeling.model.language_model:Auto-detected model language: english


Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/492 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.


Downloading pytorch_model.bin:   0%|          | 0.00/418M [00:00<?, ?B/s]

INFO:haystack.modeling.model.language_model:Auto-detected model language: english


In [31]:
# Compute embeddings for all documents in the store with the DPR retriever and save them
elastic_document_store_2.update_embeddings(dense_parser_retriever)

INFO:haystack.document_stores.search_engine:Updating embeddings for all 17918 docs ...


Updating embeddings:   0%|          | 0/17918 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/7920 [00:00<?, ? Docs/s]

In [32]:
# Smoke test: print the metadata of the top 10 DPR hits for the same sample question
for item in dense_parser_retriever.retrieve("How to create Purchase Report by Vendor grouped by Class.", top_k=10):
  print(item.to_dict()["meta"])

{'tags': "['Merchandise Hierarchy', 'Merchandise Hierarchy Workbook', 'Merchandise Hierarchy Workbook Template']", 'name': 'Merchandise Hierarchy Workbook Template'}
{'tags': "['Order Fulfillment and Shipping', 'Order Fulfillment Automation', 'Automatic Location Assignment', 'Configuration Examples']", 'name': 'Configuration Examples'}
{'tags': "['SuiteCloud Development Framework XML Reference', 'Lists', 'itemcustomfield_itemsubtype']", 'name': 'itemcustomfield_itemsubtype'}
{'tags': "['SuiteCommerce Site Builder', 'Setting Up Your Site Builder Site', 'Setting Order Amount and Quantity']", 'name': 'Setting Order Amount and Quantity'}
{'tags': "['SuiteScript Code Samples Catalog', 'Use Cases', 'Setting the Purchase Order Exchange Rate']", 'name': 'Setting the Purchase Order Exchange Rate'}
{'tags': "['SCM (Supply Chain Management)', 'Item Record Management', 'Item Pricing', 'Creating Pricing Groups']", 'name': 'Creating Pricing Groups'}
{'tags': "['SuiteCloud Development Framework XML R

In [33]:
# Evaluating the Dense Passage Retriever, requesting 10 documents per question
pred_labels, true_label_count, false_label_count = evaluate_retriever(dense_parser_retriever, test_data, 10)

In [34]:
# Share of test questions whose expected document was among the 10 documents retrieved by DPR
print("Recall of Dense parser Reteriver is : ", (true_label_count/(true_label_count + false_label_count)))

Recall of Dense parser Reteriver is :  0.3050847457627119


# Embedding Retriever

In [35]:
import os
from haystack.document_stores import ElasticsearchDocumentStore

# Get the host where Elasticsearch is running, default to localhost
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")
# Use a dedicated index for the embedding retriever. This cell previously reused
# "document-dim-2" with recreate_index=True, which deleted the index (and stored embeddings)
# behind elastic_document_store_2 and recreated it with a different embedding dimension,
# leaving the Dense Passage Retriever pointing at an incompatible index.
# embedding_dim=384 is set explicitly because it differs from the store's default of 768.
elastic_document_store_3 = ElasticsearchDocumentStore(host=host, username="", password="", index="document-dim-3", similarity="cosine", recreate_index=True, embedding_dim=384)

In [36]:
# Write the same processed documents to the store used by the embedding retriever
elastic_document_store_3.write_documents(docs)

In [37]:
from haystack.nodes import EmbeddingRetriever

# Sentence-transformers based retriever.
# NOTE(review): the store was created with embedding_dim=384, presumably to match the
# output size of this model — confirm if the model is swapped.
embedding_retriever = EmbeddingRetriever(
    document_store=elastic_document_store_3,
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    model_format="sentence_transformers",
)
# Important:
# Now that we initialized the Retriever, we need to call update_embeddings() to iterate over all
# previously indexed documents and update their embedding representation.
# While this can be a time consuming operation (depending on the corpus size), it only needs to be done once.
# At query time, we only need to embed the query and compare it to the existing document embeddings, which is very fast.
elastic_document_store_3.update_embeddings(embedding_retriever)

INFO:haystack.modeling.utils:Using devices: CUDA:0 - Number of GPUs: 1
INFO:haystack.nodes.retriever.dense:Init retriever using embeddings of model sentence-transformers/all-MiniLM-L6-v2


Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Updating embeddings:   0%|          | 0/17918 [00:00<?, ? Docs/s]

Batches:   0%|          | 0/313 [00:00<?, ?it/s]

Batches:   0%|          | 0/248 [00:00<?, ?it/s]

In [38]:
# Smoke test: print the full dict (content included) of the top 10 embedding-retriever hits
for item in embedding_retriever.retrieve("How to create Purchase Report by Vendor grouped by Class.", top_k=10):
  print(item.to_dict())

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'content': 'order management billing and invoices invoices invoice groups management note: note: creating an invoice group viewing the invoice group record viewing an individual invoice in an invoice group adding an invoice to an invoice group from the invoice groups record removing an invoice from an invoice group grouped invoices due date invoice groups and closed periods accepting payment for an invoice group generating and printing an invoice group pdf invoice groups and customer statements invoice group reporting and the a/r aging report  from the invoice record, you can add an invoice to an existing invoice group by using the add to group button. the add to group button appears on invoices that are not part of an invoice group but are eligible for grouping. if there are no invoice groups that match the criteria for the invoice, you cannot create a new invoice group from the invoice record. go to transactions > sales > create invoices > list. open the invoice you want to add to a

In [39]:
# Evaluate the embedding retriever on the test questions, requesting 10 documents per question
pred_labels, true_label_count, false_label_count = evaluate_retriever(embedding_retriever, test_data, 10)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [40]:
# Share of test questions whose expected document was among the 10 documents retrieved
print("Recall of Embeddding Reteriver is : ", (true_label_count/(true_label_count + false_label_count)))

Recall of Embeddding Reteriver is :  0.9322033898305084


In [41]:
from haystack.nodes import FARMReader
# Reader model that extracts answer spans from the retrieved documents; loaded from the model hub
model = "deepset/roberta-base-squad2"
reader = FARMReader(model, use_gpu=True)

INFO:haystack.modeling.utils:Using devices: CUDA:0 - Number of GPUs: 1
INFO:haystack.modeling.utils:Using devices: CUDA:0 - Number of GPUs: 1


Downloading config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

INFO:haystack.modeling.model.language_model: * LOADING MODEL: 'deepset/roberta-base-squad2' (Roberta)


Downloading pytorch_model.bin:   0%|          | 0.00/473M [00:00<?, ?B/s]

INFO:haystack.modeling.model.language_model:Auto-detected model language: english
INFO:haystack.modeling.model.language_model:Loaded 'deepset/roberta-base-squad2' (Roberta model) from model hub.


Downloading tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

INFO:haystack.modeling.utils:Using devices: CUDA:0 - Number of GPUs: 1


In [42]:
from haystack.pipelines import ExtractiveQAPipeline
# Combine the embedding retriever with the reader into one question-answering pipeline
pipeline = ExtractiveQAPipeline(reader, embedding_retriever)

query = "how to create purchase order?"
# Retrieve 10 documents for the query and ask the reader for its 10 best answers
result = pipeline.run(query=query, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 10}})

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Inferencing Samples: 100%|██████████| 2/2 [00:00<00:00,  2.33 Batches/s]


In [None]:
from haystack.utils import print_answers

# Pretty-print the pipeline result with all answer fields, truncating long text to 100 characters
print_answers(result, details="all", max_text_len=100)


Query: how to create purchase order?
Answers:
[   <Answer {'answer': 'manually', 'type': 'extractive', 'score': 0.736153244972229, 'context': ' a special order purchase order is not automatically created. you must manually create the purchase ...', 'offsets_in_document': [{'start': 2859, 'end': 2867}], 'offsets_in_context': [{'start': 71, 'end': 79}], 'document_id': 'e775589d734aa37e4673c29acf20eb7f', 'meta': {'tags': "['SCM (Supply Chain Management)', 'Vendors, Purchasing, and Receiving', 'Purchasing and Receiving', 'Purchasing', 'Drop Shipment and Special Order Purchases']", 'name': 'Drop Shipment and Special Order Purchases'}}>,
    <Answer {'answer': 'from a purchase request', 'type': 'extractive', 'score': 0.6652361154556274, 'context': 'ors and multiple currencies. if this purchase order was created from a purchase request, the currenc...', 'offsets_in_document': [{'start': 4831, 'end': 4854}], 'offsets_in_context': [{'start': 64, 'end': 87}], 'document_id': '986f62c87c24a1e30d3

In [54]:
# Print every answer of the pipeline result as a plain dict
for item in result["answers"]:
  print(item.to_dict())

{'answer': ' creating a purchase order from a purchase request', 'type': 'extractive', 'score': 0.5344197750091553, 'context': 'hase requests entering a purchase request approving a purchase request creating a purchase order fro...', 'offsets_in_document': [{'start': 182, 'end': 232}], 'offsets_in_context': [{'start': 70, 'end': 120}], 'document_id': '7ad88acc185fea25d965bc8361228557', 'meta': {'tags': "['SCM (Supply Chain Management)', 'Vendors, Purchasing, and Receiving', 'Purchasing and Receiving', 'Purchase Requests', 'Enabling Purchase Requests']", 'name': 'Enabling Purchase Requests'}}
{'answer': ' from a purchase request', 'type': 'extractive', 'score': 0.34092792868614197, 'context': 'urchase request approving a purchase request creating a purchase order from a purchase request notif...', 'offsets_in_document': [{'start': 208, 'end': 232}], 'offsets_in_context': [{'start': 70, 'end': 94}], 'document_id': '7ad88acc185fea25d965bc8361228557', 'meta': {'tags': "['SCM (Supply Chain 