In [1]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
# Read the local .env file so its variables are available through os.environ.
# The return value is bound to `_` and is not used afterwards.
_ = load_dotenv(find_dotenv()) # read local .env file

# os.environ[...] raises KeyError if OPENAI_API_KEY is still undefined here.
openai.api_key  = os.environ['OPENAI_API_KEY']

In [2]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.text_splitter import SentenceSplitter


In [3]:
def load_documents(file_path, num_pages=None):
  """Read one file through SimpleDirectoryReader and return its documents.

  Args:
    file_path: Path of the single input file passed to the reader.
    num_pages: Optional cap. When truthy, only the first `num_pages`
      documents are kept; a falsy value (None or 0) keeps everything.

  Returns:
    The documents produced by `load_data()`, truncated when `num_pages`
    is truthy.
  """
  loaded = SimpleDirectoryReader(input_files=[file_path]).load_data()
  return loaded[:num_pages] if num_pages else loaded

def create_nodes(documents, chunk_size=2000, chunk_overlap=450):
  """Split documents into nodes using a SentenceSplitter.

  Args:
    documents: Documents handed to `get_nodes_from_documents`.
    chunk_size: Forwarded to SentenceSplitter as `chunk_size`.
    chunk_overlap: Forwarded to SentenceSplitter as `chunk_overlap`.

  Returns:
    Whatever `SentenceSplitter.get_nodes_from_documents` returns for
    `documents`.
  """
  splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  return splitter.get_nodes_from_documents(documents)


# load documents
# The Windows path is written as a raw string: its backslashes would otherwise
# form invalid escape sequences, which Python warns about and plans to reject.
# The resulting path value is unchanged.
documents1 = load_documents(r"E:\Plutomen\Pdf Data\M-Q-CM-50 MODIFICATION IN CONTROLLER ASSY EPS (1).PDF")
# documents2 = load_documents("dense_x_retrieval.pdf", 9)
# documents3 = load_documents("llama_beyond_english.pdf", 7)

# create nodes (default chunk_size / chunk_overlap of create_nodes)
nodes1 = create_nodes(documents1)
# nodes2 = create_nodes(documents2)
# nodes3 = create_nodes(documents3)



In [7]:
# Print the nodes that create_nodes built from documents1.
print(nodes1)



In [16]:
from llama_index.llms import OpenAI
from llama_index.evaluation import generate_question_context_pairs

# web_search_queries qa template
# Prompt used to generate short, search-engine-style queries from a chunk.
# It is filled in with two placeholders: {context_str} (the chunk text) and
# {num_questions_per_chunk} (how many queries to produce).
# A trailing backslash joins a sentence with the next line; sections are
# separated by real blank lines. The template deliberately contains no
# double-quote characters, so the model is not nudged into quoting its output.
web_search_queries_qa_tmpl = """\
Context information is below.

---------------------
{context_str}
---------------------

Given the context information and not prior knowledge, \
generate queries based on the below task.

Task:
Your task is to create {num_questions_per_chunk} Web search-like queries. \
Restrict the queries to the context information provided.

Following is the explanation for Web search-like queries: \
Shortened queries similar to those commonly entered into a search engine.
An example query: Best retrieval concept
"""
# LLM that writes the synthetic queries.
llm = OpenAI(
    model='gpt-3.5-turbo-instruct',
    temperature=0.3,
)

# Generate 5 queries per chunk of nodes1 with the custom prompt template.
web_search_queries_single_document = generate_question_context_pairs(
    nodes1,
    llm=llm,
    num_questions_per_chunk=5,
    qa_generate_prompt_tmpl=web_search_queries_qa_tmpl,
)


100%|██████████| 2/2 [00:03<00:00,  1.82s/it]


In [15]:
# Web search queries: collect the values of the generated `queries` mapping
# into a plain list and display it.
queries = [*web_search_queries_single_document.queries.values()]
queries

['How to modify Controller Assy, EPS in Maruti Suzuki Swift, Swift Dzire, Ertiga & Ertiga (SHVS)?',
 '"How to replace Controller Assy, EPS in Maruti Suzuki vehicles with DTC C1113?"',
 '"What is the cut-off VIN for Old Swift, Old Swift Dzire, Old Ertiga, and Old Ertiga (SHVS) models in Maruti Suzuki India Limited?"']

In [17]:
# Web search queries: collect the values of the generated `queries` mapping
# into a plain list and display it.
queries = [*web_search_queries_single_document.queries.values()]
queries

 '"What is the modification in Controller Assy, EPS for Maruti Suzuki Ertiga?"',
 '"Where can I find the updated part number for the Controller Assy, EPS in Maruti Suzuki Swift Dzire?"',
 '"What is the cause of hard steering operation in Maruti Suzuki Ertiga (SHVS)?"',
 '"Are there any previous bulletins related to Controller Assy, EPS modification for Maruti Suzuki models?"',
 '"How to replace Controller Assy, EPS in Maruti Suzuki vehicles?"',
 '"What is the procedure for diagnosing DTC C1113 in Maruti Suzuki vehicles?"',
 '"How to identify the root cause of a customer complaint in Maruti Suzuki vehicles?"',
 '"What is the cut-off VIN for Old Swift, Old Swift Dzire, Old Ertiga, and Old Ertiga (SHVS) models in Maruti Suzuki vehicles?"',
 '"What is the necessary road test to confirm elimination of a problem after replacing a part in Maruti Suzuki vehicles?"']