In [1]:
from simphylib.chunker import PDFChunker
from simphylib.embedder import EmbeddingsSimphy
from simphylib.retriever import RetrieverSimphy

In [None]:
# PDF that the cells below chunk and index; replace with your own PDF file path.
pdf_path = "docs/SimpScriptGPart4Ch4.pdf"


In [14]:

def main(
    pdf_path,
    loader="PyPDFLoader",
    splitter="TokenTextSplitter",
    query="How to create a disc with radius 5",
    k=5,
    chunk_size=1024,
    chunk_overlap=256,
    preview_chars=None,
):
    """
    Chunk a PDF, index the chunks, and print what a query retrieves from them.

    The PDF is loaded and split through ``PDFChunker`` and every chunk is
    printed. The chunks are then turned into a vector store via
    ``EmbeddingsSimphy(save_vectorstore=False)`` and ``query`` is run against
    it with ``RetrieverSimphy``; each retrieved document is printed.

    Args:
        pdf_path: Path of the PDF file, forwarded to ``PDFChunker``.
        loader: Loader name forwarded to ``PDFChunker``.
        splitter: Splitter name forwarded to ``PDFChunker``.
        query: Query text passed to ``retriever.retrieve``.
        k: Value passed as ``k`` to ``retriever.retrieve`` (presumably the
            number of documents to return -- confirm against RetrieverSimphy).
        chunk_size: Forwarded to ``PDFChunker`` (units presumably depend on
            the chosen splitter -- confirm).
        chunk_overlap: Forwarded to ``PDFChunker``.
        preview_chars: If not ``None``, only the first ``preview_chars``
            characters of each chunk/result are printed. The default ``None``
            prints the full content.

    Returns:
        None. Everything is reported through ``print``.
    """
    chunker = PDFChunker(
        pdf_path=pdf_path,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        loader=loader,
        splitter=splitter,
    )

    docs = chunker.load()
    chunks = chunker.split()
    print(f"Loaded {len(docs)} documents and split into {len(chunks)} chunks.")

    def _preview(text):
        # Full text unless the caller explicitly asked for a shortened preview.
        return text if preview_chars is None else text[:preview_chars]

    # Show every chunk so the effect of the loader/splitter choice is visible.
    for i, chunk in enumerate(chunks, start=1):
        print(f"\nChunk {i}:")
        print(f"Content:\n\n {_preview(chunk.page_content)} \n")

    # Build the vector store from the chunks and wrap it in a retriever.
    vectorstore = EmbeddingsSimphy(save_vectorstore=False).create_vectorstore(chunks)
    retriever = RetrieverSimphy(vectorstore=vectorstore)

    results = retriever.retrieve(query=query, k=k)
    print(f"Retrieved {len(results)} documents for query '{query}':")

    for i, doc in enumerate(results, start=1):
        print(f"\nResult {i}:")
        print(f"Content:\n\n {_preview(doc.page_content)} \n")



In [15]:
# Run the pipeline with the "PyPDFLoader" loader and the
# "RecursiveCharacterTextSplitter" splitter.
loader, splitter = "PyPDFLoader", "RecursiveCharacterTextSplitter"
main(pdf_path, loader=loader, splitter=splitter)

17:20:45 - INFO - Load pretrained SentenceTransformer: BAAI/bge-base-en-v1.5


Loaded 16 documents and split into 23 chunks.

Chunk 1:
Content:

 Represent this passage for retrieval: Accessing Body /Joint in Script
Body or joint already existing in simulation can be accessed by calling getBody() and getJoint() functions of \'world\' object.
For example:
var d=World.getBody("Disc"); //stores body of name \'Disc\' in variable d var jt=World.getJoint("joint1");
//stores joint of name \'joint1\' in variable jt 


Chunk 2:
Content:

 Represent this passage for retrieval: For example:
var d=World.getBody("Disc"); //stores body of name \'Disc\' in variable d var jt=World.getJoint("joint1");
//stores joint of name \'joint1\' in variable jt 


Chunk 3:
Content:

 Represent this passage for retrieval: //stores joint of name \'joint1\' in variable jt 


Chunk 4:
Content:

 Represent this passage for retrieval: Creating Body / Joints
1. By Defining New Objects
New Body or joint can be created by calling world.addXXX() and world.addXXXJoint() functions.
The following code cr

In [16]:
# Run the pipeline with the "SimphyFileLoader" loader and the
# "TokenTextSplitter" splitter.
loader, splitter = "SimphyFileLoader", "TokenTextSplitter"
main(pdf_path, loader=loader, splitter=splitter)

17:23:48 - INFO - Load pretrained SentenceTransformer: BAAI/bge-base-en-v1.5


Loaded 16 documents and split into 16 chunks.

Chunk 1:
Content:

 Represent this passage for retrieval: Accessing Body /Joint in Script
Body or joint already existing in simulation can be accessed by calling getBody() and getJoint() functions of \'world\' object.
For example:
var d=World.getBody("Disc"); //stores body of name \'Disc\' in variable d var jt=World.getJoint("joint1");
//stores joint of name \'joint1\' in variable jt 


Chunk 2:
Content:

 Represent this passage for retrieval: For example:
var d=World.getBody("Disc"); //stores body of name \'Disc\' in variable d var jt=World.getJoint("joint1");
//stores joint of name \'joint1\' in variable jt 


Chunk 3:
Content:

 Represent this passage for retrieval: //stores joint of name \'joint1\' in variable jt 


Chunk 4:
Content:

 Represent this passage for retrieval: Creating Body / Joints
1. By Defining New Objects
New Body or joint can be created by calling world.addXXX() and world.addXXXJoint() functions.
The following code cr

In [None]:
# Run the pipeline with the "SimphyFileLoader" loader and the
# "SentenceTransformersTokenTextSplitter" splitter.
loader, splitter = "SimphyFileLoader", "SentenceTransformersTokenTextSplitter"
main(pdf_path, loader=loader, splitter=splitter)

17:26:28 - INFO - Use pytorch device_name: cuda:0
17:26:28 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
