##### The Aryn Partitioner is configured to use the Aryn Partitioning Service, which provides fast, GPU-powered performance. Go to [aryn.ai/sign-up](https://aryn.ai/sign-up) to get a free API key for the service. You can also run the Aryn Partitioner locally by setting `use_partitioning_service` to `False`. Although the Aryn Partitioner can run on a CPU, an NVIDIA GPU is recommended for good performance.


##### In this example, we partition a PDF with Sycamore and write the output to a LangChain vector store.


In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.openai import OpenAI
from langchain.callbacks import get_openai_callback

import os

import sycamore 
from sycamore.data import Document
from sycamore.transforms.partition import ArynPartitioner


Replace the value of `aryn_api_key` with your own Aryn API key.

In [None]:
# Prefer the ARYN_API_KEY environment variable when it is set, so the real key
# does not have to be written into (and saved with) this notebook. Otherwise
# fall back to the placeholder below; replace it with your own key.
aryn_api_key = os.environ.get("ARYN_API_KEY", 'aryn-api-key')

In [None]:
# Extra requirements this notebook needs (to be added):
#  faiss-cpu==1.7.4
#  langchain-community


In [None]:
# Path of the PDF to ingest; it is also the single entry of the list of
# input paths handed to the reader.
work_dir = './data/2306.07303.pdf'
work_dirs = [work_dir]

In [None]:
# Create the Sycamore context and read the input paths as binary PDF files.
context = sycamore.init()
pdf_docset = context.read.binary(work_dirs, binary_format="pdf")

# Partition the PDFs with the Aryn Partitioner, authenticating with the API
# key set earlier in the notebook. Table structure extraction is enabled and
# OCR is disabled.
partitioned_docset = pdf_docset.partition(
    partitioner=ArynPartitioner(
        threshold=0.35,
        use_ocr=False,
        batch_at_a_time=True,
        extract_table_structure=True,
        aryn_api_key=aryn_api_key,
    ),
    num_gpus=0.1,
)

In [None]:
# Flatten every element of every partitioned document into one text corpus.
# Table elements are serialised as CSV; any other element contributes its
# text representation when it has one.
#
# The pieces are collected in a list and joined once with a newline between
# them. Concatenating with no separator glues the end of one element to the
# start of the next, and the text splitter used afterwards breaks on "\n",
# so it needs a boundary between elements.
text_parts = []
for doc in partitioned_docset.take_all():
    for element in doc.elements:
        print(element)
        if element.type == "table":
            text_parts.append(element['table'].to_csv())
        elif element.text_representation:
            text_parts.append(element.text_representation)
text = "\n".join(text_parts)


In [None]:

# Split the corpus on newlines, targeting 1000 characters per chunk with a
# 200-character overlap (lengths are measured with len()).
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
chunks = text_splitter.split_text(text)

# Embed the chunks with OpenAI embeddings and build a FAISS index over them.
embedding = OpenAIEmbeddings()
faiss_index = FAISS.from_texts(chunks, embedding)


In [None]:
# Interactive question-answering loop over the FAISS index.
#
# The LLM and the QA chain do not depend on the question, so they are built
# once here instead of on every iteration.
llm = OpenAI()
chain = load_qa_chain(llm, chain_type="stuff")

while True:
    # A blank line (or end of input) ends the session; otherwise the loop
    # could only be stopped by interrupting the kernel.
    try:
        user_question = input("Question (blank to quit): ").strip()
    except EOFError:
        break
    if not user_question:
        break

    # Retrieve the 5 chunks most similar to the question to use as context.
    docs = faiss_index.similarity_search(user_question, k=5)

    # Run the chain inside the OpenAI callback context and print the callback
    # object (its usage report) followed by the answer.
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        print(cb)
        print(response)
