In [4]:
from langchain_community.document_loaders import PyPDFLoader

# Read the source paper from disk; `doc` holds whatever PyPDFLoader.load() returns
# and is what the splitting cell consumes.
pdf_path = "leave_no_context_behind.pdf"
loader = PyPDFLoader(pdf_path)
doc = loader.load()

In [5]:
from langchain_text_splitters import CharacterTextSplitter

# Token-based splitter (tiktoken encoder) targeting 300-token chunks.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
)

# Break the loaded PDF documents into smaller Document chunks for embedding.
chunks = text_splitter.split_documents(doc)

# Sanity check: number of chunks, then the type of one chunk (one per line).
print(len(chunks), type(chunks[0]), sep="\n")

12
<class 'langchain_core.documents.base.Document'>


In [7]:
# Load the OpenAI API key from a local file kept out of the notebook itself.
# The context manager guarantees the handle is closed (the previous bare open()
# leaked it), and strip() drops a trailing newline/whitespace that editors
# commonly append and that would otherwise become part of the key.
with open('.demo_key.txt', encoding='utf-8') as key_file:
    OPENAI_API_KEY = key_file.read().strip()

In [8]:
from langchain_openai import OpenAIEmbeddings

# Embedding client shared by the vector-store build and the later re-open of it.
embedding_model = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

In [9]:
from langchain_community.vectorstores import Chroma


# Give every chunk a stable id derived from its position. Without explicit ids,
# each re-run of this cell adds the same chunks again to the persisted
# collection under fresh auto-generated ids, so similarity searches start
# returning duplicates. With fixed ids a re-run is expected to overwrite the
# existing entries instead (NOTE(review): relies on the Chroma wrapper
# upserting by id - confirm against the installed version).
chunk_ids = [f"chunk-{index}" for index in range(len(chunks))]

# Embed the chunks and store them in an on-disk Chroma collection.
db = Chroma.from_documents(
    chunks,
    embedding_model,
    ids=chunk_ids,
    persist_directory="./chroma",
)

# Explicit flush to disk, kept from the original cell.
# NOTE(review): newer chromadb releases may persist automatically - confirm
# whether this call is still needed for the installed version.
db.persist()

In [10]:
# Re-open the persisted collection from disk; the same embedding model is passed
# so queries are embedded the same way as the stored chunks.
db_connection = Chroma(
    embedding_function=embedding_model,
    persist_directory="./chroma",
)

In [21]:
# Broad retrieval query: pull the chunks most similar to the paper's title/topic.
query = "Everything about the paper, Leave No Context Behind: Efficient Infinite Context Transformers with Infini-attention"

# `out` is the list of retrieved documents later passed to the chain as context.
out = db_connection.similarity_search(query=query)


In [14]:
from langchain_openai import ChatOpenAI

# Read the API key with a context manager so the file handle is closed
# (the previous bare open() leaked it); strip() removes any trailing
# newline/whitespace that would otherwise be sent as part of the key.
with open('.demo_key.txt', encoding='utf-8') as key_file:
    open_ai_key = key_file.read().strip()

# temperature=0 is set explicitly for the question-answering model.
chat_model = ChatOpenAI(model="gpt-3.5-turbo", api_key=open_ai_key, temperature=0)

In [24]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instructions; `{context}` is the slot the chain fills with the
# documents supplied under the "context" input key.
SYSTEM_TEMPLATE = """
Answer the user's questions based on the below context. Give them in clear bullet points if required, for clarity.
If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know":

<context>
{context}
</context>
"""

# Prompt layout: the system instructions first, then the chat messages
# supplied at invoke time under the "messages" key.
prompt_messages = [
    ("system", SYSTEM_TEMPLATE),
    MessagesPlaceholder(variable_name="messages"),
]
question_answering_prompt = ChatPromptTemplate.from_messages(prompt_messages)

# Chain that combines the prompt with the chat model.
document_chain = create_stuff_documents_chain(
    llm=chat_model,
    prompt=question_answering_prompt,
)

In [23]:
from langchain_core.messages import HumanMessage

# The user's question, wrapped as a chat message.
user_question = HumanMessage(content="what experiments were conducted in this paper?")

# Inputs expected by the chain: retrieved documents plus the message history.
chain_inputs = {
    "context": out,
    "messages": [user_question],
}

# Last expression of the cell, so the answer is shown as the cell output.
document_chain.invoke(chain_inputs)

'In the paper, several experiments were conducted to demonstrate the effectiveness of the proposed approach. Here are some of the experiments mentioned:\n1. The approach was tested on long-context language modeling benchmarks.\n2. A passkey retrieval task with a 1M sequence length was solved using a 1B LLM.\n3. A book summarization task with a 500K length was performed using an 8B model.\n4. The model was trained with a 100K sequence length and showed improved perplexity.\n5. The model achieved a new state-of-the-art (SOTA) result on a 500K length book summarization task after continual pre-training and task fine-tuning.\n\nThese experiments aimed to showcase the performance and scalability of the proposed Infini-attention mechanism in handling infinitely long inputs with bounded memory and computation resources.'