In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI

import pprint


In [2]:
# Path of the PDF to index, relative to the notebook's working directory.
file_name = "data/raw/xLSTM_paper.pdf"
print(f'Loading file {file_name} ...')
loader = PyPDFLoader(file_name)
# `pages` holds the loaded documents; its length is reported below as the page count.
pages = loader.load()

print(f'File load correctly and contains {len(pages)} pages ')

Loading file data/raw/xLSTM_paper.pdf ...
File load correctly and contains 55 pages 


In [3]:
# Chunking configuration: chunk size, overlap between consecutive chunks, and splitter policy.
chunk_size = 500
chunk_overlap = 200
splitter_policy = 'recursive'
print(f'Document spliting. Chunk size {chunk_size} - Chunk overlap {chunk_overlap} - Policy {splitter_policy}')

# Map each named policy to its splitter class. Any other policy value falls
# back to the token-based splitter.
splitter_classes = {
    'recursive': RecursiveCharacterTextSplitter,
    'character': CharacterTextSplitter,
}
splitter_class = splitter_classes.get(splitter_policy, TokenTextSplitter)
splitter = splitter_class(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

Document spliting. Chunk size 500 - Chunk overlap 200 - Policy recursive


In [4]:
# Split the loaded pages into chunks using the splitter configured above.
split_texts = splitter.split_documents(pages)
print(f'Amount of splits generated {len(split_texts)}')

Amount of splits generated 520


In [5]:
persist_directory = 'data/chroma/'
embedding = OpenAIEmbeddings()

# Open (or create) the persistent collection first. Calling
# Chroma.from_documents unconditionally would add the chunks again on every
# re-run of this cell, growing the on-disk collection and making similarity
# search return duplicated documents.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)

if vectordb._collection.count() == 0:
    # Empty store: embed and index the chunks produced by the splitter.
    print(f'Creating a local embedding vector DB on directory {persist_directory}')
    vectordb.add_documents(split_texts)
    print(f'DB created successfuly. Collection count: {vectordb._collection.count()} \n')
else:
    # NOTE: the existing store is reused as-is. Delete the directory to rebuild
    # it after changing the source file or the chunking settings.
    print(f'Reusing existing embedding vector DB on directory {persist_directory}. '
          f'Collection count: {vectordb._collection.count()} \n')

Creating a local embedding vector DB on directory data/chroma/
DB created successfuly. Collection count: 520 



In [10]:
def pretty_print_docs(docs):
    """Print retrieved documents one after another, separated by a dashed rule.

    Each entry shows its 1-based position, the ``page`` and ``source`` values
    from the document's metadata, and then the document text.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping and a
            ``page_content`` attribute.

    Missing ``page`` or ``source`` metadata is rendered as ``unknown`` instead
    of raising ``KeyError``, so documents from any loader can be printed.
    """
    separator = f"\n{'-' * 100}\n"
    rendered = []
    for position, doc in enumerate(docs, start=1):
        page = doc.metadata.get('page', 'unknown')
        source = doc.metadata.get('source', 'unknown')
        rendered.append(f"Document {position} page {page} from {source}:\n\n{doc.page_content}")
    print(separator.join(rendered))

In [11]:
# Baseline retrieval: query the vector store directly for the 5 most similar chunks.
question = "What are the major improvments over the previous or vanilla LSTM arquitecture?"
related_docs = vectordb.similarity_search(question,k=5)
pretty_print_docs(related_docs)

Document 1 page 40 from data/raw/xLSTM_paper.pdf:

training for LSTMs at this scale. Replacing every second LSTM layer by a non-gated feed-forward
network with GeLU activation function (similar to Vaswani et al.), which corresponds to the post
up-projection backbone (see Figure 3) further boosts performance. Adding Exponential Gating to this
architecture yields the sLSTM as depicted in Figure 9, with another large performance improvement.
Finally, adding the best Matrix Memory variant found in Table 8 by replacing some sLSTM blocks
----------------------------------------------------------------------------------------------------
Document 2 page 1 from data/raw/xLSTM_paper.pdf:

These limitations of LSTM have paved the way for the emergence of Transformers (Vaswani et al.,
2017) in language modeling. What performances can we achieve in language modeling when
overcoming these limitations and scaling LSTMs to the size of current Large Language Models?
2
---------------------------------

In [12]:
# Wrap the vector-store retriever with an LLM-based compressor: the base
# retriever fetches the documents and the compressor is built from `llm`.
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever()
)
# Run the compressed retrieval for the question defined in the previous cell.
compressed_docs = compression_retriever.invoke(question)

In [13]:
# Show what the compression retriever returned for the question.
pretty_print_docs(compressed_docs)

Document 1 page 40 from data/raw/xLSTM_paper.pdf:

Replacing every second LSTM layer by a non-gated feed-forward network with GeLU activation function (similar to Vaswani et al.), which corresponds to the post up-projection backbone (see Figure 3) further boosts performance. Adding Exponential Gating to this architecture yields the sLSTM as depicted in Figure 9, with another large performance improvement. Finally, adding the best Matrix Memory variant found in Table 8 by replacing some sLSTM blocks
----------------------------------------------------------------------------------------------------
Document 2 page 0 from data/raw/xLSTM_paper.pdf:

- Firstly, we introduce exponential gating with appropriate normalization and stabilization techniques.
- Secondly, we modify the LSTM memory structure, obtaining: (i) sLSTM with a scalar memory, a scalar update, and new memory mixing, (ii) mLSTM that is fully parallelizable with a matrix memory and a covariance update rule.
------------------

In [42]:
# Chat model passed to the QA chain. Note this rebinds `llm`, which the
# compression cell earlier bound to OpenAI(temperature=0).
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

In [29]:
# Build the QA prompt: the template exposes a {context} slot and a {question} slot.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, NEVER make up an answer. Use three sentences maximum. Keep the answer as concise as possible. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template) # prompt object handed to the chain below
# Build the retrieval QA chain; it is configured to return its source documents too.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(), # plain vector-store retriever; pass compression_retriever here to use compressed context instead
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

## Utilizando compression en vector retrieval

In [16]:
question = "What are the major improvments over the previous or vanilla LSTM arquitecture?"
# This section reports the compression-backed run, but `qa_chain` is built on
# the plain vector-store retriever. Build a dedicated chain over
# `compression_retriever` so the cell reproduces on a fresh Run All without
# hand-editing the retriever argument of `qa_chain`.
compression_qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=compression_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)
result = compression_qa_chain.invoke({"query": question})
pprint.pp(result["result"])

(' The major improvements over the previous or vanilla LSTM architecture '
 'include replacing every second LSTM layer with a non-gated feed-forward '
 'network with GeLU activation function, adding Exponential Gating to this '
 'architecture, and integrating these LSTM extensions such as sLSTM and mLSTM. '
 'These improvements also involve modifying the LSTM memory structure, using '
 'pre-LayerNorm residual backbones, and incorporating a post up-projection '
 'block and a matrix memory.')


In [17]:
# Show the source documents returned alongside the answer above.
pretty_print_docs(result["source_documents"])

Document 1 page 40 from data/raw/xLSTM_paper.pdf:

Replacing every second LSTM layer by a non-gated feed-forward network with GeLU activation function (similar to Vaswani et al.), which corresponds to the post up-projection backbone (see Figure 3) further boosts performance. Adding Exponential Gating to this architecture yields the sLSTM as depicted in Figure 9, with another large performance improvement. Finally, adding the best Matrix Memory variant found in Table 8 by replacing some sLSTM blocks
----------------------------------------------------------------------------------------------------
Document 2 page 0 from data/raw/xLSTM_paper.pdf:

- exponential gating with appropriate normalization and stabilization techniques
- modify the LSTM memory structure
- sLSTM with a scalar memory, a scalar update, and new memory mixing
- mLSTM that is fully parallelizable with a matrix memory and a covariance update rule
- integrating these LSTM extensions
-------------------------------------

## Sin utilizar compression en vector retrieval

In [21]:
# Same question, answered through `qa_chain` (built on the plain vector-store retriever).
question = "What are the major improvments over the previous or vanilla LSTM arquitecture?"
result = qa_chain.invoke({"query": question})
pprint.pp(result["result"])

(' The major improvements over the previous or vanilla LSTM architecture '
 'include the introduction of exponential gating with appropriate '
 'normalization and stabilization techniques, modification of the LSTM memory '
 'structure to obtain sLSTM and mLSTM variants, and integration of these LSTM '
 'extensions into the xLSTM architecture. These improvements have been shown '
 'to significantly boost performance in language modeling tasks.')


In [22]:
# Show the source documents returned alongside the answer above.
pretty_print_docs(result["source_documents"])

Document 1 page 1 from data/raw/xLSTM_paper.pdf:

These limitations of LSTM have paved the way for the emergence of Transformers (Vaswani et al.,
2017) in language modeling. What performances can we achieve in language modeling when
overcoming these limitations and scaling LSTMs to the size of current Large Language Models?
2
----------------------------------------------------------------------------------------------------
Document 2 page 40 from data/raw/xLSTM_paper.pdf:

training for LSTMs at this scale. Replacing every second LSTM layer by a non-gated feed-forward
network with GeLU activation function (similar to Vaswani et al.), which corresponds to the post
up-projection backbone (see Figure 3) further boosts performance. Adding Exponential Gating to this
architecture yields the sLSTM as depicted in Figure 9, with another large performance improvement.
Finally, adding the best Matrix Memory variant found in Table 8 by replacing some sLSTM blocks
---------------------------------

## Cambiando la estructura de la pregunta se obtuvieron mejores resultados (compression_retriever)

In [30]:
question = "What are the major improvments over the previous LSTM arquitecture summarize in bullet points?"
# The heading attributes this run to `compression_retriever`, but `qa_chain`
# is built on the plain vector-store retriever. Build the compression-backed
# chain explicitly so the result matches the section on a fresh Run All.
compression_qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=compression_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)
result = compression_qa_chain.invoke({"query": question})
print(result["result"])


- Introduction of exponential gating with normalization and stabilization techniques
- Modification of LSTM memory structure to include sLSTM with scalar memory and update, and mLSTM with matrix memory and covariance update rule
- These extensions allow for better handling of long context problems and improved efficiency in language modeling.


## Cambiando la estructura de la pregunta se obtuvieron mejores resultados (similarity retriever)

In [None]:
# Bullet-point variant of the question, answered through `qa_chain`
# (built on the plain vector-store retriever).
question = "What are the major improvments over the previous LSTM arquitecture summarize in bullet points?"
result = qa_chain.invoke({"query": question})
print(result["result"])

In [28]:
# Re-prints the answer held in `result` by the preceding cell.
print(result["result"])


- Exponential gating with normalization and stabilization techniques
- Modification of LSTM memory structure, resulting in sLSTM and mLSTM variants
- Fully parallelizable mLSTM with matrix memory and covariance update rule
