In [16]:
%load_ext autoreload
%autoreload 2
from src.services.file_service import load_pdf, split_document
from src.services.vectordb_service import ChromaDB
from src.utils.util import pretty_print_docs
from config import TEMPLATE, MODEL

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI

import pprint

In [20]:
# Pretty-print the prompt template imported from `config` so its text and
# placeholders can be inspected before it is used.
pprint.pp(TEMPLATE)

('Use ONLY the following pieces of context to answer the question at the end.\n'
 "If you don't know the answer, just say that you don't know, NEVER make up an "
 'answer.\n'
 'Summarize in bullet point format. Keep the answer as concise as possible.\n'
 '{context}\n'
 'Question: {question}\n'
 'Helpful Answer:')


In [21]:
# Splitting parameters, named so they are easy to locate and tune.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 200
SPLIT_STRATEGY = 'recursive'

# Load the PDF, split it into chunks, then build the ChromaDB wrapper from
# those chunks and fetch the vector store it exposes.
pages = load_pdf()
text_splits = split_document(
    pages,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    strategy=SPLIT_STRATEGY,
)
chroma = ChromaDB(text_splits)
vectorstore = chroma.get_vectorstore()

Loading file xLSTM_paper.pdf ...
File load correctly. Contains 55 pages


In [24]:
# Wrap the vector store's default retriever with contextual compression:
# the base retriever fetches candidate chunks and the LLM-backed extractor
# post-processes them before they are handed to a chain.
#
# The completion model used by the extractor gets its own name so that it is
# not confused with the chat model that the QA cell binds to `llm`.
# temperature=0 keeps the extraction as repeatable as the API allows.
compressor_llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(compressor_llm)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectorstore.as_retriever(),
)

In [25]:
def get_retriever(search_type):
    """Return the retriever that corresponds to ``search_type``.

    ``'compression'`` selects the module-level ``compression_retriever``.
    Any other value is forwarded unchanged as the ``search_type`` keyword of
    ``vectorstore.as_retriever``.
    """
    if search_type != 'compression':
        return vectorstore.as_retriever(search_type=search_type)
    return compression_retriever

In [26]:
# Question posed to every retrieval strategy (string kept verbatim).
question = "What are the major improvments over the previous LSTM arquitecture?"
llm = ChatOpenAI(model_name=MODEL, temperature=0)

# Prompt shared by all chains, built from the configured template.
QA_CHAIN_PROMPT = PromptTemplate.from_template(TEMPLATE)

# Run one RetrievalQA chain per strategy. Each entry appended to
# `search_results` is a single-key dict: {strategy name: chain result}.
search_results = []

for search_type in ('similarity', 'mmr', 'compression'):
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=get_retriever(search_type),
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )
    result = qa_chain.invoke({"query": question})
    search_results.append({search_type: result})

In [27]:
# Print each strategy's name in upper case, followed by its answer text.
for entry in search_results:
    for strategy, outcome in entry.items():
        print(strategy.upper())
        print(outcome['result'])

SIMILARITY
- Replacing every second LSTM layer with a non-gated feed-forward network with GeLU activation function
- Adding Exponential Gating to the architecture
- Introducing novel memory structures such as scalar memory, scalar update, and memory mixing
MMR
- xLSTM architecture is remarkably efficient in handling long context problems
- xLSTMs with new memory perform best
- xLSTM capabilities are boosted by exponential gating and modified memory structures
- xLSTMs outperform state-of-the-art Transformers and State Space Models in performance and scaling
COMPRESSION
- Replacing every second LSTM layer with a non-gated feed-forward network with GeLU activation function
- Adding Exponential Gating to the architecture
- Introducing novel memory structures
- Introducing the new sLSTM and mLSTM variants


# Observaciones

Tanto `similarity` como `compression` incluyeron en su respuesta un fragmento que no es del todo correcto para la pregunta:  
"Replacing every second LSTM layer with a non-gated feed-forward network with GeLU activation function"  

Observando los documentos fuente, veo que proviene de un fragmento más grande donde se explican los primeros pasos que se dieron con la implementación de LSTM para llegar a la versión final.  

El tamaño del chunk no permitió abarcar el contexto donde se ve que se trata de uno de los primeros cambios del proceso y no de la versión final.

In [30]:
# Print, per strategy, the source documents returned alongside the answer.
for entry in search_results:
    for strategy, outcome in entry.items():
        print(strategy.upper())
        pretty_print_docs(outcome['source_documents'])
        print()

SIMILARITY
Document 1 page 1 from data/raw/xLSTM_paper.pdf:

These limitations of LSTM have paved the way for the emergence of Transformers (Vaswani et al.,
2017) in language modeling. What performances can we achieve in language modeling when
overcoming these limitations and scaling LSTMs to the size of current Large Language Models?
2
----------------------------------------------------------------------------------------------------
Document 2 page 40 from data/raw/xLSTM_paper.pdf:

training for LSTMs at this scale. Replacing every second LSTM layer by a non-gated feed-forward
network with GeLU activation function (similar to Vaswani et al.), which corresponds to the post
up-projection backbone (see Figure 3) further boosts performance. Adding Exponential Gating to this
architecture yields the sLSTM as depicted in Figure 9, with another large performance improvement.
Finally, adding the best Matrix Memory variant found in Table 8 by replacing some sLSTM blocks
----------------------