In [1]:
# To run locally and access project folder
import os
import sys

# Expose the parent of the working directory on sys.path so that project-level
# imports can be resolved from this notebook.
current_dir = os.getcwd()
base_dir = os.path.dirname(current_dir)
# Guard the append: re-running this cell would otherwise stack duplicate entries.
if base_dir not in sys.path:
    sys.path.append(base_dir)

In [2]:
%load_ext autoreload
%autoreload 2
from src.services.file_service import load_pdf, split_document
from src.services.vectordb_service import ChromaDB
from src.utils.util import pretty_print_docs
from config import TEMPLATE, MODEL, DOC_PATH
# Turn the project-relative DOC_PATH from config into a path under base_dir,
# keeping the trailing slash.
DOC_PATH = f"{base_dir}/{DOC_PATH}/"

In [3]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI

import pprint

In [4]:
# Show the prompt template imported from config; it is turned into a
# PromptTemplate for the QA chain further down.
pprint.pp(TEMPLATE)

('Use ONLY the following pieces of context to answer the question at the end.\n'
 "If you don't know the answer, just say that you don't know, NEVER make up an "
 'answer.\n'
 'Summarize in bullet point format. Keep the answer as concise as possible.\n'
 '{context}\n'
 'Question: {question}\n'
 'Helpful Answer:')


In [5]:
# Load the document(s) found at DOC_PATH; `pages` is chunked in a later cell.
pages = load_pdf(doc_path=DOC_PATH)

Loading file xLSTM_paper.pdf ...
File load correctly. Contains 55 pages


In [7]:

# Chunking parameters kept together so they are easy to spot and tune.
split_options = {"chunk_size": 500, "chunk_overlap": 200, "strategy": "recursive"}
text_splits = split_document(pages, **split_options)

# Index the chunks and keep a handle on the vector store used by the retrievers below.
chroma = ChromaDB(text_splits)
vectorstore = chroma.get_vectorstore()

Document spliting. Chunk size 500 - Chunk overlap 200 - Strategy recursive
Splits generated 520
Cleaning Chroma DB
Creating a local embedding vector DB on directory data/chroma
DB created successfuly. Collection count: 520 



In [None]:
# Contextual compression: documents fetched by the base vector-store retriever
# are post-processed by an LLM-backed extractor before being returned.
llm = OpenAI(temperature=0)
base_retriever = vectorstore.as_retriever()
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)
# Usage example (requires `question` to be defined first):
#   compressed_docs = compression_retriever.invoke(question)

In [None]:
def get_retriever(search_type):
    """Return the retriever that matches ``search_type``.

    ``'compression'`` selects the notebook-level ``compression_retriever``;
    any other value is forwarded to ``vectorstore.as_retriever`` as its
    ``search_type`` argument.
    """
    if search_type != 'compression':
        return vectorstore.as_retriever(search_type=search_type)
    return compression_retriever

In [29]:
question = "What are the major improvments over the previous or vanilla LSTM arquitecture?"
llm = ChatOpenAI(model_name=MODEL, temperature=0)

QA_CHAIN_PROMPT = PromptTemplate.from_template(TEMPLATE)

# Run the same question through one RetrievalQA chain per retrieval strategy.
# Each entry of `search_results` is a single-key dict: {search_type: chain output}.
search_results = []

for search_type in ['similarity', 'mmr', 'compression']:
    # Resolve the retriever through get_retriever(): 'compression' is not a
    # vector-store search type, so passing it to vectorstore.as_retriever()
    # directly is rejected; the helper maps it to compression_retriever.
    retriever = get_retriever(search_type)
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )
    result = qa_chain.invoke({"query": question})
    search_results.append({search_type: result})

# NOTE: `qa_chain` and `result` remain bound to the last strategy in the list
# ('compression') once the loop finishes.

## Utilizando compression en vector retrieval

In [16]:
# Build the chain explicitly with the compression retriever so this section
# does not depend on whichever `qa_chain` a previously executed cell left behind.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=get_retriever('compression'),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
question = "What are the major improvments over the previous or vanilla LSTM arquitecture?"
result = qa_chain.invoke({"query": question})
pprint.pp(result["result"])

(' The major improvements over the previous or vanilla LSTM architecture '
 'include replacing every second LSTM layer with a non-gated feed-forward '
 'network with GeLU activation function, adding Exponential Gating to this '
 'architecture, and integrating these LSTM extensions such as sLSTM and mLSTM. '
 'These improvements also involve modifying the LSTM memory structure, using '
 'pre-LayerNorm residual backbones, and incorporating a post up-projection '
 'block and a matrix memory.')


In [17]:
# Show the source documents the chain returned together with the answer above.
pretty_print_docs(result["source_documents"])

Document 1 page 40 from data/raw/xLSTM_paper.pdf:

Replacing every second LSTM layer by a non-gated feed-forward network with GeLU activation function (similar to Vaswani et al.), which corresponds to the post up-projection backbone (see Figure 3) further boosts performance. Adding Exponential Gating to this architecture yields the sLSTM as depicted in Figure 9, with another large performance improvement. Finally, adding the best Matrix Memory variant found in Table 8 by replacing some sLSTM blocks
----------------------------------------------------------------------------------------------------
Document 2 page 0 from data/raw/xLSTM_paper.pdf:

- exponential gating with appropriate normalization and stabilization techniques
- modify the LSTM memory structure
- sLSTM with a scalar memory, a scalar update, and new memory mixing
- mLSTM that is fully parallelizable with a matrix memory and a covariance update rule
- integrating these LSTM extensions
-------------------------------------

## Sin utilizar compression en vector retrieval

In [21]:
# Build the chain explicitly with the plain similarity retriever (no compression)
# so this section does not reuse the `qa_chain` left over from another cell.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=get_retriever('similarity'),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
question = "What are the major improvments over the previous or vanilla LSTM arquitecture?"
result = qa_chain.invoke({"query": question})
pprint.pp(result["result"])

(' The major improvements over the previous or vanilla LSTM architecture '
 'include the introduction of exponential gating with appropriate '
 'normalization and stabilization techniques, modification of the LSTM memory '
 'structure to obtain sLSTM and mLSTM variants, and integration of these LSTM '
 'extensions into the xLSTM architecture. These improvements have been shown '
 'to significantly boost performance in language modeling tasks.')


In [22]:
# Show the source documents the chain returned together with the answer above.
pretty_print_docs(result["source_documents"])

Document 1 page 1 from data/raw/xLSTM_paper.pdf:

These limitations of LSTM have paved the way for the emergence of Transformers (Vaswani et al.,
2017) in language modeling. What performances can we achieve in language modeling when
overcoming these limitations and scaling LSTMs to the size of current Large Language Models?
2
----------------------------------------------------------------------------------------------------
Document 2 page 40 from data/raw/xLSTM_paper.pdf:

training for LSTMs at this scale. Replacing every second LSTM layer by a non-gated feed-forward
network with GeLU activation function (similar to Vaswani et al.), which corresponds to the post
up-projection backbone (see Figure 3) further boosts performance. Adding Exponential Gating to this
architecture yields the sLSTM as depicted in Figure 9, with another large performance improvement.
Finally, adding the best Matrix Memory variant found in Table 8 by replacing some sLSTM blocks
---------------------------------

## Cambiando la estructura de la pregunta se obtuvieron mejores resultados (compression_retriever)

In [30]:
# Rephrased question, answered with the compression retriever. The chain is
# built here so the cell does not depend on a leftover `qa_chain`.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=get_retriever('compression'),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
question = "What are the major improvments over the previous LSTM arquitecture summarize in bullet points?"
result = qa_chain.invoke({"query": question})
print(result["result"])


- Introduction of exponential gating with normalization and stabilization techniques
- Modification of LSTM memory structure to include sLSTM with scalar memory and update, and mLSTM with matrix memory and covariance update rule
- These extensions allow for better handling of long context problems and improved efficiency in language modeling.


## Cambiando la estructura de la pregunta se obtuvieron mejores resultados (similarity retriever)

In [None]:
# Rephrased question, answered with the similarity retriever. The chain is
# built here so the cell does not depend on a leftover `qa_chain`.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=get_retriever('similarity'),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
question = "What are the major improvments over the previous LSTM arquitecture summarize in bullet points?"
result = qa_chain.invoke({"query": question})
print(result["result"])

In [28]:
# Print the answer text held in the current `result`.
print(result["result"])


- Exponential gating with normalization and stabilization techniques
- Modification of LSTM memory structure, resulting in sLSTM and mLSTM variants
- Fully parallelizable mLSTM with matrix memory and covariance update rule
