In [1]:
%load_ext autoreload
%autoreload 2
from src.services.file_service import load_pdf, split_document
from src.services.vectordb_service import ChromaDB
from src.utils.util import pretty_print_docs
from config import TEMPLATE, MODEL, DOC_PATH, CHUNK_SIZE, CHUNK_OVERLAP

In [2]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

import pprint

In [3]:
# Display the prompt template loaded from config before it is used.
# pprint.pp(obj) is shorthand for pprint.pprint(obj, sort_dicts=False).
pprint.pprint(TEMPLATE, sort_dicts=False)

('Use ONLY the following pieces of context to answer the question at the end.\n'
 "If you don't know the answer, just say that you don't know, NEVER make up an "
 'answer.\n'
 'Summarize in bullet point format. Keep the answer as concise as possible.\n'
 '{context}\n'
 'Question: {question}\n'
 'Helpful Answer:')


In [4]:
# Load the PDF located at DOC_PATH (from config); the loader reports the file
# name and the page count on stdout.
# NOTE(review): `pages` is presumably one document per PDF page — confirm in file_service.
pages = load_pdf(doc_path=DOC_PATH)

Loading file xLSTM_paper.pdf ...
File load correctly. Contains 55 pages


In [9]:
# Break the loaded pages into overlapping chunks using the recursive strategy.
# Chunk size and overlap are taken from config.
text_splits = split_document(
    pages,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    strategy='recursive',
)

Document spliting. Chunk size 800 - Chunk overlap 400 - Strategy recursive
Splits generated 364


In [11]:
# Wrap the document chunks in the project's ChromaDB helper, then fetch the
# vector store object that the QA cell turns into a retriever.
# NOTE(review): presumably ChromaDB embeds and indexes the chunks on
# construction — confirm in vectordb_service.
chroma = ChromaDB(text_splits)
vectorstore = chroma.get_vectorstore()

In [13]:
# Question asked about the paper. To try the alternative, swap the two lines
# below and re-run this cell AND the printing cells so outputs stay in sync.
# question = "Does the xLSTM perform better than Transformers for long sequences prediction?"
question = "What are the main restrictions of this new xLSTM architecture?"

# temperature=0 asks the model for its least random sampling.
llm = ChatOpenAI(model_name=MODEL, temperature=0)

# Prompt built from TEMPLATE, which carries the {context} and {question} placeholders.
QA_CHAIN_PROMPT = PromptTemplate.from_template(TEMPLATE)

# One entry per retrieval strategy, each shaped as {search_type: chain_result};
# the printing cells rely on this shape.
search_results = []

for search_type in ['similarity']:  # 'mmr' can be added here to compare strategies
    retriever = vectorstore.as_retriever(search_type=search_type)
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,  # later cells read result['source_documents']
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )
    result = qa_chain.invoke({"query": question})

    search_results.append({search_type: result})

In [14]:
# Print each retrieval strategy's answer together with the question that
# produced it. The question is read from the chain result itself, so the output
# stays self-describing even when `question` is edited and cells are re-run.
for search in search_results:
    for search_type, chain_output in search.items():
        print(search_type.upper())
        print(f"Question: {chain_output.get('query', '<not recorded>')}")
        print(chain_output['result'])

SIMILARITY
- Expensive computational load for large language experiments
- Not fully optimized architecture or hyperparameters, especially for larger xLSTM architectures
- Extensive optimization process needed for xLSTM to reach its full potential


In [11]:
# Print each retrieval strategy's answer together with the question that
# produced it. The question is read from the chain result itself rather than
# from a hand-maintained comment, so the output cannot drift from what was asked.
for search in search_results:
    for search_type, chain_output in search.items():
        print(search_type.upper())
        print(f"Question: {chain_output.get('query', '<not recorded>')}")
        print(chain_output['result'])

SIMILARITY
- xLSTM demonstrates consistent strong performance on all tasks in the Long Range Arena
- xLSTM models maintain low perplexities for longer contexts compared to other methods
- xLSTM[1:1] is the best-performing non-Transformer model
- xLSTM models perform favorably on language modeling compared to Transformers and State Space Models

Yes, the xLSTM performs better than Transformers for long sequences prediction.


In [12]:
# For every retrieval strategy, show the chunks the chain used as context.
for search in search_results:
    for search_type, chain_output in search.items():
        print(search_type.upper())
        pretty_print_docs(chain_output['source_documents'])

SIMILARITY
Document 1 page 8 from data/raw/xLSTM_paper.pdf:

Test of xLSTM’s Long Context Capabilities on Long Range Arena. To assess xLSTM’s per-
formance on long sequences and large contexts, we compare different methods on the Long Range
Arena (Tay et al., 2021). xLSTM demonstrates consistent strong performance on all of the tasks,
suggesting that the xLSTM architecture is remarkably efficient in handling different aspects of long
context problems. For more details, see Appendix B.1.3.
4.2 Method Comparison and Ablation Study
The main question of this paper is, what can we achieve in language modeling when scaling up the
new LSTM variants. Therefore, we train xLSTMs, Transformers, State Space Models, and other
methods on 15B tokens from SlimPajama in an auto-regressive language modeling setting. We
----------------------------------------------------------------------------------------------------
Document 2 page 10 from data/raw/xLSTM_paper.pdf:

other methods, xLSTM models maintai