In [1]:
%load_ext autoreload
%autoreload 2
from src.services.file_service import load_pdf, split_document
from src.services.vectordb_service import ChromaDB
from src.utils.util import pretty_print_docs
from config import TEMPLATE, MODEL, DOC_PATH

In [3]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

import pprint

In [4]:
# Display the prompt template imported from config so the exact instructions
# later wrapped in PromptTemplate are visible in the notebook.
pprint.pp(TEMPLATE)

('Use ONLY the following pieces of context to answer the question at the end.\n'
 "If you don't know the answer, just say that you don't know, NEVER make up an "
 'answer.\n'
 'Summarize in bullet point format. Keep the answer as concise as possible.\n'
 '{context}\n'
 'Question: {question}\n'
 'Helpful Answer:')


In [5]:
# Load the PDF located at DOC_PATH (from config); `pages` is the input to
# split_document in the next step.
pages = load_pdf(doc_path=DOC_PATH)

Loading file xLSTM_paper.pdf ...
File load correctly. Contains 55 pages


In [7]:
# Split the loaded pages into chunks (size 800, overlap 400, 'recursive'
# strategy) and index them in Chroma.
# NOTE(review): per the recorded output, constructing ChromaDB cleans the
# existing store and rebuilds the embeddings under data/chroma, so re-running
# this cell re-embeds every chunk -- confirm against vectordb_service.
text_splits = split_document(pages, chunk_size=800, chunk_overlap=400, strategy='recursive')
chroma = ChromaDB(text_splits)
# Handle used below to create retrievers for the QA chain.
vectorstore = chroma.get_vectorstore()

Document spliting. Chunk size 800 - Chunk overlap 400 - Strategy recursive
Splits generated 364
Cleaning Chroma DB
Creating a local embedding vector DB on directory data/chroma
DB created successfuly. Collection count: 364 



In [8]:
# Query handed to the QA chain. Spelling is corrected ("improvements",
# "architecture") because this text is what gets matched against the indexed
# chunks and shown to the LLM; misspelled terms can degrade both.
question = "What are the major improvements over the previous LSTM architecture?"
# temperature=0 requests the least random sampling the model offers.
llm = ChatOpenAI(model_name=MODEL, temperature=0)

# Prompt built from the config template; it exposes {context} and {question}.
QA_CHAIN_PROMPT = PromptTemplate.from_template(TEMPLATE)

# One single-key dict per search type: {search_type: chain_output}.
# The display cells further down iterate this exact shape.
search_results = []

for search_type in ['similarity']:  # add 'mmr' to compare retrieval strategies
    retriever = vectorstore.as_retriever(search_type=search_type)
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        # Keep the retrieved chunks alongside the answer so they can be inspected.
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )
    result = qa_chain.invoke({"query": question})

    search_results.append({search_type: result})

In [9]:
# Print the generated answer for each search type, headed by its name.
for entry in search_results:
    for search_type, chain_output in entry.items():
        print(search_type.upper())
        print(chain_output['result'])

SIMILARITY
- Exponential gating and novel memory structures
- Introduction of sLSTM with scalar memory, scalar update, and memory mixing
- Introduction of mLSTM with matrix memory and covariance update rule, fully parallelizable
- Integration of these LSTM extensions into residual block backbones to create xLSTM blocks and architectures


In [12]:
# For each search type, list the source documents returned with the answer.
for entry in search_results:
    for search_type, chain_output in entry.items():
        print(search_type.upper())
        pretty_print_docs(chain_output['source_documents'])

SIMILARITY
Document 1 page 0 from data/raw/xLSTM_paper.pdf:

core marked the dawn of a new era, outpacing LSTMs at scale. We now raise a
simple question: How far do we get in language modeling when scaling LSTMs to
billions of parameters, leveraging the latest techniques from modern LLMs, but
mitigating known limitations of LSTMs? Firstly, we introduce exponential gating
with appropriate normalization and stabilization techniques. Secondly, we modify
the LSTM memory structure, obtaining: (i) sLSTM with a scalar memory, a scalar
update, and new memory mixing, (ii) mLSTM that is fully parallelizable with a
matrix memory and a covariance update rule. Integrating these LSTM extensions
into residual block backbones yields xLSTM blocks that are then residually stacked
into xLSTM architectures. Exponential gating and modified memory structures
----------------------------------------------------------------------------------------------------
Document 2 page 2 from data/raw/xLSTM_paper.pdf:

