In [1]:
%load_ext autoreload
%autoreload 2
from src.services.file_service import load_pdf, split_document
from src.services.vectordb_service import ChromaDB
from src.utils.util import pretty_print_docs
from config import TEMPLATE, MODEL, DOC_PATH, CHUNK_SIZE, CHUNK_OVERLAP

In [2]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

import pprint

In [3]:
# Pretty-print the prompt template imported from `config` so the exact
# instructions given to the LLM are visible in the notebook.
pprint.pp(TEMPLATE)

('USE ONLY the following pieces of context to answer the question at the end.\n'
 "If you don't know the answer, just say that you don't know, NEVER MAKE UP AN "
 'ANSWER or \n'
 'give opinions out side the context.\n'
 'Summarize in bullet point format. Keep the answer as concise as possible.\n'
 '{context}\n'
 'Question: {question}\n'
 'Helpful Answer:')


In [4]:
# Load the PDF located at DOC_PATH (from `config`); `pages` is what gets
# handed to `split_document` in the next step.
pages = load_pdf(doc_path=DOC_PATH)

Loading file VIT.pdf ...
File load correctly. Contains 22 pages


In [5]:
# Chunk the loaded pages using the size/overlap configured in `config`;
# the resulting splits are what gets embedded into the vector store.
text_splits = split_document(
    pages,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    strategy='recursive',
)

Document spliting. Chunk size 800 - Chunk overlap 400 - Strategy recursive
Splits generated 157


In [6]:
# Build the project's Chroma wrapper from the text splits and fetch the
# underlying vector store, which the QA cell below turns into a retriever.
# NOTE(review): the recorded output suggests the constructor removes and
# recreates the local `data/chroma` directory on every run -- confirm in
# `ChromaDB` before pointing this at a store you want to keep.
chroma = ChromaDB(text_splits)
vectorstore = chroma.get_vectorstore()

Directory 'data/chroma' removed successfully.
Creating a local embedding vector DB on directory data/chroma
DB created successfuly. Collection count: 157 



In [14]:
# Candidate questions -- assign exactly one of them to `question`.
# question = "Does the xLSTM perform better than Transformers for long sequences prediction?"
# question = "What are the main restrictions of this new xLSTM architecture?"
# question = "What is the major improvement of OmniVec over its predecessor?"
# question = "What are the characteristics of OmniVec?"
question = "What is the OmniVec backbone"

# Fail fast on a blank question: an empty query would still hit the retriever
# and the LLM, spending an API call on a meaningless answer.
if not question.strip():
    raise ValueError("`question` is empty; set it to a non-blank query before running the QA chain.")

# temperature=0 asks the chat model for its least random sampling.
llm = ChatOpenAI(model_name=MODEL, temperature=0)

# Prompt built from the project template; it exposes {context} and {question}.
QA_CHAIN_PROMPT = PromptTemplate.from_template(TEMPLATE)

# Retrieval strategies to compare; add 'mmr' to run it alongside 'similarity'.
search_types = ['similarity']

# One single-key dict per strategy: {search_type: chain_output}. The cells
# below read chain_output['result'] and chain_output['source_documents'].
search_results = []

for search_type in search_types:
    retriever = vectorstore.as_retriever(search_type=search_type)
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,  # keep the retrieved chunks for inspection
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )
    result = qa_chain.invoke({"query": question})

    search_results.append({search_type: result})

In [15]:
# Print the generated answer for every search type stored in `search_results`,
# preceded by the search type in upper case as a heading.
for entry in search_results:
    for label, payload in entry.items():
        print(label.upper())
        print(payload['result'])

SIMILARITY
- The OmniVec backbone consists of a shared backbone network
- It includes a modality specific encoder and task specific heads
- The framework facilitates end-to-end training and allows for the interchange of encoders and task heads for different modalities and tasks


In [12]:
# Print the generated answer for every search type stored in `search_results`,
# preceded by the search type in upper case as a heading.
# NOTE(review): this cell's execution count (12) is lower than the query
# cell's (14), so the recorded output below presumably belongs to an earlier
# question -- re-run after the query cell to refresh it.
for entry in search_results:
    for label, payload in entry.items():
        print(label.upper())
        print(payload['result'])

SIMILARITY
- OmniVec is a unified data and task agnostic learning framework with a single backbone
- It utilizes modalities in different domains to aid the learning process
- OmniVec has a novel training mechanism that groups tasks and constructs mini batches by mixing inter-modality datasets
- The framework is highly generalizable, robust, and can adapt well to seen and unseen tasks
- OmniVec can generalize well to tasks with different data distributions
- It has been tested on 22 datasets spanning across image, video, point cloud, depth, audio, and text
- OmniVec achieves state of the art or close to state of the art results in various tasks and modalities


In [13]:
# For every search type, print its name in upper case and then the source
# documents the chain returned alongside the answer.
for entry in search_results:
    for label, payload in entry.items():
        print(label.upper())
        pretty_print_docs(payload['source_documents'])

SIMILARITY
Document 1 page 6 from data/raw/Omnivec.pdf:

randomly, and OmniVec-4 follows the settings from Sec-
tion3. Comparatively, OmniVec-1 lags behind the others.
Both OmniVec-2 and OmniVec-3 outperform OmniVec-1
by around 30% to 45%, showing their efﬁcacy. However,
OmniVec-4, which combines both approaches, performs
better, emphasizing the beneﬁts of integrating tasks and
modalities.
Inﬂuence of size of the modality encoder. We evalu-
ated the impact of enlarging the base modality encoder to
the scale of our suggested network, using modality-speciﬁc
data. This change slightly improved performance. For
example, on ImageNet1K, the top-1 accuracy went from
88.5% with the base ViT [ 19] to 89.1% with the aug-
mented ViT having a similar parameter count, while Om-
niVec achieved 92.4%. These ﬁndings suggest that even
----------------------------------------------------------------------------------------------------
Document 2 page 7 from data/raw/Omnivec.pdf:

SSv2 Top-1 Acc 85.4 77.