In [24]:
from llama_index.readers.file import PyMuPDFReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.ollama import Ollama
from qdrant_client import QdrantClient
from llama_index.core.schema import TextNode, NodeWithScore
from sentence_transformers import SentenceTransformer
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.vector_stores import VectorStoreQuery
from llama_index.core import PromptTemplate, Settings
from llama_index.core.query_engine import RetrieverQueryEngine

# Sentence-embedding model; the same instance encodes both the document
# chunks and the user queries further down, so their vectors are comparable.
model = SentenceTransformer("all-MiniLM-L6-v2")

In [25]:
# LLM served through Ollama; also registered as the llama_index-wide default
# so components that are not handed an explicit `llm` pick it up.
llm = Ollama(model="phi3")
Settings.llm = llm

In [4]:
# Load the source PDF into llama_index documents.
loader = PyMuPDFReader()
documents = loader.load(file_path="input_file/TFM_Memoria.pdf")

In [5]:
# Split every loaded document into chunks with a sentence-aware splitter.
text_parser = SentenceSplitter(
    chunk_size=1024,
    # separator=" ",
)

text_chunks = []
# doc_idxs[i] is the position in `documents` of the document that produced
# text_chunks[i]; it is used below to attach the right metadata to each chunk.
doc_idxs = []
for doc_idx, doc in enumerate(documents):
    cur_text_chunks = text_parser.split_text(doc.text)
    text_chunks.extend(cur_text_chunks)
    doc_idxs.extend([doc_idx] * len(cur_text_chunks))

# Wrap each chunk in a TextNode carrying its source document's metadata.
# The metadata dict is copied so that nodes do not alias each other (or the
# source document): editing one node's metadata must not leak to the rest.
nodes = [
    TextNode(text=text_chunk, metadata=dict(documents[src_idx].metadata))
    for text_chunk, src_idx in zip(text_chunks, doc_idxs)
]

In [6]:
# Embed all chunks in a single batched call instead of invoking the model
# once per node. The text that is embedded includes the node metadata.
node_texts = [node.get_content(metadata_mode="all") for node in nodes]
node_embeddings = model.encode(node_texts)
for node, node_embedding in zip(nodes, node_embeddings):
    # Store plain Python floats rather than a NumPy row so the node's
    # `embedding` field holds a list, as llama_index expects.
    node.embedding = node_embedding.tolist()

In [7]:
# In-memory Qdrant instance backing the "RAG" collection.
client = QdrantClient(location=":memory:")
vector_store = QdrantVectorStore(collection_name="RAG", client=client)

# Index the embedded nodes; the call's return value (the list shown below
# the cell) is displayed as the cell output.
vector_store.add(nodes)

  self._client.create_payload_index(


['d226e550-0aef-4232-9222-dcbbc6cf5e3c',
 '1f15e02a-532b-433b-9599-aee35d9cd846',
 'f2caede2-5cc4-4ce1-9ebb-723dca41e269',
 'b9e4cb46-fb4f-4134-a99d-f08189ed7fb5',
 'b4514001-719c-445c-9990-bbd7f952d302',
 '785c5e84-d86f-4e89-9323-22017b0083c7',
 'e17f007a-47ce-44e3-9c6c-116d141c7f4b',
 '26e90df6-7a8c-4a12-90e2-0213936ca5c7',
 'ec114991-f31b-49da-8bc0-cc5ff68faf09',
 '53756407-9f73-42fe-9dbf-9bd09e42f4e0',
 '4a894c16-2e0b-4cda-bb6a-4b5275abead3',
 'd341b1ff-22cf-4ba0-b509-ba8b09919627',
 'fd8e46c3-cbee-4614-82f2-9863bd319818',
 '7059ec86-ee5e-434c-9196-c676c5f52dd9',
 '1779ea28-6bf7-4558-98bc-2f13bf38125f',
 '2d1995ce-3575-4adb-94e8-4da576914376',
 '0d5fc6a6-0b85-40a5-b6e1-18e858cb868f',
 '7bc354fe-e1b1-4653-acd0-f61fc797abd3',
 '87855ab8-3f56-43f5-83c5-6a29e994374d',
 'eb8dde2c-1fa4-4348-a02c-72048c2809e4',
 '67faf8d8-0837-4304-b265-eedba851a945',
 'f44a82d9-62db-49f4-92c8-24201acd63d9',
 '33435644-272e-44dd-846a-b47911157b96',
 'f34d9706-ec5c-4061-8256-bf911d1cd894',
 '98dda7c7-81da-

In [8]:
# Question used for the retrieval experiments below, embedded with the same
# sentence-transformer model that encoded the document chunks.
query_str = "Which algorithms were used?"
query_embedding = model.encode(query_str)

In [None]:
# Retrieval mode passed to the vector store query.
# Other modes tried here previously: "sparse", "hybrid".
query_mode = "default"

# Ask the store for the 5 chunks closest to the query embedding.
vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding,
    similarity_top_k=5,
    mode=query_mode,
)
query_result = vector_store.query(vector_store_query)

# Collect just the text of every retrieved node.
hits = []
for node in query_result.nodes:
    hits.append(node.get_content())



TextNode(id_='f44a82d9-62db-49f4-92c8-24201acd63d9', embedding=None, metadata={'total_pages': 33, 'file_path': 'input_file/TFM_Memoria.pdf', 'source': '20'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='in which given a location and 2 mutated sequences that come from the same sequence are crossed\nover. An iteration of this algorithm is a mutation for every position and all the possible crossovers.\nAfter an iteration, all the resulting sequences are evaluated and the 200 best are chosen. This value\nis a parameter and can be changed, the time of the optimization with the genetic algorithm will\nincrease exponentially as this parameter goes up. This high number was decided because of the\nthought that to get to the customer journey with the best score you probably have to go through\nnot-so-optimal sequences. This process is done 15 times. This value is a parameter and can be\nchange

In [10]:
from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from typing import Any, List, Optional


class VectorDBRetriever(BaseRetriever):
    """Retriever over a Qdrant vector store.

    Embeds the incoming query with ``embed_model`` and returns the closest
    nodes from ``vector_store`` together with their similarity scores.
    """

    def __init__(
        self,
        vector_store: QdrantVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        """Store the retrieval configuration.

        Args:
            vector_store: Store that is queried for similar nodes.
            embed_model: Object exposing ``encode(text)`` that returns the
                query embedding.
            query_mode: Value forwarded as ``mode`` to ``VectorStoreQuery``.
            similarity_top_k: Number of nodes requested from the store.
        """
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Return the nodes most similar to ``query_bundle.query_str``.

        Each node is paired with its similarity score, or ``None`` when the
        store does not report similarities.
        """
        query_embedding = self._embed_model.encode(query_bundle.query_str)
        # Array-like results (e.g. NumPy) are turned into a plain list of
        # floats; anything without `tolist` is passed through unchanged.
        if hasattr(query_embedding, "tolist"):
            query_embedding = query_embedding.tolist()

        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )
        # Query the store this retriever was constructed with, not whatever
        # `vector_store` global happens to exist in the notebook kernel.
        query_result = self._vector_store.query(vector_store_query)

        similarities = query_result.similarities
        nodes_with_scores = []
        # `nodes` may be None on an empty result; treat that as no hits.
        for index, node in enumerate(query_result.nodes or []):
            score: Optional[float] = None
            if similarities is not None:
                score = similarities[index]
            nodes_with_scores.append(NodeWithScore(node=node, score=score))

        return nodes_with_scores

In [26]:
# Custom retriever returning the 2 closest chunks for each query.
retriever = VectorDBRetriever(
    vector_store=vector_store,
    embed_model=model,
    query_mode="default",
    similarity_top_k=2,
)

# Query engine that feeds the retrieved chunks to the Ollama-backed LLM.
query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)

In [27]:
# Run the query through the engine built from `retriever` and `llm`.
# NOTE(review): the recorded run of this cell failed with an Ollama
# ResponseError because the model needed more system memory (8.9 GiB) than
# was available (4.4 GiB) -- free memory or switch to a smaller model.
response = query_engine.query(query_str)

ResponseError: model requires more system memory (8.9 GiB) than is available (4.4 GiB) (status code: 500)

In [12]:
# Question-answering prompt: the retrieved context is placed between two
# rules, followed by the user's question.
template = "".join(
    [
        "We have provided context information below. \n",
        "---------------------\n",
        "{context_str}\n",
        "---------------------\n",
        "Given this information, please answer the question: {query_str}\n",
    ]
)
qa_template = PromptTemplate(template)
