In [3]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from pprint import pprint

In [4]:
# Host name of the Qdrant server (handed to QdrantClient together with a port).
URL = "localhost"
# Vector size intended for the collection.
# NOTE(review): presumably the output size of the embedding model; not referenced in the cells shown here.
DIMENSION = 384
# Name of the Qdrant collection used by the vector store (alternative: "incident data").
COLLECTION_NAME = "data"
# Distance metric intended for the collection; not referenced in the cells shown here.
METRIC_NAME = "COSINE"

In [5]:
from llama_index.core.node_parser import SentenceSplitter
# Client for the Qdrant server at URL, port 6333.
client = QdrantClient(URL, port=6333)
# Vector store bound to the configured collection on that client.
vector_store = QdrantVectorStore(collection_name=COLLECTION_NAME, client=client)
# Splitter used to cut page text into chunks (chunk_size=1024).
text_splitter = SentenceSplitter(chunk_size=1024)

In [None]:
import fitz
# Open the source PDF; the path is relative to the notebook's working directory.
doc = fitz.open('data/civil_code.pdf')

In [43]:
# Split every page of the PDF into text chunks.
# doc_idxs runs parallel to text_chunks: text_chunks[i] was produced from
# page index doc_idxs[i], so the source page of each chunk can be recovered later.
text_chunks = []
doc_idxs = []
for page_number, pdf_page in enumerate(doc):
    page_chunks = text_splitter.split_text(pdf_page.get_text("text"))
    text_chunks += page_chunks
    doc_idxs += [page_number] * len(page_chunks)

In [44]:
# Total number of chunks produced (doc_idxs holds one source-page index per chunk).
len(doc_idxs)

641

In [47]:
# TextNode is not imported by any other cell shown in this notebook; import it
# here so the cell runs on a fresh kernel instead of raising NameError.
from llama_index.core.schema import TextNode

# Wrap each text chunk in a TextNode and attach the page it came from.
# text_chunks and doc_idxs are parallel lists, so zip pairs every chunk with the
# 0-based index of its source page; the metadata stores it as a 1-based page number.
# (The page object itself is not needed, so the PDF is not re-read per chunk.)
nodes = []
for text_chunk, src_doc_idx in zip(text_chunks, doc_idxs):
    node = TextNode(
        text=text_chunk,
        metadata={"page_number": src_doc_idx + 1},
    )
    nodes.append(node)

In [29]:
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Endpoint of the local OpenAI-compatible server; the same value is supplied
# under both keyword names accepted below.
local_endpoint = "http://localhost:1234/v1"
llm = OpenAI(
    base_url=local_endpoint,
    api_base=local_endpoint,
    api_key="lm-studio",
    # Optional explicit model id (left disabled):
    # model = 'lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf',
)
# Register the embedding model and the LLM as the global LlamaIndex defaults.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.llm = llm


In [49]:
from llama_index.core.extractors import (
    QuestionsAnsweredExtractor,
    TitleExtractor,
)
from llama_index.core.ingestion import IngestionPipeline

# Metadata extractors, both driven by the LLM configured above.
# NOTE(review): nodes=5 / questions=3 are presumably the number of nodes used per
# title and the number of questions generated per node — confirm against the extractor docs.
title_extractor = TitleExtractor(nodes=5, llm=llm)
questions_extractor = QuestionsAnsweredExtractor(questions=3, llm=llm)
extractors = [title_extractor, questions_extractor]

In [None]:
pipeline = IngestionPipeline(
    transformations=extractors,
)
nodes = await pipeline.arun(nodes=nodes, in_place=False)

In [None]:
# Inspect the metadata of the first node after the extraction pipeline has run.
pprint(nodes[0].metadata)

In [33]:
# Compute an embedding for every node from its content rendered with
# metadata_mode="all", and store it on the node itself.
embed_model = Settings.embed_model
for node in nodes:
    node.embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )

In [None]:
# Inspect the first node in full (now including its embedding).
pprint(nodes[0])

In [35]:
# Add the embedded nodes to the Qdrant-backed vector store; the call's result
# (a list of id strings, per the recorded output) is displayed as the cell output.
vector_store.add(nodes)

['8020aa03-7453-4dbc-b86b-e6122b2ba1d1', 'b830bbf5-bca6-4796-b552-ff64ad4fb05a', '4affa93a-1eae-49e6-8d9f-359b615e8fe1', '1b74ede0-3fb3-434b-b190-367c15fd2909', '887ddf80-ec1e-46c9-8989-f011eec9a0ba', '95ac4d42-4ace-42b9-94eb-99941096f860', 'f2f2f7de-c563-49a2-9c12-b882fd2547bd', 'b34a3d9e-c95a-4bba-af66-4c833a2fd788', '84624b19-9b21-44e5-99ba-090c724cf6f3', '70ce99e7-b1b8-469f-bdc8-34d7cf257125', 'b19a610a-dbc5-4b1b-b6ea-a89d8deea42f', 'ffd85b92-eb34-4763-b1eb-bc3c7082e2c1', '23453b9e-691b-4cfb-8b4d-832682852e13', '64fed1f3-5ab0-4e02-aa6d-9797e084d6bb', 'a193736a-4838-4b55-a5b1-8b80814ff011', '23c9a2d8-8098-46ab-a0ce-e84f5a723782', 'b97c907a-9e80-42d2-b1ab-abb6e2308fa7', 'f35415bc-8419-49ae-b7c3-4ace3bb577bf', '3f67276d-c290-48c9-ba82-2b70a3ca19e6', '0a82734a-a64a-4750-b7d7-985e9c550618', '43609729-ceed-4638-adac-4fdb8c06c4a4', 'fd22fb18-da0f-4d4d-8f68-73069172abd9', 'e17a0a61-fe2e-41da-94d3-cc6760b6e212', '8989494d-274e-444b-bf53-7e092469dd15', '04a913a2-4ce0-4312-88a4-298130fa3f0c',

In [1]:
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

# Build an index view over the existing vector store.
# Requires the earlier cells (which define `vector_store` and configure Settings)
# to have been executed first; running this cell on a fresh kernel raises NameError.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# Retrieve the 3 most similar nodes per query.
# The vector store in this notebook is created without hybrid search enabled and
# the nodes are stored with dense embeddings only, so a "hybrid" query mode cannot
# be served by it; use the default dense similarity mode instead.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=3,
    vector_store_query_mode="default",
)

# Configure the response synthesizer.
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
)

# Assemble the query engine from the retriever and the synthesizer.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Run a query and print the synthesized answer.
# NOTE(review): this sample question does not look related to the ingested PDF
# ('data/civil_code.pdf'); replace it with a question about that document.
response = query_engine.query("What did the author do growing up?")
print(response)

  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'vector_store' is not defined