In [1]:
from llama_index.core import SimpleDirectoryReader, Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import Settings
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
# import qdrant_client
import torch
from typing import Optional


  from .autonotebook import tqdm as notebook_tqdm





In [2]:
# Load the documents under ./data, split each one into chunks
# (SentenceSplitter, chunk_size=512) and wrap every chunk in a TextNode.
from llama_index.core.schema import TextNode

documents = SimpleDirectoryReader('./data').load_data()
Splitter = SentenceSplitter(chunk_size=512)

text_chunks = []  # every chunk, in document order
doc_idxs = []     # doc_idxs[i] is the position in `documents` that text_chunks[i] came from
for doc_idx, doc in enumerate(documents):
    cur_text_chunks = Splitter.split_text(doc.text)
    text_chunks.extend(cur_text_chunks)
    # One source-document index per chunk keeps both lists aligned.
    doc_idxs.extend([doc_idx] * len(cur_text_chunks))

# TextNode: stores one text chunk (a sentence, a paragraph or a smaller unit).
# The source document's metadata is copied for each node. Assigning
# src_doc.metadata directly would make every node of a document, and the
# document itself, share a single dict, so editing one node's metadata would
# silently change all the others.
nodes = [
    TextNode(text=text_chunk, metadata=dict(documents[doc_idx].metadata))
    for text_chunk, doc_idx in zip(text_chunks, doc_idxs)
]

In [3]:
# Compute an embedding for every node with a Hugging Face model and store it
# on the node itself.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
for node in nodes:
    # metadata_mode="all" asks the node for its content together with its metadata.
    node_text = node.get_content(metadata_mode="all")
    node.embedding = embed_model.get_text_embedding(node_text)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [35]:
# Smoke test of the embedding model: print the vector length, then its first
# ten components, one per line.
# NOTE(review): the model name ends in "-en" while the sample text is Chinese;
# presumably this only checks the output dimensionality - confirm.
embeddings = embed_model.get_text_embedding("你好!")
print(len(embeddings), embeddings[:10], sep="\n")

384
[-0.018322469666600227, -0.02181188017129898, 0.0523066371679306, 0.0056607965379953384, 0.0020047046709805727, 0.055593330413103104, 0.12054598331451416, 0.027539297938346863, 0.05524640902876854, -0.007476648315787315]


In [11]:
from llama_index.llms.ollama import Ollama

# Configure the global Settings object before building the index:
#   - LLM: llama3 via Ollama, with a 60 second request timeout
#   - embedding model: bge-base
Settings.llm = Ollama(model="llama3", request_timeout=60.0)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

# Build a vector index directly from the loaded documents.
index = VectorStoreIndex.from_documents(documents)

In [5]:
from llama_index.core import VectorStoreIndex

# Build a vector index over the loaded documents.
vector_index = VectorStoreIndex.from_documents(documents)
# Bare trailing expression: the notebook displays the repr of the query engine
# it returns. The engine is not bound to a name, so it is not reused.
vector_index.as_query_engine()

<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x277bfb9ff10>

In [6]:
from llama_index.core.node_parser import SentenceSplitter

# chunk_overlap controls how much text consecutive chunks share (10 here;
# presumably measured in tokens like chunk_size - confirm against the docs).
# Illustration with an overlap of 2 words:
text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)
# chunk1[this is a test], chunk2[a test for overlap], chunk3[for overlap among chunks]

# Option 1 - global: register the splitter on Settings.
from llama_index.core import Settings

Settings.text_splitter = text_splitter

# Option 2 - per-index: pass the splitter explicitly as a transformation when
# building this particular index.
index = VectorStoreIndex.from_documents(
    documents, transformations=[text_splitter]
)

In [7]:
# Rebuild `index` from the documents with no per-index arguments; this rebinds
# the name `index` to the new index.
# NOTE(review): presumably this relies on whatever defaults are configured on
# Settings at this point - confirm.
index = VectorStoreIndex.from_documents(documents)

In [8]:
import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Read the source documents again from ./data.
documents = SimpleDirectoryReader(input_dir="./data").load_data()

# Chroma client that persists its data under ./chroma_db, plus the
# "quickstart" collection (created on first use, reused afterwards).
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection(name="quickstart")

# Wrap the collection as a vector store and hand it to a storage context so
# the index below uses Chroma as its vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index on top of that storage context.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

In [9]:
# Create a query engine from the current `index` with default arguments,
# ask it one question and print the response.
query_engine = index.as_query_engine()
response = query_engine.query("What are the RAG current challenges?")
print(response)

RAG currently faces two new challenges. Firstly, it struggles with complex data sources integration, having expanded to include various data types such as semi-structured data like tables and structured data like knowledge graphs. Secondly, there is a demand for system interpretability, controllability, and more functional components, making the entire process more flexible but also more complex.


In [None]:
# Perform a query through an explicitly assembled RetrieverQueryEngine.
from llama_index.core.query_engine import RetrieverQueryEngine

# This cell used to reference `retriever` and `llm`, neither of which exists
# at this point on a fresh kernel (NameError on Restart & Run All). The
# retriever is now derived from the current `index`, and the LLM is the one
# registered on the global Settings object.
retriever = index.as_retriever()
query_engine = RetrieverQueryEngine.from_args(retriever, llm=Settings.llm)
query_str = "What is Stable Diffusion?"
response = query_engine.query(query_str)
print(response)

In [10]:
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# Step 1: index the documents.
index = VectorStoreIndex.from_documents(documents)

# Step 2: retriever that fetches the 7 most similar nodes from that index.
retriever = VectorIndexRetriever(index=index, similarity_top_k=7)

# Step 3: response synthesizer with default settings.
response_synthesizer = get_response_synthesizer()

# Step 4: wire retriever and synthesizer into a query engine, with a
# similarity post-processor configured with a cutoff of 0.6.
query_engine = RetrieverQueryEngine(
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.6)],
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

# Step 5: run the question and show the answer.
response = query_engine.query("What are the RAG current challenges?")
print(response)

According to the provided context, the current challenges faced by RAG systems include:

1. Complex data sources integration: RAG systems can no longer be confined to a single type of unstructured text data source but have expanded to include various data types, such as semi-structured data like tables and structured data like knowledge graphs.
2. New demands for system interpretability, controllability, and more functional components: The process is no longer confined to linear but is controlled by multiple control components for retrieval and generation, making the entire system more flexible and complex.

These challenges require RAG systems to adapt to new scenarios and incorporate diverse data sources, while also ensuring the system's interpretability, controllability, and functionality.


In [21]:
from llama_index.core.postprocessor import KeywordNodePostprocessor

# Retriever limited to the 3 most similar nodes of the current index.
retriever = VectorIndexRetriever(index=index, similarity_top_k=3)

# Keyword-based post-processing of the retrieved nodes.
# NOTE(review): exclude_keywords holds a single empty string - presumably a
# placeholder meaning "exclude nothing"; confirm it is intentional.
node_postprocessors = [
    KeywordNodePostprocessor(
        required_keywords=["RAG", "current", "challenges", "Component", "Workflow"],
        exclude_keywords=[""],
    )
]

# Assemble the engine from the retriever and the post-processors, then query.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    node_postprocessors=node_postprocessors,
)
response = query_engine.query("What are the RAG current challenges?")
print(response)

RAG currently faces the following new challenges:

1. Complex data sources integration. RAG are no longer confined to a single type of unstructured text data source but have expanded to include various data types, such as semi-structured data like tables and structured data like knowledge graphs.
2. New demands for system interpretability, controllability, and more functional components.


In [29]:
from llama_index.core import PromptTemplate

# Template with two placeholders: {include_keyword} and {query}.
prompt_template = PromptTemplate(
    """
    Please provide a concise and relevant answer to the following query, focusing on the key concepts. Make sure to include information related to "{include_keyword}".

    Query: {query}

    Answer:
    """
)

# Fill in the placeholders; the whole formatted prompt is then sent to the
# existing `query_engine` as the query text.
formatted_prompt = prompt_template.format(
    include_keyword="current challenges component workflow",
    query="What are the RAG current challenges?",
)
response = query_engine.query(formatted_prompt)
print(response)

The current challenges in RAG (Retrieval-Augmented Generation) systems include:

* Complex data sources integration, requiring the system to handle various data types, such as structured and unstructured data.
* New demands for system interpretability, controllability, and maintainability, which are crucial for complex systems like RAG.
* Component selection and optimization, as more neural networks are involved in the system, necessitating the choice of appropriate components for specific tasks and resource configurations.
* Workflow orchestration and scheduling, ensuring that components are executed in a specific order, processed in parallel under certain conditions, or judged by the LLM based on different outputs.

These challenges highlight the need for RAG systems to adapt to diverse application scenarios, increasing the complexity of system design, management, and maintenance.
