# LMQL with LlamaIndex

CONTENT:


---
CONCLUSIONS:

---
---

https://lmql.ai/docs/latest/lib/integrations/llama_index.html - outdated example with regard to query/query_engine \
https://docs.llamaindex.ai/en/stable/examples/llm/llama_2_llama_cpp/


NOTE: to run the notebook
1. Remove 'local:' from `llm = lmql.model("local:llama.cpp:/home/dorota/models/mistral-7b-instruct-v0.2.Q6_K.gguf", tokenizer="mistralai/Mistral-7B-Instruct-v0.2")`.
2. Start a service in a terminal with: `lmql serve-model llama.cpp:/home/dorota/models/mistral-7b-instruct-v0.2.Q6_K.gguf --verbose True --n_gpu_layers 20 --n_ctx 0`


In [None]:
import lmql
from llama_index.core import GPTVectorStoreIndex, VectorStoreIndex, SimpleDirectoryReader, ServiceContext, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from transformers import AutoTokenizer

In [None]:
# LMQL model handle for the llama.cpp backend; see
# https://lmql.ai/docs/models/llama.cpp.html#running-without-a-model-server
# The tokenizer is referenced by its HuggingFace repo id (tokenizer.model from
# https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/tree/main).

llm = lmql.model(
    "llama.cpp:/home/dorota/models/mistral-7b-instruct-v0.2.Q6_K.gguf",
    tokenizer="mistralai/Mistral-7B-Instruct-v0.2",
    n_gpu_layers=10,
    n_ctx=0,
    verbose=True,
)

In [None]:
# Load every document found in the article folder.
# Per the original note: the result is a list with one document per article
# page, each carrying metadata and tags (text available as documents[0].text).
documents = SimpleDirectoryReader(
    "/home/dorota/LLM-diploma-project/00_concept_tests/data/cancer_article"
).load_data()

In [None]:
# Configure the global LlamaIndex Settings:
# - embed_model: HuggingFace model used to create vector embeddings for text nodes
# - tokenizer: the encode function of the Mistral-7B-Instruct tokenizer
#   (presumably so token handling matches the model served by LMQL - confirm)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)
Settings.tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2"
).encode

In [None]:
# Build the vector index over all loaded documents
# (pass a slice such as documents[0:1] to index fewer documents).
index = VectorStoreIndex.from_documents(documents)

# Settings.llm is set to None first so the query engine is configured correctly:
# per the original author's note, llm=None makes as_query_engine fall back to
# Settings.llm, which has just been defined as None.
Settings.llm = None
query_engine = index.as_query_engine(
    streaming=True,
    llm=None,
)

In [None]:
# Number of most similar text nodes to retrieve for each question.
similarity_top_k = 2

# Retrieval-only access to the index: the query below needs just the matching
# source nodes, so no response synthesis is run, and similarity_top_k
# (previously defined but never used) now controls how many nodes come back.
retriever = index.as_retriever(similarity_top_k=similarity_top_k)


# LMQL query: answers `question` with the `llm` model, grounding the prompt in
# text retrieved from the vector index. The generated answer is captured in the
# RESPONSE variable.
# NOTE: the string below is the LMQL program itself, not documentation, so it
# must remain the first statement of the function.
@lmql.query(model=llm)
async def index_query(question: str):
    '''lmql
    "You are a QA bot that helps users answer questions.\n"
    
    # ask the question
    "Question: {question}\n"

    # look up the most similar nodes and insert their text into the context
    nodes = retriever.retrieve(question)
    information = "\n\n".join([s.node.get_text() for s in nodes])
    # echo the retrieved context so it can be inspected in the notebook output
    print(information)
    "\nRelevant Information: {information}\n"
    
    # generate a response
    "Your response based on relevant information:[RESPONSE]"
    '''


In [None]:
result = await index_query("What is the title of the article?", 
                   output_writer=lmql.stream(variable="RESPONSE"))
