In [1]:
import torch
from llama_index.llms import LlamaCpp
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index import SimpleDirectoryReader
from llama_index import Document
import os
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index import VectorStoreIndex, ServiceContext, load_index_from_storage
from fastapi import FastAPI
from llama_index.core.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank

ImportError: cannot import name 'LlamaCpp' from 'llama_index.llms' (unknown location)

In [None]:
!pip install llama-index-llms-llama-cpp

Collecting llama-index-llms-llama-cpp
  Using cached llama_index_llms_llama_cpp-0.1.4-py3-none-any.whl.metadata (695 bytes)
Collecting llama-cpp-python<0.3.0,>=0.2.32 (from llama-index-llms-llama-cpp)
  Using cached llama_cpp_python-0.2.81.tar.gz (50.4 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting diskcache>=5.6.1 (from llama-cpp-python<0.3.0,>=0.2.32->llama-index-llms-llama-cpp)
  Using cached diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Using cached llama_index_llms_llama_cpp-0.1.4-py3-none-any.whl (6.0 kB)
Using cached diskcache-5.6.3-py3-none-any.whl (45 kB)
Building wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for llama-cpp-python [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0

In [None]:

llm = LlamaCPP(
    # You can pass in the URL to a GGML model to download it automatically
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)


def get_build_index(documents,llm,embed_model="local:BAAI/bge-small-en-v1.5",sentence_window_size=3,save_dir="./vector_store/index"):
  
  node_parser = SentenceWindowNodeParser(
      window_size = sentence_window_size,
      window_metadata_key = "window",
      original_text_metadata_key = "original_text"
  )

  sentence_context = ServiceContext.from_defaults(
      llm = llm,
      embed_model= embed_model,
      node_parser = node_parser,
  )

  if not os.path.exists(save_dir):
        # create and load the index
        index = VectorStoreIndex.from_documents(
            [documents], service_context=sentence_context
        )
        index.storage_context.persist(persist_dir=save_dir)
  else:
      # load the existing index
      index = load_index_from_storage(
          StorageContext.from_defaults(persist_dir=save_dir),
          service_context=sentence_context,
      )

  return index


def get_query_engine(sentence_index, similarity_top_k=6, rerank_top_n=2):
  postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
  rerank = SentenceTransformerRerank(
      top_n=rerank_top_n, model="BAAI/bge-reranker-base"
  )
  engine = sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
  )

  return engine

Collecting fsspec (from torch)
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
Downloading fsspec-2024.6.1-py3-none-any.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec
Successfully installed fsspec-2024.6.1


In [None]:
app = FastAPI()

@app.get("/test-llm/")
async def read_item(query: str):
    # read documents
    documents = SimpleDirectoryReader(
    input_files = ["./documents/survey_on_llms.pdf"]
    ).load_data()
    
    documents = Document(text = "\n\n".join([doc.text for doc in documents]))

    vector_index = get_build_index(documents=documents, llm=llm, embed_model="local:BAAI/bge-small-en-v1.5", sentence_window_size=3, save_dir="./vector_store/index")

    query_engine = get_query_engine(sentence_index=vector_index, similarity_top_k=6, rerank_top_n=2)
    
    response = query_engine.query(query)
    
    return {"response": display_response(response)}