In [1]:
import torch
from transformers import BitsAndBytesConfig
from llama_index.prompts import PromptTemplate
from llama_index.llms import HuggingFaceLLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# bitsandbytes settings handed to the model loader: weights are loaded in
# 4-bit NF4 with double quantization, and compute runs in float16.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

In [3]:
def messages_to_prompt(messages):
    """Flatten a sequence of chat messages into one tagged prompt string.

    Each message whose ``role`` equals ``'system'``, ``'user'`` or
    ``'assistant'`` is rendered as ``<|role|>\\n{content}</s>\\n``; messages
    with any other role are skipped. If the rendered text does not begin with
    a system turn, an empty one is prepended. The result always ends with an
    open ``<|assistant|>\\n`` tag for the model to complete.

    Args:
        messages: iterable of objects exposing ``role`` and ``content``.

    Returns:
        The assembled prompt as a ``str``.
    """
    # (role, opening tag) pairs. Roles are matched with ``==`` rather than a
    # dict/set lookup so that role objects which merely compare equal to
    # these strings are still recognised.
    role_tags = (
        ("system", "<|system|>\n"),
        ("user", "<|user|>\n"),
        ("assistant", "<|assistant|>\n"),
    )

    turns = []
    for message in messages:
        for role, tag in role_tags:
            if message.role == role:
                turns.append(f"{tag}{message.content}</s>\n")
                break

    rendered = "".join(turns)

    # Guarantee the prompt opens with a (possibly empty) system turn.
    if not rendered.startswith("<|system|>\n"):
        rendered = "<|system|>\n</s>\n" + rendered

    # Leave the assistant turn open for generation.
    return rendered + "<|assistant|>\n"
        

In [4]:
# Load Zephyr-7B-beta with the quantization settings defined above and wire in
# the prompt formatting used for both plain queries and chat messages.
llm = HuggingFaceLLM(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
    query_wrapper_prompt=PromptTemplate("<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"),
    context_window=3900,
    max_new_tokens=256,
    model_kwargs={"quantization_config": quantization_config},
    # do_sample=True is required for temperature/top_k/top_p to have any
    # effect: Hugging Face generate() decodes greedily by default and ignores
    # the sampling parameters otherwise.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    messages_to_prompt=messages_to_prompt,
    device_map="auto",
)

Loading checkpoint shards: 100%|██████████| 8/8 [00:06<00:00,  1.25it/s]


Load the documents from the input directory using SimpleDirectoryReader.

TODO:
1) Add metadata
2) Clean files

In [5]:
from llama_index import ServiceContext, SimpleDirectoryReader

# Read every .go file under ./data, descending into subdirectories.
go_reader = SimpleDirectoryReader(
    input_dir="./data",
    recursive=True,
    required_exts=[".go"],
)
documents = go_reader.load_data()

print(f"Loaded {len(documents)} docs")

Loaded 53 docs


Create nodes from the documents for retrieval using the CodeSplitter.

TODO:
1) Add metadata

In [6]:
from llama_index.node_parser import CodeSplitter

# Splitter configured for Go source; all other options keep their defaults.
parser = CodeSplitter.from_defaults(language="go")

# Turn the loaded documents into the nodes used for indexing further down.
nodes = parser.get_nodes_from_documents(documents)


In [7]:
# Sanity check on the splitter output: node count plus a preview of the first
# node. The preview is skipped when no nodes were produced, so an empty result
# reports "Loaded 0 nodes" instead of raising IndexError.
print(f"Loaded {len(nodes)} nodes")
if nodes:
    print(f"First node: {nodes[0]}")

Loaded 278 nodes
First node: Node ID: 09e4da9f-c06a-4674-a286-52619a84d281
Text: package dashboard  import (         "embed"
"github.com/Alfagov/goDashboard/internal/config"
"github.com/Alfagov/goDashboard/internal/logger"
"github.com/Alfagov/goDashboard/internal/utils"
"github.com/Alfagov/goDashboard/models"
"github.com/Alfagov/goDashboard/pkg/components"
"github.com/a-h/templ"         "github.com/gofiber/fiber/v2"
fLogger "github...


In [8]:
from llama_index import ServiceContext

# Bundle the LLM, a local BGE-small embedding model and the code splitter
# into the service context used when building the index.
auto_merging_context = ServiceContext.from_defaults(
    node_parser=parser,
    embed_model="local:BAAI/bge-small-en-v1.5",
    llm=llm,
)

In [9]:
from llama_index import VectorStoreIndex, StorageContext

# Start from a default storage context and register the nodes in its docstore.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

# Build the vector index over the same nodes, backed by that storage context.
automerging_index = VectorStoreIndex(
    nodes=nodes,
    service_context=auto_merging_context,
    storage_context=storage_context,
)

# Persist the index's storage to ./merging_index.
automerging_index.storage_context.persist(persist_dir="./merging_index")

In [None]:
from llama_index.indices.postprocessor import LLMRerank
from llama_index.retrievers import AutoMergingRetriever
from llama_index.query_engine import RetrieverQueryEngine

# Base retriever over the index, requesting the 12 most similar nodes.
automerging_retriver = automerging_index.as_retriever(similarity_top_k=12)

# Wrap the base retriever with AutoMergingRetriever, sharing the index's
# storage context.
# NOTE(review): the nodes come from CodeSplitter; confirm they carry the
# parent/child relationships auto-merging relies on.
retriver = AutoMergingRetriever(
    automerging_retriver, automerging_index.storage_context, verbose=True
)