# Contextual Chunk Headers (CCH)
Contextual chunk headers (CCH) is a method of creating chunk headers that contain higher-level context (such as document-level or section-level context), and prepending those chunk headers to the chunks prior to embedding them. This gives the embeddings a much more accurate and complete representation of the content and meaning of the text. In our testing, this feature leads to a substantial improvement in retrieval quality. In addition to increasing the rate at which the correct information is retrieved, CCH also reduces the rate at which irrelevant results show up in the search results. This reduces the rate at which the LLM misinterprets a piece of text in downstream chat and generation applications.

In [None]:
!pip install langchain langchain-openai python-dotenv langchain-nvidia-ai-endpoints tiktoken

In [2]:
import getpass
import os


def _set_env(var: str):
    """Ensure the environment variable *var* has a non-empty value.

    If ``os.environ`` already holds a truthy value for *var*, nothing
    happens. Otherwise the user is prompted (input hidden via ``getpass``)
    and the entered text is stored in ``os.environ[var]``.
    """
    if os.environ.get(var):
        # Already configured; do not prompt again.
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# Prompt for NVIDIA_API_KEY unless it is already set in the environment.
_set_env("NVIDIA_API_KEY")

NVIDIA_API_KEY: ··········


In [3]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain.prompts import PromptTemplate


# ChatNVIDIA client configured with the meta/llama-3.1-8b-instruct model.
llm = ChatNVIDIA(model="meta/llama-3.1-8b-instruct")

# Quick smoke test of the client. Kept as the cell's final bare expression
# so the notebook displays the reply text.
llm.invoke("what is capital of India").content

'The capital of India is **New Delhi**.'

In [4]:
import tiktoken
from typing import List
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Use the same helper as for NVIDIA_API_KEY so the prompt only appears when
# COHERE_API_KEY is not already set (re-running the cell no longer re-prompts).
_set_env("COHERE_API_KEY")

COHERE_API_KEY: ··········


## 1. Load the document and split it into chunks

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_into_chunks(text: str, chunk_size: int = 800) -> list[str]:
    """Split *text* into non-overlapping chunks and return their contents.

    Args:
        text: The full document text to split.
        chunk_size: Target chunk size passed to the splitter, measured in
            characters (the splitter's length function is ``len``).

    Returns:
        The ``page_content`` string of every document produced by
        ``RecursiveCharacterTextSplitter``, in the order it emits them.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=0,  # chunks do not share any text
        length_function=len,
    )
    return [doc.page_content for doc in splitter.create_documents([text])]


In [6]:
file_path = '/content/Text summarization.txt'
# Read the document and split it into chunks.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open(file_path, "r", encoding="utf-8") as file:
    document_text = file.read()

chunks = split_into_chunks(document_text, chunk_size=800)

## 2. Generate a descriptive document title to use in the chunk header

In [7]:
# Prompt templates for asking the LLM to name a document.
#
# DOCUMENT_TITLE_PROMPT is filled in with str.format and expects three fields:
#   {document_title_guidance} - optional extra instructions (may be empty)
#   {truncation_message}      - TRUNCATION_MESSAGE text, or empty
#   {document_text}           - the document text itself
# .strip() removes the newlines that open and close the triple-quoted literal.
DOCUMENT_TITLE_PROMPT = """
INSTRUCTIONS
What is the title of the following document?

Your response MUST be the title of the document, and nothing else. DO NOT respond with anything else.

{document_title_guidance}

{truncation_message}

DOCUMENT
{document_text}
""".strip()

# Note inserted into the prompt when only the beginning of the document is
# sent; {num_words} is the approximate number of words that were included.
TRUNCATION_MESSAGE = """
Also note that the document text provided below is just the first ~{num_words} words of the document. That should be plenty for this task. Your response should still pertain to the entire document, not just the text provided below.
""".strip()

In [8]:
# Token budget for the document text sent to the model; also reused as the
# max_tokens value of the LLM call.
MAX_CONTENT_TOKENS = 4000
MODEL_NAME = "meta/llama-3.1-70b-instruct"

# tiktoken only maps OpenAI model names to encodings and raises KeyError for
# anything else (including this Llama model name). Fall back to a general
# purpose encoding so the notebook still runs.
# NOTE(review): cl100k_base is not the Llama tokenizer, so token counts used
# for truncation are an approximation.
try:
    TOKEN_ENCODER = tiktoken.encoding_for_model(MODEL_NAME)
except KeyError:
    TOKEN_ENCODER = tiktoken.get_encoding("cl100k_base")

def make_llm_call(chat_message: list[dict]) -> str:
    """Send a prompt to the ``MODEL_NAME`` chat model and return its reply.

    Only the ``'content'`` of the first entry in *chat_message* is sent;
    any further entries, and the ``'role'`` keys, are not used.

    Args:
        chat_message: List of message dicts; the first must have a
            ``'content'`` key holding the prompt text.

    Returns:
        The content of the model's response with surrounding whitespace
        stripped.
    """
    # A new client is built on every call.
    nvidia_chat = ChatNVIDIA(model=MODEL_NAME)
    first_message = chat_message[0]
    reply = nvidia_chat.invoke(
        input=first_message['content'],
        max_tokens=MAX_CONTENT_TOKENS,
        temperature=0.2,
    )
    reply_text = reply.content
    return reply_text.strip()

def truncate_content(content: str, max_tokens: int) -> tuple[str, int]:
    """Cut *content* down to its first *max_tokens* tokens.

    Args:
        content: Text to tokenize with ``TOKEN_ENCODER``.
        max_tokens: Number of leading tokens to keep.

    Returns:
        A ``(text, count)`` pair: the decoded text of the kept tokens and
        the smaller of the original token count and *max_tokens*.
    """
    # disallowed_special=() lets text that looks like a special token be
    # encoded as ordinary text instead of raising.
    token_ids = TOKEN_ENCODER.encode(content, disallowed_special=())
    kept_text = TOKEN_ENCODER.decode(token_ids[:max_tokens])
    kept_count = min(len(token_ids), max_tokens)
    return kept_text, kept_count

def get_document_title(document_text: str, document_title_guidance: str = "") -> str:
    """Ask the LLM for the title of a document.

    The text is first limited to ``MAX_CONTENT_TOKENS`` tokens. When the
    token count reaches that limit, a note is added to the prompt telling
    the model it is only seeing the beginning of the document.

    Args:
        document_text: Full text of the document.
        document_title_guidance: Optional extra instructions inserted into
            the prompt; empty by default.

    Returns:
        The model's reply, as returned by ``make_llm_call``.
    """
    # Truncate the content if it's too long
    document_text, num_tokens = truncate_content(document_text, MAX_CONTENT_TOKENS)

    if num_tokens >= MAX_CONTENT_TOKENS:
        # The word estimate was previously a fixed 3000 for a 4000-token
        # limit. Keep that 3:4 ratio but derive it from the limit so the
        # message stays accurate if MAX_CONTENT_TOKENS changes.
        approx_words = MAX_CONTENT_TOKENS * 3 // 4
        truncation_message = TRUNCATION_MESSAGE.format(num_words=approx_words)
    else:
        truncation_message = ""

    # Prepare the prompt for title extraction
    prompt = DOCUMENT_TITLE_PROMPT.format(
        document_title_guidance=document_title_guidance,
        document_text=document_text,
        truncation_message=truncation_message,
    )
    chat_messages = [{"role": "user", "content": prompt}]

    return make_llm_call(chat_messages)

# Example usage: generate a title for the document loaded earlier.
if __name__ == "__main__":
    # `document_text` must already exist (it is read from disk in an
    # earlier cell).
    document_title = get_document_title(document_text=document_text)
    print(f"Document Title: {document_title}")

Document Title: Text Summarization Assistant


## 3. Add chunk header and measure impact

In [12]:
# SECURITY: an API key used to be hard-coded on this line. Secrets must not
# live in the notebook source; read the key from the environment instead.
# Any key that was previously committed here should be treated as leaked
# and revoked.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY", "")

In [13]:
import cohere
def rerank_documents(query: str, chunks: List[str]) -> List[float]:
    """Score each chunk's relevance to *query* with the Cohere reranker.

    Args:
        query: The search query.
        chunks: Texts to score.

    Returns:
        One relevance score per chunk, in the same order as *chunks*.
        An empty *chunks* list returns an empty list without calling the
        API.

    Raises:
        KeyError: If ``COHERE_API_KEY`` is not set in the environment.
    """
    if not chunks:
        # Nothing to score; skip the network call.
        return []

    model_name = "rerank-english-v3.0"
    client = cohere.Client(api_key=os.environ["COHERE_API_KEY"])
    response = client.rerank(model=model_name, query=query, documents=chunks)

    # Results arrive in reranked order; each one carries the index of the
    # document it refers to, so place the scores back in input order.
    similarity_scores = [0.0] * len(chunks)
    for result in response.results:
        similarity_scores[result.index] = result.relevance_score

    return similarity_scores

def compare_chunk_similarities(chunk_index: int, chunks: List[str], document_title: str, query: str) -> None:
    """Print the rerank score of one chunk with and without a title header.

    The chunk at *chunk_index* is scored against *query* twice: once as-is
    and once with a ``Document Title: ...`` header prepended. The header,
    chunk text, query and both scores are printed.

    Args:
        chunk_index: Index into *chunks* of the chunk to inspect.
        chunks: All chunks of the document.
        document_title: Title used to build the chunk header.
        query: Query to score the two chunk variants against.
    """
    chunk_text = chunks[chunk_index]
    candidates = [
        chunk_text,  # without header
        f"Document Title: {document_title}\n\n{chunk_text}",  # with header
    ]

    # Scores come back in the same order as `candidates`.
    score_without_header, score_with_header = rerank_documents(query, candidates)

    print(f"\nChunk header:\nDocument Title: {document_title}")
    print(f"\nChunk text:\n{chunk_text}")
    print(f"\nQuery: {query}")
    print(f"\nSimilarity without contextual chunk header: {score_without_header:.4f}")
    print(f"Similarity with contextual chunk header: {score_with_header:.4f}")

# Chunk and query used for the comparison below.
# CHUNK_INDEX_TO_INSPECT must be a valid index into `chunks`.
CHUNK_INDEX_TO_INSPECT = 2
QUERY = "Text summarization method"

compare_chunk_similarities(
    chunk_index=CHUNK_INDEX_TO_INSPECT,
    chunks=chunks,
    document_title=document_title,
    query=QUERY,
)


Chunk header:
Document Title: Text Summarization Assistant

Chunk text:
Target Users:
Project managers, analysts, and professionals who need quick insights from large volumes of text.

Query: Text summarization method

Similarity without contextual chunk header: 0.0107
Similarity with contextual chunk header: 0.9587
