# Contextual Chunk Headers (CCH)

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

import warnings

# Re-export the API keys loaded by load_dotenv(). Assigning None into os.environ
# raises "TypeError: str expected, not NoneType", so a missing key is reported
# by name here instead of crashing the cell with that opaque message.
for env_key in ("GROQ_API_KEY", "COHERE_API_KEY"):
    env_value = os.getenv(env_key)
    if env_value is None:
        warnings.warn(
            f"{env_key} is not set; add it to your environment or .env file."
        )
    else:
        os.environ[env_key] = env_value

In [2]:
from langchain_groq import ChatGroq

# Groq chat model client for the "llama3-8b-8192" model, configured with max_tokens=1000.
llm = ChatGroq(
    model="llama3-8b-8192",
    max_tokens=1000,
)

# Split into chunks

In [5]:
from langchain_community.document_loaders import PyPDFLoader

async def load_pdf_data(filepath: str):
    """Read a PDF file with PyPDFLoader and return what it loads.

    Args:
        filepath (str): Location of the PDF file to read.

    Returns:
        The result of awaiting ``PyPDFLoader(file_path=filepath).aload()``.
    """
    return await PyPDFLoader(file_path=filepath).aload()

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

async def split_into_chunks(text: str, chunk_size: int = 800) -> list[str]:
    """
    Split a given text into chunks of specified size using RecursiveCharacterTextSplitter.

    This is a coroutine function: it must be awaited (or driven with
    ``asyncio.run``). Calling it without awaiting returns a coroutine object,
    not the list of chunks.

    Args:
        text (str): The input text to be split into chunks.
        chunk_size (int, optional): The chunk size handed to the splitter,
            measured with ``len`` (characters). Defaults to 800.

    Returns:
        list[str]: A list of text chunks, in the order the splitter produced them.

    Example:
        >>> text = "This is a sample text to be split into chunks."
        >>> chunks = asyncio.run(split_into_chunks(text, chunk_size=10))
        >>> all(isinstance(chunk, str) for chunk in chunks)
        True
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=0,  # chunks do not share any text
        length_function=len,
    )
    # split_text returns the chunk strings directly, so there is no need to build
    # Document objects only to read their page_content back out.
    return text_splitter.split_text(text)

In [10]:
import nest_asyncio
nest_asyncio.apply()

import asyncio

# Location of the source PDF. Set the RAG_PDF_PATH environment variable to point
# at a different file; the original machine-specific path is kept as the default.
FILE_PATH = os.getenv("RAG_PDF_PATH", r"D:\My Files\RAG-Techniques\RAG.pdf")

documents = asyncio.run(load_pdf_data(FILE_PATH))
# Concatenate the text of every loaded document into one string before chunking.
document_text = " ".join(document.page_content for document in documents)
chunks = asyncio.run(split_into_chunks(document_text))

In [12]:
# Show the first chunk to eyeball what the splitter produced.
print(chunks[0])

Retrieval-Augmented Generation for
Knowledge-Intensive NLP Tasks
Patrick Lewis†‡, Ethan Perez⋆,
Aleksandra Piktus†, Fabio Petroni†, Vladimir Karpukhin†, Naman Goyal†, Heinrich Küttler†,
Mike Lewis†, Wen-tau Yih†, Tim Rocktäschel†‡, Sebastian Riedel†‡, Douwe Kiela†
†Facebook AI Research; ‡University College London; ⋆New York University;
plewis@fb.com
Abstract
Large pre-trained language models have been shown to store factual knowledge
in their parameters, and achieve state-of-the-art results when ﬁne-tuned on down-
stream NLP tasks. However, their ability to access and precisely manipulate knowl-
edge is still limited, and hence on knowledge-intensive tasks, their performance
lags behind task-speciﬁc architectures. Additionally, providing provenance for their


# Generate Descriptive Document Title

In [13]:
# Prompt template asking the model to answer with the document's title only.
# It contains three brace placeholders: {document_title_guidance},
# {truncation_message} and {document_text}.
# .strip() removes the leading/trailing whitespace left by the triple-quoted layout.
DOCUMENT_TITLE_PROMPT = """
INSTRUCTIONS
What is the title of the following document?

Your response MUST be the title of the document, and nothing else. DO NOT respond with anything else.

{document_title_guidance}

{truncation_message}

DOCUMENT
{document_text}
""".strip()

In [14]:
# Note telling the model it only sees the first ~{num_words} words of the document.
# Contains one brace placeholder, {num_words}.
# Presumably fills the {truncation_message} slot of DOCUMENT_TITLE_PROMPT — the
# code that combines them is not in this section.
TRUNCATION_MESSAGE = """
Also note that the document text provided below is just the first ~{num_words} words of the document. That should be plenty for this task. Your response should still pertain to the entire document, not just the text provided below.
""".strip()

In [16]:
from langchain_huggingface import HuggingFaceEmbeddings

# Token limit; make_llm_call passes this to ChatGroq as max_tokens.
MAX_CONTENT_TOKENS = 4000

async def make_llm_call(chat_messages: list[dict]) -> str:
    """
    Make an API call to the Groq language model.

    Args:
        chat_messages (list[dict]): A list of message dictionaries for the chat completion.

    Returns:
        str: The text content of the generated response.
    """
    llm = ChatGroq(model="llama3-8b-8192", max_tokens=MAX_CONTENT_TOKENS, temperature=0.2)
    response = await llm.ainvoke(chat_messages)
    # ainvoke returns a message object, not a string; the generated text lives in
    # its .content attribute, which is what the declared return type promises.
    return response.content

In [1]:
import tiktoken

# tiktoken only maps OpenAI model names to encodings, so
# encoding_for_model("llama3-8b-8192") raises KeyError. Load the cl100k_base
# encoding by name instead; its token counts only approximate the Llama 3
# tokenizer, which is acceptable for truncating prompt content.
TOKEN_ENCODER = tiktoken.get_encoding("cl100k_base")

def truncate_content(content: str, max_tokens: int) -> tuple[str, int]:
    """
    Truncate the content to a specified maximum number of tokens.

    Args:
        content (str): The input text to be truncated.
        max_tokens (int): The maximum number of tokens to keep. Must be >= 0.

    Returns:
        tuple[str, int]: A tuple containing the truncated content and the number
        of tokens that were kept.

    Raises:
        ValueError: If max_tokens is negative.
    """
    # A negative limit would slice from the end of the token list and report a
    # negative token count, so reject it up front.
    if max_tokens < 0:
        raise ValueError(f"max_tokens must be non-negative, got {max_tokens}")

    tokens = TOKEN_ENCODER.encode(content)
    truncated_tokens = tokens[:max_tokens]
    # Report the length of what was actually kept so the count always matches
    # the returned text.
    return TOKEN_ENCODER.decode(truncated_tokens), len(truncated_tokens)

ModuleNotFoundError: No module named 'tiktoken'