In [1]:
# Choosing chunk size


## Overview: Choosing Chunk Size in LLMs (Large Language Models)

**Definition:** Chunk size is the length of each smaller, manageable piece that text data is split into before it is processed by a language model.

**Purpose:** To balance computational efficiency against preserving enough context for accurate responses.

In [None]:
import os
import random
import time

import nest_asyncio
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.core.evaluation import DatasetGenerator, FaithfulnessEvaluator, RelevancyEvaluator
from llama_index.core.prompts import PromptTemplate

try:
    # With the `llama_index.core` package layout, the Ollama LLM ships in the
    # separate `llama-index-llms-ollama` integration package.
    from llama_index.llms.ollama import Ollama
except ImportError:
    # Fall back to the original import location.
    from llama_index.llms import Ollama

In [None]:

# Apply nest_asyncio to avoid asyncio issues
nest_asyncio.apply()

# Load environment variables
load_dotenv()

# Seed the RNG so the sampled evaluation questions are the same on every
# Restart & Run All (given the same generated question list).
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Directory for data
data_dir = "sample"
documents = SimpleDirectoryReader(data_dir).load_data()

# Create evaluation questions and pick k out of them
num_eval_questions = 25
eval_documents = documents[0:20]
# NOTE(review): no llm/service_context is passed here, so question generation
# presumably uses LlamaIndex's default LLM rather than the local Ollama model
# -- confirm this is intended.
data_generator = DatasetGenerator.from_documents(eval_documents)
eval_questions = data_generator.generate_questions_from_nodes()
# random.sample raises ValueError when asked for more items than the population
# holds, so cap k at the number of questions that were actually generated.
k_eval_questions = random.sample(
    eval_questions, min(num_eval_questions, len(eval_questions))
)

# Define service context for Ollama for evaluation
llama_service_context = ServiceContext.from_defaults(llm=Ollama(model="llama3"))

In [None]:
# Define Faithfulness and Relevancy Evaluators (both judged by the local Ollama model)
faithfulness_llama = FaithfulnessEvaluator(service_context=llama_service_context)
relevancy_llama = RelevancyEvaluator(service_context=llama_service_context)

# Define new prompt template for faithfulness evaluation.
# The template is filled through the {query_str} and {context_str} placeholders.
faithfulness_new_prompt_template = PromptTemplate(""" Please tell if a given piece of information is directly supported by the context.
    You need to answer with either YES or NO.
    Answer YES if any part of the context explicitly supports the information, even if most of the context is unrelated. If the context does not explicitly support the information, answer NO. Some examples are provided below.

    Information: Apple pie is generally double-crusted.
    Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.
    Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard, or cheddar cheese.
    It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
    Answer: YES

    Information: Apple pies taste bad.
    Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.
    Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard, or cheddar cheese.
    It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
    Answer: NO

    Information: Paris is the capital of France.
    Context: This document describes a day trip in Paris. You will visit famous landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral.
    Answer: NO

    Information: {query_str}
    Context: {context_str}
    Answer:
    """)

# FaithfulnessEvaluator exposes its evaluation prompt under the "eval_template"
# key; an unrecognised key is ignored, which would leave the default prompt in
# place and make the custom template above a silent no-op.
faithfulness_llama.update_prompts({"eval_template": faithfulness_new_prompt_template})

In [None]:
def evaluate_response_time_and_accuracy(chunk_size, eval_questions):
    """
    Evaluate the average response time, faithfulness, and relevancy of responses generated by the local Ollama model for a given chunk size.

    A fresh vector index is built over the notebook-level ``eval_documents``
    with the requested chunk size (overlap is one fifth of the chunk size),
    and every question is answered by a query engine retrieving the top 5
    chunks. Each response is then judged by the notebook-level
    ``faithfulness_llama`` and ``relevancy_llama`` evaluators.

    Parameters:
    chunk_size (int): The size of data chunks being processed.
    eval_questions (list): The questions to run through the query engine; must not be empty.

    Returns:
    tuple: A tuple containing the average response time, faithfulness, and relevancy metrics.
        The response time is in seconds and covers only the query call, not the
        evaluator calls. Faithfulness and relevancy are the fraction of
        responses whose evaluation result was ``passing``.

    Raises:
    ValueError: If ``eval_questions`` is empty.
    """
    num_questions = len(eval_questions)
    if num_questions == 0:
        # Fail before the expensive index build instead of ending in a
        # ZeroDivisionError when the averages are computed.
        raise ValueError("eval_questions must contain at least one question")

    # Create vector index using Ollama
    llm = Ollama(model="llama3")
    service_context = ServiceContext.from_defaults(
        llm=llm, chunk_size=chunk_size, chunk_overlap=chunk_size // 5
    )
    vector_index = VectorStoreIndex.from_documents(
        eval_documents, service_context=service_context
    )
    query_engine = vector_index.as_query_engine(similarity_top_k=5)

    total_response_time = 0.0
    total_faithfulness = 0
    total_relevancy = 0

    for question in eval_questions:
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        response_vector = query_engine.query(question)
        elapsed_time = time.perf_counter() - start_time

        faithfulness_result = faithfulness_llama.evaluate_response(
            response=response_vector
        ).passing

        relevancy_result = relevancy_llama.evaluate_response(
            query=question, response=response_vector
        ).passing

        total_response_time += elapsed_time
        # `passing` is added as a number, so each pass counts as 1.
        total_faithfulness += faithfulness_result
        total_relevancy += relevancy_result

    average_response_time = total_response_time / num_questions
    average_faithfulness = total_faithfulness / num_questions
    average_relevancy = total_relevancy / num_questions

    return average_response_time, average_faithfulness, average_relevancy


In [None]:
# Sweep over the candidate chunk sizes and report the averaged metrics for each
chunk_sizes = [128, 256]

for chunk_size in chunk_sizes:
    # Run the full evaluation once per chunk size, then unpack the three averages
    metrics = evaluate_response_time_and_accuracy(chunk_size, k_eval_questions)
    avg_response_time, avg_faithfulness, avg_relevancy = metrics
    print(f"Chunk size {chunk_size} - Average Response time: {avg_response_time:.2f}s, Average Faithfulness: {avg_faithfulness:.2f}, Average Relevancy: {avg_relevancy:.2f}")

## Advantages of Using Local Models

**Cost Efficiency:** No need for API calls to external services, reducing costs associated with usage-based pricing.

**Data Privacy:** Sensitive data remains within your local environment, minimizing risks of data leakage.

**Customizability:** Easier to customize and fine-tune local models to meet specific needs.

**Control Over Performance:** Direct control over model performance and resource utilization.

## Disadvantages of Using Local Models

**Resource Intensive:** Requires substantial local computational resources for model inference and training.

**Maintenance:** Local models need to be managed and updated, which can be time-consuming.

**Scalability:** Scaling to large datasets or high volumes of requests may be challenging without robust infrastructure.

**Initial Setup Complexity:** Setting up and configuring local models can be complex and require specialized knowledge.






