In [3]:
%pip install --upgrade -r requirements.txt -U

Collecting llama-index-core (from -r requirements.txt (line 12))
  Using cached llama_index_core-0.10.52.post1-py3-none-any.whl.metadata (2.5 kB)
Collecting python-dotenv (from -r requirements.txt (line 13))
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: d:\OtsukaInternship\Code\RAG\Test1\venv\Scripts\python.exe -m pip install --upgrade pip


In [4]:
import os
import nest_asyncio
from dotenv import load_dotenv
load_dotenv()

nest_asyncio.apply()

In [5]:
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.core import SimpleDirectoryReader
# Initialize an embedding model from Hugging Face using the "BAAI/bge-small-en" model.
embedding_model = AzureOpenAIEmbedding(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    model="text-embedding-3-large",
    deployment_name="text-embedding3",
    api_version="2024-02-01"
    
)
llm = AzureOpenAI(
    model="gpt-35-turbo-16k",
    deployment_name="GPT35-turboA",
    api_version="2024-02-01"
)

In [18]:
source_docs=SimpleDirectoryReader('source_docs').load_data()

# Advanced RAG Techniques

## Chunking with Overlap (Baseline)

Parse source_docs into nodes

In [19]:
from llama_index.core import Settings
from llama_index.core.node_parser import SimpleNodeParser


baseline_parser = SimpleNodeParser.from_defaults(
    chunk_overlap=200,  # Overlap of 200 characters between chunks
    chunk_size=1024     # Each chunk consists of 1024 characters
)

# Use the created parser instance to extract nodes from the documents.
# The 'get_nodes_from_documents' method processes the documents in 'source_docs'
# and extracts structured nodes based on the parser's configuration.
baseline_nodes = baseline_parser.get_nodes_from_documents(source_docs)

In [20]:
from llama_index.core import VectorStoreIndex


Settings.llm=llm
Settings.embed_model=embedding_model
Settings.node_parser=baseline_parser


baseline_index = VectorStoreIndex(
    baseline_nodes,              # Nodes to be indexed # Service context providing necessary components
)


baseline_index.storage_context.persist(
    persist_dir="baseline_index"  # Directory name for storing the index
)

In [21]:
# Convert the baseline index into a query engine capable of finding the top 3 most similar entries.
baseline_query_engine = baseline_index.as_query_engine(similarity_top_k=3)

baseline_response = baseline_query_engine.query('What do you observer when youn look at Figure 1?')

# Retrieve the response from the query.
baseline_response.response

'You observe the cerebellum of an alpaca, specifically the unaffected cerebellar lateral hemisphere with dense myelin staining in the white matter, as well as the molecular layer.'

## Sentence Window Parser

In [22]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core import Settings



#   original_text_metadata_key: The key for storing the original text data in the metadata.
sentence_parser = SentenceWindowNodeParser.from_defaults(
    window_size=6,  # Number of sentences in each window
    window_metadata_key="window",  # Metadata key for window information
    original_text_metadata_key="original_text"  # Metadata key for original text
)

# Use the sentence parser to parse nodes from documents.
# This method processes the documents in 'source_docs' and extracts structured nodes,
# with each node representing a window of sentences.
sentence_nodes = sentence_parser.get_nodes_from_documents(source_docs)

from llama_index.core import Settings

Settings.llm=llm
Settings.embed_model=embedding_model
Settings.node_parser=sentence_parser

In [23]:
print(sentence_nodes[15], "\n")
sentence_nodes[15].metadata

Node ID: cac70354-268d-4ad5-a745-449317e4e411
Text: Case description An approximately  7-hour-old 5.2-kg female
intact Suri alpaca  cria was presented to the Oklahoma State
University Boren  Veterinary Medical Teaching Hospital with the
complaint of  being hypothermic, lethargic, and unable to stand and
nurse. 



{'window': '(Traduit par Isabelle Vallières)\nCan Vet J 2011;52:784–787\nIntroduction\nAcute respiratory distress syndrome, referred to as ARDS, \nis the manifestation of an intra- or extra-pulmonary insult \nresulting in an overzealous inflammatory cascade in the lungs. \n Ultimately, interstitial pulmonary edema develops and is fre -\nquently fatal.  The syndrome was first described in humans but \nhas since been recognized in animals, particularly in companion animals and foals (1,2).  Mortality rates vary from up to 60% in humans to almost 100% in small animal species (2).  This syn -\ndrome has not been previously reported in a camelid species, but should be included on the list of differential diagnoses for crias with acute onset respiratory distress.  Although the prognosis in other species is often guarded, treatment of alpaca crias can \nhave a good outcome.\n Case description\nAn approximately  7-hour-old 5.2-kg female intact Suri alpaca \ncria was presented to the Oklahoma S

In [24]:
from llama_index.core import VectorStoreIndex


sentence_index = VectorStoreIndex(
    sentence_nodes,         # Nodes obtained from sentence-level parsing
)

#   persist_dir: The name of the directory where the index will be stored.
sentence_index.storage_context.persist(
    persist_dir="sentence_index"  # Directory name for storing the index
)

In [25]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor


sentence_query_engine = sentence_index.as_query_engine(
    similarity_top_k=3,  # Find the top 3 most similar entries
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ]
)

sentence_response = sentence_query_engine.query('What do you observer when youn look at Figure 1?')


response = sentence_response.response
response

'I observe that Figure 1 shows the total oocytes, cumulus expansion, maturation rate, and fertilization rate.'

## Automerging Retrival (Using Hierarchical Nodes)

In [26]:
from llama_index.core.node_parser import HierarchicalNodeParser
from llama_index.core import Settings

# Initialize a HierarchicalNodeParser with default settings.
# HierarchicalNodeParser is designed to parse and structure text data into a hierarchy of nodes,
# allowing for a more structured and layered representation of the text.
# This can be particularly useful for complex documents where different levels of granularity are needed.
hierarchical_parser = HierarchicalNodeParser.from_defaults()


hierarchical_nodes = hierarchical_parser.get_nodes_from_documents(source_docs)

Settings.llm=llm
Settings.embed_model=embedding_model
Settings.node_parser=hierarchical_parser

In [27]:
from llama_index.core import VectorStoreIndex


hierarchical_index = VectorStoreIndex(
    hierarchical_nodes,           # Nodes structured hierarchically  # Service context with essential components
)

# Persist the hierarchical index to a directory. This action saves the current state of the index on disk,
# enabling the index to be reloaded and reused in the future. It ensures the persistence and availability
# of the indexed data for later use.
# Parameters:
#   persist_dir: The name of the directory where the index will be stored.
hierarchical_index.storage_context.persist(
    persist_dir="hierarchical_index"  # Directory name for storing the index
)

In [28]:
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

# Initialize an AutoMergingRetriever with the hierarchical index.
# AutoMergingRetriever is used for retrieving data from an index by automatically merging results
# from multiple queries or sources. It's particularly useful for complex data structures like a hierarchical index.
# Parameters:
#   hierarchical_index.as_retriever(similarity_top_k=3): Converts the hierarchical index into a retriever
#                                                        configured to find the top 3 most similar entries.
#   storage_context: Specifies the storage context from the hierarchical index for data management.
#   verbose: Enables verbose output, providing more detailed information during retrieval operations.
retriever = AutoMergingRetriever(
    hierarchical_index.as_retriever(similarity_top_k=3),
    storage_context=hierarchical_index.storage_context,
    verbose=True
)

# Create a RetrieverQueryEngine using the AutoMergingRetriever.
# RetrieverQueryEngine is a query engine that uses a specified retriever for querying the indexed data.
# It allows for complex query operations, especially in conjunction with retrievers like AutoMergingRetriever.
amretriever_query_engine = RetrieverQueryEngine.from_args(retriever)

# Perform a query using the AMRetriever query engine.
# The query is about the influence of camelid genetics on wool quality. The query engine processes this
# input and retrieves the most relevant entries from the index.
amretriever_response = amretriever_query_engine.query('What do you observer when youn look at Figure 1?')

# Retrieve the response from the query.
# The response contains the results of the query as determined by the query engine, potentially offering
# valuable insights into the queried topic.
response = amretriever_response.response

In [29]:
response

'I observe the presence of metaphase II and the extrusion of the first polar corpuscle, which is stained with 1% acetoorcein.'

# Evaluating RAG Performance

## Generating a test dataset

In [30]:
from ragas.testset.generator import TestsetGenerator
import random

# Initialize a TestsetGenerator using its default settings.
# TestsetGenerator is used for generating test datasets, typically for model evaluation or testing.
# The 'from_default' method sets up the generator with default configurations.
testsetgenerator = TestsetGenerator.from_llama_index(
    generator_llm=llm,
    critic_llm=llm,
    embeddings=embedding_model,
)

# Specify the sample size for the source documents.
# This determines how many documents will be randomly selected from the source documents.
sample_size = 5

# Define the number of questions to be included in the test set.
# This will set how many test cases or questions the test set will contain.
num_questions = 10

# Generate a test dataset from a random sample of source documents.
# 'random.sample' is used to randomly select a subset of documents from the source.
# The test set is then generated based on these documents.
# Parameters:
#   random.sample(source_docs, sample_size): A randomly selected subset of source documents.
#   test_size: The number of questions or test cases to generate in the test set.
testset = testsetgenerator.generate_with_llamaindex_docs(
    random.sample(source_docs, sample_size),  # Randomly selected documents
    test_size=num_questions                    # Number of questions in the test set
)

  from .autonotebook import tqdm as notebook_tqdm
Filename and doc_id are the same for all nodes.                 
Generating:  60%|██████    | 6/10 [01:03<00:30,  7.53s/it]max retries exceeded for MultiContextEvolution(generator_llm=LlamaIndexLLMWrapper(run_config=RunConfig(timeout=60, max_retries=15, max_wait=90, max_workers=16, thread_timeout=80.0, exception_types=(<class 'Exception'>,), log_tenacity=False)), docstore=InMemoryDocumentStore(splitter=<langchain_text_splitters.base.TokenTextSplitter object at 0x000001658996AA10>, nodes=[Node(metadata={'page_label': '10', 'file_name': 'Influence of Follicular Fluid on in Vitro.pdf', 'file_path': 'd:\\OtsukaInternship\\Code\\RAG\\Test1\\source_docs\\Influence of Follicular Fluid on in Vitro.pdf', 'file_type': 'application/pdf', 'file_size': 1144502, 'creation_date': '2024-07-02', 'last_modified_date': '2023-10-09'}, page_content='I. Covelo  et al.  \n \n \nDOI: 10.4236/ojas.2022.121009 127 Open Journal of Animal Sciences  \n Zoology , 50

## Minor cleanup and reformatting

In [31]:
import re

test_df = testset.to_pandas()
# Define the regex pattern to match any character that is NOT a letter, a number, '.', ',', or '?'
pattern = r"[^a-zA-Z0-9.,? ]"

# Define a function to replace special characters in a string
def remove_special_chars(s):
    return re.sub(pattern, '', str(s))

# Apply the function to each cell in the DataFrame
test_df = test_df.applymap(remove_special_chars)


test_questions = test_df['question'].values.tolist()
test_answers = [[item] for item in test_df['ground_truth'].values.tolist()]

test_df

  test_df = test_df.applymap(remove_special_chars)


Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What is the Transformer and how does it differ...,Table 3 Variations on the Transformer architec...,The answer to given question is not present in...,simple,"pagelabel 9, filename attentionisallyouneed.pd...",True
1,How does the Transformer architecture perform ...,Table 3 Variations on the Transformer architec...,The answer to given question is not present in...,simple,"pagelabel 9, filename attentionisallyouneed.pd...",True
2,What are the key findings and conclusions rega...,Table 3 Variations on the Transformer architec...,"In this work, the authors presented the Transf...",simple,"pagelabel 9, filename attentionisallyouneed.pd...",True
3,What are the variations on the Transformer arc...,Table 3 Variations on the Transformer architec...,"In Table 3 rows B, we observe that reducing th...",simple,"pagelabel 9, filename attentionisallyouneed.pd...",True


In [21]:
import asyncio
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
    answer_similarity,
    answer_correctness
)
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from ragas.integrations.llama_index import evaluate
import pandas as pd
import time
import httpx 

# List of evaluation metrics functions to be used.
metrics = [
    faithfulness,           # Evaluates faithfulness of the response to the source material.
    answer_relevancy,       # Assesses relevance of the response to the query.
    context_precision,      # Measures precision of the context in the response.
    context_recall,         # Measures recall of the context in the response.
    answer_correctness,     # Checks correctness of the answer.
    answer_similarity,      # Evaluates similarity of the answer to a reference answer.
]

# A list to collect individual result DataFrames.
results_list = []

In [22]:
@retry(stop=stop_after_attempt(5), wait=wait_exponential(min=1, max=60), retry=retry_if_exception_type(httpx.HTTPStatusError))
def safe_evaluate(query_engine, metrics, dataset, llm, embeddings):
    return evaluate(query_engine=query_engine, metrics=metrics, dataset=dataset, llm=llm, embeddings=embeddings)

In [23]:
def evaluate_and_append(query_engine, technique):
    # Evaluate the query engine.
    result = safe_evaluate(query_engine=query_engine, metrics=metrics, dataset=test_df, llm=llm, embeddings=embedding_model)
    # Add a 'technique' column to the result DataFrame.
    result['technique'] = technique

    # Add the result DataFrame to the results list.
    results_list.append(result)

    # Sleep to handle rate limits.
    # time.sleep(60)

In [32]:
evaluate_and_append(baseline_query_engine, 'chunks_with_overlap')

Running Query Engine:   0%|          | 0/4 [00:00<?, ?it/s]

Running Query Engine: 100%|██████████| 4/4 [00:02<00:00,  1.41it/s]
Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Evaluating:   4%|▍         | 1/24 [00:00<00:12,  1.86it/s]n values greater than 1 not support for LlamaIndex LLMs
Evaluating:  25%|██▌       | 6/24 [00:02<00:07,  2.57it/s]n values greater than 1 not support for LlamaIndex LLMs
Evaluating: 100%|██████████| 24/24 [01:06<00:00,  2.79s/it]


In [37]:
evaluate_and_append(sentence_query_engine, 'sentence_window')

Running Query Engine: 100%|██████████| 4/4 [00:03<00:00,  1.25it/s]
Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Evaluating:  21%|██        | 5/24 [00:03<00:14,  1.28it/s]n values greater than 1 not support for LlamaIndex LLMs
Evaluating: 100%|██████████| 24/24 [01:08<00:00,  2.84s/it]


In [42]:
evaluate_and_append(amretriever_query_engine, 'Automerging_Retrival')


Running Query Engine: 100%|██████████| 4/4 [00:02<00:00,  1.56it/s]
Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Evaluating: 100%|██████████| 24/24 [01:08<00:00,  2.86s/it]


In [43]:
# Convert each Result object's items to a dictionary and collect them in a list
dict_list = [dict(result.items()) for result in results_list]

# Convert the list of dictionaries to a DataFrame
results_df = pd.DataFrame(dict_list)

results_df

Unnamed: 0,faithfulness,answer_relevancy,context_precision,context_recall,answer_correctness,answer_similarity,technique
0,0.75,0.69527,0.5,0.4375,0.322082,0.403711,chunks_with_overlap
1,0.95,0.752753,0.5,0.5,0.253739,0.377456,sentence_window
2,0.680952,0.667933,0.5,0.5,0.274703,0.380061,Automerging_Retrival


In [None]:
%pip install mlflow azureml-mlflow -U -q