<a href="https://colab.research.google.com/github/Hamza-Chekireb/RAG-Capacity-Evaluation-Pipeline-using-GPT-and-RAGAS/blob/main/RAG_and_Evaluation_Pipeline_Llama3_70b_V_1_0_Yahsat_MVP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 1.Requirements

#### 1.1. Installation

Installation of the necessary libraries

In [None]:
!pip install langchain
!pip install pypdf
!pip install sentence_transformers
!pip install langchain
!pip install langchain_community
!pip install chromadb
!pip install openai
!pip install langchain_nvidia_ai_endpoints
!pip install fastapi pyngrok uvicorn nest-asyncio
!pip install openai
!pip install ragas
!pip install datasets

#### 1.2. Importation

In [None]:
from IPython.display import display, Markdown
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from transformers import AutoTokenizer, TextStreamer, pipeline
from langchain import HuggingFacePipeline
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline # AutoModelForCausalLM is used to add layer for application like QA
import langchain
from langchain import HuggingFacePipeline
from openai import OpenAI
from langchain import PromptTemplate
from langchain.chains import LLMChain
from fastapi import FastAPI
from pyngrok import ngrok
import uvicorn
import nest_asyncio
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
# import pandas as pd

In [None]:
from google.colab import drive
# Mount Google Drive
drive.mount('/content/drive')

In [None]:
# ChatNVIDIA.get_available_models()
# llm.get_available_models()

### 2.Creation and Management of the Vector Database







In [None]:
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
DATA_PATH = '/content/drive/MyDrive/Yahsat RAG/Documents/Bibliographie'
DB_FAISS_PATH = 'vectorstore5/db_chroma5'

# Create vector database
def create_vector_db():
    """Build and persist a Chroma vector store from the PDFs under DATA_PATH.

    Every file matching ``*.pdf`` in DATA_PATH is loaded with PyPDFLoader,
    split with ``chunk_size=512`` / ``chunk_overlap=32``, embedded with
    ``sentence-transformers/all-MiniLM-L6-v2`` on DEVICE, and stored in a
    Chroma collection whose persist directory is DB_FAISS_PATH (despite its
    name, that constant is used here as a Chroma directory).

    Returns:
        The Chroma vector store after ``persist()`` has been called on it.
    """
    pdf_loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    pages = pdf_loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
    chunks = splitter.split_documents(pages)

    # Alternative embedding models kept for reference:
    #   model_name="Alibaba-NLP/gte-large-en-v1.5"
    #   model_name="sentence-transformers/all-roberta-large-v1"
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        # trust_remote_code allows execution of a model's remote configuration
        model_kwargs={'device': DEVICE, 'trust_remote_code': True},
    )

    # Create the vector database and write it to the persist directory.
    store = Chroma.from_documents(chunks, embedder, persist_directory=DB_FAISS_PATH)
    store.persist()
    return store

In [None]:
from tqdm import tqdm
vdb = create_vector_db()

### 3.Initialize the model

In [None]:
import getpass
import os

# del os.environ['NVIDIA_API_KEY']  ## delete key and reset
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    # No key with the expected prefix is set yet: prompt for one without
    # echoing it, check the prefix, then export it for this session.
    nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ")
    assert nvapi_key.startswith("nvapi-"), f"{nvapi_key[:5]}... is not a valid key"
    os.environ["NVIDIA_API_KEY"] = nvapi_key
else:
    print("Valid NVIDIA_API_KEY already in environment. Delete to reset")

In [None]:
# NOTE(review): this unconditionally overwrites whatever key the previous cell
# validated with a hard-coded value (redacted here). Prefer the getpass cell
# above and avoid committing a real key to the notebook.
os.environ["NVIDIA_API_KEY"] = "***********************************************"

In [None]:
%pip install --upgrade --quiet langchain-nvidia-ai-endpoints

In [None]:
# Test the API
rag_llm = ChatNVIDIA(model="meta/llama3-70b-instruct", base_url = "https://integrate.api.nvidia.com/v1")
# result = llm.invoke("Tell a fact about the moon.")
# print(result.content)

In [None]:
result = rag_llm.invoke("Tell a fact about the moon.")
print(result.content)

In [None]:
memory = ConversationBufferWindowMemory(k=4)

**For Test**

In [None]:
def rag_pipeline(query):
    """Answer ``query`` with retrieval-augmented generation.

    The three most similar chunks are fetched from the global ``vdb`` store,
    joined into a context string, and baked into the prompt text. The prompt
    is then run through an ``LLMChain`` built on the global ``rag_llm`` and
    the shared conversation ``memory``.

    Args:
        query: The user's question.

    Returns:
        A dict with two keys: ``"response"`` (the chain's answer) and
        ``"context"`` (the list of retrieved documents).
    """
    docs = vdb.similarity_search(query, k=3)

    # The context is spliced into the template text itself, so any literal
    # brace in a chunk would be read as a placeholder. Doubling the braces
    # escapes them and keeps the chunk text intact once the prompt is rendered.
    context = "\n\n".join(doc.page_content for doc in docs)
    context = context.replace("{", "{{").replace("}", "}}")

    prompt_template = (
        "You are an AI assistant. Use the following context and conversation history to answer the question in a concise and direct way.\n"
        f"Context: {context}\n\n"
        "Conversation History: {history}\n\n"
        "User's Question: {query}\n "
    )
    # Only {history} and {query} remain as placeholders; the context is already
    # part of the text. Declaring "context" as an input variable would leave
    # the chain with two non-memory inputs, which chain.run(query) cannot fill
    # from a single positional argument.
    prompt = PromptTemplate(template=prompt_template, input_variables=["history", "query"])
    chain = LLMChain(
        llm=rag_llm,
        prompt=prompt,
        verbose=False,
        memory=memory,  # supplies {history} and records each exchange
    )
    return {"response": chain.run(query),
            "context": docs}

### 4. API for external connection

In [None]:
def final_result(query, context):
    """Answer ``query`` using a caller-supplied ``context`` string.

    Unlike ``rag_pipeline``, no retrieval happens here: the context arrives
    with the request. The prompt is run through an ``LLMChain`` built on the
    global ``rag_llm`` and the shared conversation ``memory``.

    Args:
        query: The user's question.
        context: Text to ground the answer in. It comes from the API request
            and is therefore treated as untrusted.

    Returns:
        The chain's answer as returned by ``chain.run``.
    """
    # The context is spliced into the template text itself. Without escaping,
    # a "{" or "}" in the request would be parsed as a placeholder and either
    # break formatting or inject a template variable. Doubling the braces
    # makes them literal.
    safe_context = context.replace("{", "{{").replace("}", "}}")

    prompt_template = (
        "You are an AI assistant. Use the following context and conversation history to answer the question in a concise and direct way.\n"
        f"Context: {safe_context}\n\n"
        "Conversation History: {history}\n\n"
        "User's Question: {query}\n "
    )
    # Only {history} and {query} remain as placeholders; declaring "context"
    # would leave the chain with two non-memory inputs, which chain.run(query)
    # cannot fill from a single positional argument.
    prompt = PromptTemplate(template=prompt_template, input_variables=["history", "query"])
    chain = LLMChain(
        llm=rag_llm,
        prompt=prompt,
        verbose=False,
        memory=memory,  # supplies {history} and records each exchange
    )
    return chain.run(query)

In [None]:
from pydantic import BaseModel
# Request body model for the `api` route defined in the next cell.
class Item(BaseModel):
    # The user's question; passed to final_result as `query`.
    question: str
    # Grounding text supplied by the caller; passed to final_result as `context`.
    context: str

In [None]:
from fastapi import FastAPI

app = FastAPI()

@app.post('/llama3-70b')
def api(item: Item):
    # Delegate to final_result with the posted question and context; the
    # returned value is sent back as the JSON response body.
    return final_result(item.question, item.context)

In [None]:
import getpass
import os

# Take the ngrok auth token from the NGROK_AUTHTOKEN environment variable, or
# prompt for it, instead of hard-coding it in the notebook.
# NOTE(review): the token that used to be written in this cell has been
# published with the notebook and should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)

# Allow nested asyncio loops
nest_asyncio.apply()

# Open a public tunnel to local port 8000, then serve the FastAPI app on it.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
uvicorn.run(app, host='0.0.0.0', port=8000)

### 5.Model Evaluation

In [None]:
from tqdm import tqdm
# Get all the chunks
# Reload the PDFs from DATA_PATH and split them with the same chunk_size /
# chunk_overlap values that create_vector_db uses. The resulting `docs` list
# is what creat_qc samples from when generating evaluation questions.
loader = DirectoryLoader(DATA_PATH,
                          glob='*.pdf',
                          loader_cls=PyPDFLoader)

documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512,
                                                chunk_overlap=32)
docs = text_splitter.split_documents(documents)

In [None]:
len(docs)

#### 5.1.Creating the dataset : **Generate questions**

**Generate a question for each context**: This part adds a question, generated by the ground_truth model for some selected contexts, to the evaluation dataset.

In [None]:
#1. Generate questions shema
# Schema for the generated question field.
question_schema = ResponseSchema(
    name="question",
    description="A question about the context",
    type="string"
)

# Schema for the context field that accompanies the question.
context_schema = ResponseSchema(
    name="context",
    description="The context related to the question",
    type="string"
)

# Create a Structured Output Parser with both schemas
parser = StructuredOutputParser.from_response_schemas([question_schema, context_schema])

# Get the format instructions
format_instructions = parser.get_format_instructions()
print(format_instructions)


In [None]:
qa_template = """You are a University professor creating a test for advanced students.
For each context, generate a question about the context.
Format the output as JSON with the following keys:
- question
- context
Question: Generate a question about the given context
Context: {context}
format_instructions: {format_instructions}
"""

In [None]:
prompt_template = ChatPromptTemplate.from_template(qa_template)

In [None]:
prompt_template

In [None]:
# messages = prompt_template.format_messages(context=docs[2].page_content,
#                                            format_instructions=format_instructions)
# messages

In [None]:
# OpenAI model (ChatOpenAI is already imported in the Importation cell).
# `os` is imported in the NVIDIA API key cell above.
import openai
# NOTE(review): redacted placeholder; supply a real key at runtime (e.g. via
# getpass) rather than committing it to the notebook.
os.environ['OPENAI_API_KEY'] = "**************************"
openai.api_key = os.environ['OPENAI_API_KEY']
# Used below to generate ground-truth answers and passed as `llm` to the Ragas evaluation.
ground_truth_llm = ChatOpenAI(model_name="gpt-4o", temperature=0.1, max_tokens=1024 )

In [None]:
# result = ground_truth_llm.invoke(messages)
# result

In [None]:
# output_dict = parser.parse(result.content)
# output_dict
# Markdown(output_dict['question'])
# Markdown(output_dict['context'])

In [None]:
from tqdm import tqdm
import random

In [None]:
def creat_qc(docs, n_samples=15) -> list:
    """Generate one question per randomly sampled chunk.

    Up to ``n_samples`` chunks are drawn from ``docs`` without replacement.
    For each one, the global ``prompt_template`` is filled with the chunk text
    and ``format_instructions``, sent to ``rag_llm``, and the reply is parsed
    with the global ``parser``. Replies that fail to parse are printed and
    skipped, so the result may hold fewer than ``n_samples`` entries.

    Args:
        docs: List of document chunks exposing ``page_content``.
        n_samples: Maximum number of chunks to sample (default 15).

    Returns:
        A list of the dicts returned by ``parser.parse``, one per successfully
        parsed reply.
    """
    # random.sample raises ValueError when asked for more items than exist (or
    # for a negative count), so clamp the request to what is available.
    sample_size = min(max(n_samples, 0), len(docs))
    random_docs = random.sample(docs, sample_size)

    qc = []
    for doc in tqdm(random_docs):
        messages = prompt_template.format_messages(
            context=doc.page_content.replace("{", "(").replace("}", ")"),
            format_instructions=format_instructions,
        )
        # NOTE(review): the accompanying markdown says questions come from the
        # ground-truth model, but this call uses rag_llm. Confirm which is intended.
        result = rag_llm.invoke(messages)
        try:
            output_dict = parser.parse(result.content)
        except Exception as e:
            # Best effort: report the malformed reply and move on.
            print(e)
            continue
        qc.append(output_dict)
    return qc

In [None]:
qc = creat_qc(docs)

In [None]:
qc

In [None]:
# for i in tqdm(range(len(qac))):
#   print()
#   print(type(qac[i]))
#   print(qac[i].keys())
#   print()

#### 5.2.Creating the dataset(Generate answers)

This part creates an answer for each question/context pair. This is the ground-truth answer: the answer produced by the most accurate model available (in our case, GPT-4o). These answers are then compared with the answers generated by our RAG pipeline.


In [None]:
# Schema for the single "answer" field expected from the ground-truth model.
answer_schema = ResponseSchema(name="answer", description="The answer to the question")
# NOTE: this rebinds the global `parser` and `format_instructions` that the
# question-generation cells defined earlier. Re-running creat_qc after this
# cell would therefore use the answer parser instead of the question parser.
parser = StructuredOutputParser.from_response_schemas([answer_schema])
parser
format_instructions = parser.get_format_instructions()
print(format_instructions)

In [None]:
template = """You are a University professor creating a test for advanced students.
For each question and context, generate a answer.
Format the output as JSON with the following keys:
- answer

Question: {question}

Context: {context}

format_instructions: {format_instructions}
"""

In [None]:
prompt_template = ChatPromptTemplate.from_template(template)
prompt_template

In [None]:
# messages = prompt_template.format_messages(context=qac[0]['context'],
#                                            question = qac[0]['question'],
#                                            format_instructions=format_instructions)
# print(messages[0].content)

In [None]:
# response = llm.invoke(messages)
# print(response.content)

In [None]:
# output_dict = parser.parse(response.content)
# output_dict

In [None]:
# Adding the ground truth answers to the dataset
def add_ground_truth_answers(qc) -> list:
    """Attach a ground-truth answer to every question/context entry.

    For each dict in ``qc``, the global ``prompt_template`` is filled with its
    ``'context'`` and ``'question'``, sent to ``ground_truth_llm``, and the
    parsed ``'answer'`` is stored back on the same dict. Entries whose reply
    cannot be parsed are reported and left without an ``'answer'`` key.

    Args:
        qc: List of dicts with ``'question'`` and ``'context'`` keys. The
            list is modified in place.

    Returns:
        The same list object that was passed in.
    """
    for entry in tqdm(qc):
        messages = prompt_template.format_messages(
            context=entry['context'],
            question=entry['question'],
            format_instructions=format_instructions,
        )
        response = ground_truth_llm.invoke(messages)
        try:
            entry['answer'] = parser.parse(response.content)['answer']
        except Exception as e:
            print(e)
    return qc

#### 5.3. Create the evaluation dataset

In [None]:
# # from datasets import Dataset
# import pandas as pd
# ground_truth_data = pd.read_csv('ground_truth.csv')
# ground_truth_data

In [None]:
from re import escape
# pandas is needed here, but the import in the Importation cell is commented
# out, so `pd` would otherwise be undefined.
import pandas as pd

# Generate the ground-truth answers and collect everything in a DataFrame.
qcg = pd.DataFrame(add_ground_truth_answers(qc))
# Write the dataset to disk for the Ragas step. to_csv returns None when given
# a path, so qcg_csv is always None.
qcg_csv = qcg.to_csv('/content/qcg.csv', index=False, escapechar = '\\')

In [None]:
qcg

In [None]:
qcg.info()

#### 5.4. Evaluate the model

In [None]:
# !pip install --upgrade pyarrow

In [None]:
from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision
from ragas.metrics.critique import harmfulness
from ragas import evaluate

In [None]:
import datasets

In [None]:
from datasets import Dataset
qcg_dataset = Dataset.from_csv("/content/qcg.csv")
qcg_dataset

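The function below builds the dataset that Ragas evaluates. Conceptually it has the shape shown here; note that the code itself emits the column names `question`, `answer`, `ground_truth` and `contexts` (a list of context strings per row).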

```
{
    "context": [
        "Marie Curie was a physicist and chemist who conducted pioneering research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win Nobel Prizes in two different scientific fields.",
        "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower."
    ],
    "questions": [
        "Who was the first woman to win a Nobel Prize?",
        "What is the Eiffel Tower named after?"
    ],
    "ground_truth_answers": [
        "Marie Curie was the first woman to win a Nobel Prize.",
        "The Eiffel Tower is named after Gustave Eiffel."
    ],
    "generated_answers": [
        "Marie Curie was the first woman to win a Nobel Prize.",
        "The Eiffel Tower is named after the engineer who built it, Gustave Eiffel."
    ]
}
```



In [None]:
def create_ragas_eval_dataset(rag_pipeline, qcg_dataset):
    """Run the RAG pipeline over the question set and build the Ragas dataset.

    Args:
        rag_pipeline: Callable taking a question string and returning a dict
            with ``"response"`` (the answer) and ``"context"`` (retrieved
            documents exposing ``page_content``).
        qcg_dataset: Iterable of rows with ``'question'`` and ``'answer'``
            keys, where ``'answer'`` is the ground-truth answer.

    Returns:
        A ``datasets.Dataset`` with the columns ``question``, ``answer``,
        ``ground_truth`` and ``contexts``.
    """
    # Imported locally because the pandas import in the Importation cell is
    # commented out, so `pd` is not otherwise defined.
    import pandas as pd

    rag_dataset = []
    for row in tqdm(qcg_dataset):
        # Answer generated by the pipeline for this question
        answer = rag_pipeline(row['question'])
        rag_dataset.append({
            'question': row['question'],
            'answer': answer['response'],
            'ground_truth': row['answer'],
            'contexts': [context.page_content for context in answer['context']],
        })
    rag_df = pd.DataFrame(rag_dataset)
    eval_dataset = Dataset.from_pandas(rag_df)
    return eval_dataset


In [None]:
eval_dataset = create_ragas_eval_dataset(rag_pipeline, qcg_dataset)
eval_dataset

In [None]:
# !pip install --upgrade ragas  # Ensure you have the latest version of ragas
# ! pip install nest_asyncio

In [None]:
import nest_asyncio
nest_asyncio.apply()

In [None]:
from tqdm import tqdm

from ragas import evaluate, metrics, RunConfig
from ragas.metrics import  answer_relevancy, context_recall, context_precision, faithfulness, ContextRelevancy

def evaluate_ragas_dataset(ragas_dataset):
    """Score ``ragas_dataset`` with Ragas, using ``ground_truth_llm`` as judge.

    The metrics computed are answer relevancy, context recall, context
    precision and faithfulness.

    Args:
        ragas_dataset: Dataset in the format produced by
            ``create_ragas_eval_dataset``.

    Returns:
        The result object returned by ``ragas.evaluate``.
    """
    selected_metrics = [
        answer_relevancy,
        context_recall,
        context_precision,
        faithfulness,
    ]
    # raise_exceptions=False asks Ragas not to abort when a metric fails on a
    # row. NOTE(review): failed rows are then expected to be scored as NaN
    # rather than raising; confirm against the installed Ragas version.
    return evaluate(
        ragas_dataset,
        metrics=selected_metrics,
        llm=ground_truth_llm,
        raise_exceptions=False,
    )

evaluation = evaluate_ragas_dataset(eval_dataset)

In [None]:
# Embedding model: 'sentence-transformers/all-MiniLM-L6-v2', chunk size = 512
# Eval Model = "gpt-4o"
print(evaluation)

In [None]:
# Print the evaluation result, which may contain error messages. Embedding model: 'sentence-transformers/all-MiniLM-L6-v2', chunk size = 1024
print(evaluation)

In [None]:
# sentence-transformers/all-roberta-large-v1
print(evaluation)

In [None]:
# Embedding model used : "Alibaba-NLP/gte-large-en-v1.5"


In [None]:
# Embedding model used : #'sentence-transformers/all-MiniLM-L6-v2'


**Answer Relevancy (0.9736)**: Indicates that the answers generated are very relevant to the user's queries.

**Context Recall (0.7273)**: Shows that the system is retrieving a significant portion of relevant context but may still miss some relevant pieces.

**Context Precision (0.9231)**: Suggests that the majority of the retrieved context is relevant, with minimal irrelevant information included.

**Faithfulness (0.8654)**: Implies that the generated responses are mostly accurate and true to the source information.