In [21]:
print("perplexity")

perplexity


In [22]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [23]:
perplexity_api_key=os.getenv("perplexity_api_key")

In [24]:
from openai import OpenAI

model="llama-3.1-sonar-huge-128k-online"
prompt="What is Rag system?"

In [25]:
prompt=[
    {
        "role": "system",
        "content": (
            "You are an artificial intelligence assistant and you need to "
            "engage in a helpful, detailed, polite conversation with a user."
        ),
    },
    {
        "role": "user",
        "content": (
            # user will ask a question
            prompt
        ),
    },
]

In [26]:
client=OpenAI(api_key=perplexity_api_key,base_url="https://api.perplexity.ai")
response=client.chat.completions.create(
    model=model,
    messages=prompt
)

In [29]:
response.choices[0].message.content

"A Retrieval-Augmented Generation (RAG) system is an AI framework that combines the capabilities of large language models (LLMs) with external data sources to provide more accurate, up-to-date, and context-specific responses. Here’s a detailed overview:\n\n1. **Two Phases: Ingestion and Retrieval**\n   - **Ingestion Phase:** This involves creating an index of external data sources, such as databases, documents, or web pages. The data is processed into dense vector representations (embeddings) that allow for efficient retrieval[1][2].\n   - **Retrieval Phase:** When a user asks a question, the system retrieves relevant information from the indexed data sources. This information is then integrated into the LLM's input, enabling it to generate more accurate and contextually relevant responses[1][2].\n\n2. **Key Benefits:**\n   - **Grounded Generation:** RAG ensures that LLMs are grounded on the most current and reliable facts, reducing the risk of generating false or outdated information[

### RAG testing 

In [30]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch
import pandas as pd
from giskard.rag import KnowledgeBase 
from giskard.rag import generate_testset 

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
loader=PyPDFLoader("attention_is_all_you_need.pdf")
attention=loader.load()
documents=text_splitter.split_documents(attention)
vectorstore=DocArrayInMemorySearch.from_documents(
    documents,embedding=OpenAIEmbeddings()
)
df=pd.DataFrame([file.page_content for file in documents],columns=["text"])
knowledge_base=KnowledgeBase(df)
testset=generate_testset(
    knowledge_base,
    num_questions=60,
    agent_description="A Chat bot answering questions related to Pdf uploaded by Users"
)
test_set_df=testset.to_pandas()

2024-12-25 22:14:08,777 pid:8608 MainThread giskard.rag  INFO     Finding topics in the knowledge base.




2024-12-25 22:14:15,580 pid:8608 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 60/60 [02:38<00:00,  2.64s/it]


In [31]:
testset.save("test-set.json")

### OPENAI

In [32]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based on provided context.
Think step by step before providing a detailed answer.
<context>
{context}
</context>
Question:{input}
"""
)

query="Who are authors of Attention is all you need paper ? ?"
retrieved_results=vectorstore.similarity_search(query)
context=retrieved_results[0].page_content
context

'Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurren

In [33]:
chain=prompt|ChatOpenAI()|StrOutputParser()
response=chain.invoke(
    {
        "context":context,
        "input":query
    }
)
response

'The authors of the "Attention Is All You Need" paper are Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, and Illia Polosukhin.'

In [34]:
def answer_fn(query,history=None):
    query=query
    retrieved_results=vectorstore.similarity_search(query)
    context=retrieved_results[0].page_content
    chain=prompt|ChatOpenAI()|StrOutputParser()
    return chain.invoke({"context":context,"input":query })

In [35]:
from giskard.rag import evaluate

report=evaluate(answer_fn,testset=testset,knowledge_base=knowledge_base)

Asking questions to the agent: 100%|██████████| 60/60 [02:23<00:00,  2.39s/it]
CorrectnessMetric evaluation: 100%|██████████| 60/60 [01:07<00:00,  1.12s/it]


In [36]:
display(report)

In [37]:
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,0.7
conversational,0.0
distracting element,0.5
double,1.0
simple,0.6
situational,0.8


### Perplexity

In [38]:
from openai import OpenAI
from langchain.prompts import ChatPromptTemplate

client = OpenAI(api_key=perplexity_api_key, base_url="https://api.perplexity.ai")

prompt_template = ChatPromptTemplate.from_template(
    """
Answer the following question based on the provided context.
Think step by step before providing a detailed answer.
<context>
{context}
</context>
Question: {input}
""" 
)

In [39]:
query = "What is the name of Vice Chancellor ?"
retrieved_results = vectorstore.similarity_search(query)  
context = retrieved_results[0].page_content

messages = [
    {"role": "system", "content": "You are an AI assistant trained to answer based on provided context."},
    {"role": "user", "content": prompt_template.format(context=context, input=query)},
]

response = client.chat.completions.create(
    model="llama-3.1-sonar-huge-128k-online",
    messages=messages
)

answer = response.choices[0].message.content
print(answer)

The provided context does not mention the name of a Vice Chancellor. The references listed are related to research papers on neural machine translation and parsing, and do not include information about university administrators or their titles. Therefore, it is not possible to answer the question based on the given context.


In [40]:
def answer_fn(query, history=None):
    retrieved_results = vectorstore.similarity_search(query)
    context = retrieved_results[0].page_content
    messages = [
        {"role": "system", "content": "You are an AI assistant trained to answer based on provided context."},
        {"role": "user", "content": prompt_template.format(context=context, input=query)},
    ]
    response = client.chat.completions.create(
        model="llama-3.1-sonar-huge-128k-online",
        messages=messages
    )
    return response.choices[0].message.content

In [41]:
from giskard.rag import evaluate

report = evaluate(answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent: 100%|██████████| 60/60 [12:22<00:00, 12.37s/it]
CorrectnessMetric evaluation: 100%|██████████| 60/60 [01:03<00:00,  1.05s/it]


In [42]:
display(report)

In [43]:
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,0.8
conversational,0.1
distracting element,0.5
double,1.0
simple,0.9
situational,0.9


### Perplexity2

In [44]:
def answer_fn(query, history=None):
    retrieved_results = vectorstore.similarity_search(query)
    context = retrieved_results[0].page_content
    messages = [
        {"role": "system", "content": "You are an AI assistant trained to answer based on provided context."},
        {"role": "user", "content": prompt_template.format(context=context, input=query)},
    ]
    response = client.chat.completions.create(
        model="llama-3.1-sonar-large-128k-online",
        messages=messages
    )
    return response.choices[0].message.content

from giskard.rag import evaluate
report = evaluate(answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent: 100%|██████████| 60/60 [07:15<00:00,  7.26s/it]
CorrectnessMetric evaluation: 100%|██████████| 60/60 [00:52<00:00,  1.15it/s]


In [45]:
display(report)

In [46]:
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,0.8
conversational,0.1
distracting element,0.7
double,1.0
simple,0.9
situational,0.9


### Perplexity3

In [47]:
def answer_fn(query, history=None):
    retrieved_results = vectorstore.similarity_search(query)
    context = retrieved_results[0].page_content
    messages = [
        {"role": "system", "content": "You are an AI assistant trained to answer based on provided context."},
        {"role": "user", "content": prompt_template.format(context=context, input=query)},
    ]
    response = client.chat.completions.create(
        model="llama-3.1-sonar-small-128k-online",
        messages=messages
    )
    return response.choices[0].message.content

from giskard.rag import evaluate
report = evaluate(answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent: 100%|██████████| 60/60 [02:52<00:00,  2.87s/it]
CorrectnessMetric evaluation: 100%|██████████| 60/60 [00:56<00:00,  1.06it/s]


In [48]:
display(report)

In [49]:
report.correctness_by_question_type()  

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,0.8
conversational,0.0
distracting element,0.4
double,0.8
simple,0.8
situational,0.8
