In [3]:
from doc_loader import DocumentChunker
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
import google.generativeai as genai 
import os
from langchain_experimental.agents import create_csv_agent
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from qdrant_client import QdrantClient
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
# Initializing clients

# RAGAS 

In [4]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# Test Dataset

In [42]:

# data
from datasets import load_dataset
import pandas as pd

# Load the FiQA ragas evaluation set and print the schema of its "baseline" split.
fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval")
print(fiqa_eval["baseline"])

# Build the working frame in one non-mutating chain (no inplace=True), so
# re-running the cell always starts again from the raw split: the dataset's
# own `answer` column is dropped and `ground_truths` is renamed to `ground_truth`.
df_fiqa_eval = (
    pd.DataFrame(fiqa_eval["baseline"])
    .drop(columns="answer")
    .rename(columns={"ground_truths": "ground_truth"})
)

# Whole-column conversion replaces the row-by-row iloc/append loop; the
# resulting lists hold the same values in the same order.
test_questions = df_fiqa_eval["question"].tolist()
contexts = df_fiqa_eval["contexts"].tolist()
test_ground_truth = df_fiqa_eval["ground_truth"].tolist()

Found cached dataset fiqa (C:/Users/Anirudh/.cache/huggingface/datasets/explodinggradients___fiqa/ragas_eval/1.0.0/3dc7b639f5b4b16509a3299a2ceb78bf5fe98ee6b5fee25e7d5e4d290c88efb8)


  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['question', 'ground_truths', 'answer', 'contexts'],
    num_rows: 30
})


In [6]:
# data
from datasets import load_dataset
import pandas as pd

# Load the "english_v2" configuration of the Amnesty QA evaluation set.
amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english_v2")

# Work from the "eval" split without the dataset's own `answer` column.
df_amnesty_qa = pd.DataFrame(amnesty_qa["eval"]).drop(columns="answer")

# Whole-column conversion replaces the row-by-row iloc/append loop.
# NOTE: these three names are also assigned by the FiQA cell; whichever of
# the two cells ran last determines what they contain.
test_questions = df_amnesty_qa["question"].tolist()
contexts = df_amnesty_qa["contexts"].tolist()
test_ground_truth = df_amnesty_qa["ground_truth"].tolist()

Found cached dataset amnesty_qa (C:/Users/Anirudh/.cache/huggingface/datasets/explodinggradients___amnesty_qa/english_v2/2.0.0/d0ed9800191a31943ee52a5c22ee4305e28a33f5edcd9a323802112cff07cc24)


  0%|          | 0/1 [00:00<?, ?it/s]

# LLM config

In [16]:
# Read variables from a local .env file into the process environment, then
# pass the GOOGLE_API_KEY value to the google.generativeai SDK.
load_dotenv()
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Embedding model and chat model objects used by later cells.
google_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
google_llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
    convert_system_message_to_human=True,
    request_timeout=120,
)

In [17]:
course_name = "XYZ channel"

# Connection settings for the Qdrant collection. `course_id` is stored here
# but is not read anywhere in this cell.
q_db = dict(
    url="http://localhost:6333",
    collection_name="ssngpt_collection",
    embeddings=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    course_id=course_name,
)

# Defining retriever
client = QdrantClient(url=q_db["url"], prefer_grpc=False)

vector_store = Qdrant(
    client=client,
    embeddings=q_db["embeddings"],
    collection_name=q_db["collection_name"],
)

# Retriever with the vector store's default search settings.
retriever = vector_store.as_retriever()

In [18]:
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Chat model that answers the question from the context placed in the prompt.
google_llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
    convert_system_message_to_human=True,
    request_timeout=120,
)

# Prompt text: the model is told to rely on the given context only and to
# reply 'I don't know' otherwise. Placeholders: {context} and {question}.
template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

# prompt -> chat model -> plain string. The chain itself does not call
# `retriever`; the caller supplies both "question" and "context".
retrieval_augmented_qa_chain = prompt | google_llm | output_parser


In [11]:
# Generate one answer per evaluation question with the QA chain.
answers = []

# Iterate over the lists that are actually read (test_questions / contexts)
# rather than range(len(df_amnesty_qa)): both dataset cells assign these
# lists, so their length is not guaranteed to match df_amnesty_qa.
for question, question_contexts in zip(test_questions, contexts):
    # Only the first context passage of each row is placed in the prompt.
    response = retrieval_augmented_qa_chain.invoke(
        {"question": question, "context": question_contexts[0]}
    )
    print(response)
    answers.append(response)

- The ruling has laid the groundwork for anti-abortion legislative and policy attacks in other countries.
- It has impacted progressive law reform and stalled the adoption and enforcement of abortion guidelines in certain African countries.
- It has created a chilling effect in international policy spaces, emboldening anti-abortion state and non-state actors to undermine human rights protections.
According to the Carbon Majors database, 100 existing fossil fuel companies (and eight that no longer exist) have produced almost a trillion tons of GHG emissions in 150 years. These 100 companies are responsible for 71% of all GHG emissions since 1988.
ExxonMobil, Chevron and Peabody
I don't know
Amnesty International recommends that the Special Rapporteur on the Situation of Human Rights Defenders:
- embeds a focus on child and young HRDs in the future work of the mandate
- raises awareness about the differences between "child" and "youth" HRDs and the different context, needs, barriers, and

In [23]:
from datasets import Dataset

# Combine the questions, the chain's answers, the contexts and the reference
# answers into a single Dataset with one column per list.
response_dataset = Dataset.from_dict(
    dict(
        question=test_questions,
        answer=answers,
        contexts=contexts,
        ground_truth=test_ground_truth,
    )
)

In [24]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision
)

In [None]:
from ragas import evaluate

# Evaluate response_dataset (the answers produced by this notebook's chain)
# rather than amnesty_qa["eval"], whose `answer` column ships with the
# dataset and does not reflect the chain defined above.
# NOTE(review): assumes the goal is to score the generated answers - confirm.

# Evaluate at most the first 10 rows (10 rows x 4 metrics = 40 evaluation
# jobs); clamp to the dataset size so select() never gets an out-of-range index.
eval_rows = min(10, len(response_dataset))

result = evaluate(
    response_dataset.select(range(eval_rows)),
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    llm=google_llm,
    embeddings=google_embeddings,
)



Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<loc

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 60.0 seconds as it raised ResourceExhausted: 429 Quota exceeded for quota metric 'Generate Content API requests per minute' and limit 'GenerateContent request limit per minute for a region' of service 'generativelanguage.googleapis.com' for consumer 'project_number:768539581369'. [reason: "RATE_LIMIT_EXCEEDED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "quota_metric"
  value: "generativelanguage.googleapis.com/generate_content_requests"
}
metadata {
  key: "quota_location"
  value: "us-west4"
}
metadata {
  key: "quota_limit"
  value: "GenerateContentRequestsPerMinutePerProjectPerRegion"
}
metadata {
  key: "quota_limit_value"
  value: "60"
}
metadata {
  key: "consumer"
  value: "projects/768539581369"
}
, links {
  description: "Request a higher quota limit."
  url: "https://cloud.google.com/docs/quota#requesting_higher_q

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 32.0 seconds as it raised ResourceExhausted: 429 Quota exceeded for quota metric 'Generate Content API requests per minute' and limit 'GenerateContent request limit per minute for a region' of service 'generativelanguage.googleapis.com' for consumer 'project_number:768539581369'. [reason: "RATE_LIMIT_EXCEEDED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "quota_metric"
  value: "generativelanguage.googleapis.com/generate_content_requests"
}
metadata {
  key: "quota_location"
  value: "us-west4"
}
metadata {
  key: "quota_limit"
  value: "GenerateContentRequestsPerMinutePerProjectPerRegion"
}
metadata {
  key: "quota_limit_value"
  value: "60"
}
metadata {
  key: "consumer"
  value: "projects/768539581369"
}
, links {
  description: "Request a higher quota limit."
  url: "https://cloud.google.com/docs/quota#requesting_higher_q