In [1]:
import json

In [2]:
import os
from datetime import datetime
from dotenv import load_dotenv
from langchain_pinecone import PineconeVectorStore
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Define log file path
LOG_FILE = "log_queries2.log"

def write_log(message, error=False):
    """Writes log messages with timestamps. Errors are marked separately."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_type = "ERROR" if error else "INFO"
    with open(LOG_FILE, "a") as log_file:
        log_file.write(f"{timestamp} - {log_type}: {message}\n")

class ChatbotRAG:
    def __init__(self, index_name: str):
        try:
            load_dotenv()
            self.gemini_api_key = os.getenv("GEMINI_API3")
            self.pinecone_api_key = os.getenv("PINECONE_API")
            os.environ["PINECONE_API_KEY"] = self.pinecone_api_key

            if not self.gemini_api_key or not self.pinecone_api_key:
                raise ValueError("API keys are missing. Check your .env file.")

            self.index_name = index_name
            self.embeddings = self._initialize_embeddings()
            self.retriever = self._initialize_retriever()
            self.llm = self._initialize_llm()
            self.rag_chain = self._initialize_rag_chain()
            
            write_log("Chatbot initialized successfully!")
        except Exception as e:
            write_log(f"Error initializing chatbot: {str(e)}", error=True)
            raise e
    
    def _initialize_embeddings(self):
        try:
            return GoogleGenerativeAIEmbeddings(
                model="models/embedding-001", 
                google_api_key=self.gemini_api_key
            )
        except Exception as e:
            write_log(f"Error initializing embeddings: {str(e)}", error=True)
            raise e
    
    def _initialize_retriever(self):
        try:
            docsearch = PineconeVectorStore.from_existing_index(
                index_name=self.index_name,
                embedding=self.embeddings
            )
            return docsearch.as_retriever(
                search_type="similarity", 
                search_kwargs={"k": 3},
            )
        except Exception as e:
            write_log(f"Error initializing retriever: {str(e)}", error=True)
            raise e
    
    def _initialize_llm(self):
        try:
            return ChatGoogleGenerativeAI(
                model="gemini-2.0-flash",
                api_key=self.gemini_api_key
            )
        except Exception as e:
            write_log(f"Error initializing LLM: {str(e)}", error=True)
            raise e
    
    def _initialize_rag_chain(self):
        try:
            system_prompt = (
                "You are an assistant for question-answering tasks. "
                "Use the following pieces of retrieved context to answer "
                "the question. If you don't know the answer, say that you "
                "don't know. Use three sentences maximum and keep the "
                "answer concise."
                "\n\n"
                "{context}"
            )
            prompt = ChatPromptTemplate.from_messages([
                ("system", system_prompt),
                ("human", "{input}"),
            ])
            question_answer_chain = create_stuff_documents_chain(self.llm, prompt)
            return create_retrieval_chain(self.retriever, question_answer_chain)
        except Exception as e:
            write_log(f"Error initializing RAG chain: {str(e)}", error=True)
            raise e
    
    def ask_question(self, question: str):
        try:
            if not question.strip():
                raise ValueError("Question cannot be empty.")

            response = self.rag_chain.invoke({"input": question})
            return response
        except Exception as e:
            write_log(f"Error processing question '{question}': {str(e)}", error=True)
            return "An error occurred while processing your question."


try:
    chatbot = ChatbotRAG(index_name="chatbot4")
except Exception as main_error:
    write_log(f"Unhandled error in main executio)n: {str(main_error)}", error=True)
    with open(LOG_FILE, "a") as log_file:
        log_file.write(f"{'-'*40}\n")
    raise main_error

with open(LOG_FILE, "a") as log_file:
    log_file.write(f"{'-'*40}\n")


  from tqdm.autonotebook import tqdm


In [3]:
llm = chatbot.llm
embeddings = chatbot.embeddings
retriever = chatbot.retriever

In [None]:
import pandas as pd
import numpy as np
from ragas import EvaluationDataset, evaluate
from ragas.metrics import (
    LLMContextRecall,
    Faithfulness,
    SemanticSimilarity,
    AnswerCorrectness,
)
from langchain_google_genai import ChatGoogleGenerativeAI
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

file_path = "log_queries2.json"

# Load the JSON file
with open(file_path, "r") as file:
    data = json.load(file)  # Read the JSON content

puki = data
print(len(puki))
def replace_nan(value):
    """Replaces NaN values with 'N/A'."""
    return "N/A" if isinstance(value, (float, np.float32, np.float64)) and np.isnan(value) else value


# Load the CSV file
df = pd.read_csv("log_queries2.csv")  # Replace with your actual CSV file

# Iterate row by row and access columns 1, 2, and 3
for index, row in df.iterrows():
    if index >= 570:
        query = row[0]  # First column
        reference = row[1]  # Second column
        response = row[2]  # Third column



        dataset = []
        for query, reference in zip([query],[reference]):
            relevant_docs = retriever.invoke(query)
            dataset.append(
                {
                    "user_input": query,
                    "retrieved_contexts": [rdoc.page_content for rdoc in relevant_docs],
                    "response": response,
                    "reference": reference,
                }
            )

        evaluation_dataset = EvaluationDataset.from_list(dataset)
        eval_llm = llm
        evaluator_llm = LangchainLLMWrapper(eval_llm)
        embeddings = LangchainEmbeddingsWrapper(embeddings)
        result = evaluate(
            dataset=evaluation_dataset,
            embeddings=embeddings,
            metrics=[
                LLMContextRecall(), Faithfulness(),
                SemanticSimilarity(), AnswerCorrectness()
            ],
            llm=evaluator_llm,
        )

        eval_scores = []
        for score_dict in result.scores:
            eval_scores.append({
                "context_recall": replace_nan(score_dict.get("context_recall", 0)),
                "faithfulness": replace_nan(score_dict.get("faithfulness", 0)),
                "semantic_similarity": replace_nan(score_dict.get("semantic_similarity", 0)),
                "answer_correctness": replace_nan(score_dict.get("answer_correctness", 0))
            })

        for item, score in zip(dataset, eval_scores):
            item["evaluation_result"] = score
            puki.append({'quetion':query,'eval':item['evaluation_result']})
            with open("log_queries2.json",'w') as f:
                f.write(json.dumps(puki,indent=4))

    if index == 669:
        break


570


  query = row[0]  # First column
  reference = row[1]  # Second column
  response = row[2]  # Third column
Evaluating:  50%|█████     | 2/4 [00:02<00:02,  1.22s/it]Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 23
}
].
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  descript

KeyboardInterrupt: 

Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 13
}
].
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 12
}
].
Retrying langchain_google_