# RAG chatbot

## Import Libraries

In [15]:
import fitz
import numpy as np
import json
import re
import time
from tqdm import tqdm
import warnings
import google.api_core.exceptions
from sentence_transformers import SentenceTransformer

from gemini_setup import model, system_prompt

warnings.filterwarnings("ignore", category=FutureWarning)

In [16]:
# Report which Gemini model is configured. Use the public `model_name`
# property instead of reaching into the private `_model_name` attribute.
print("Using model:", model.model_name)


Using model: models/gemini-2.0-flash


In [17]:
def extracted_pdf(pdf_path):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        A single string containing the ``"text"`` extraction of each page,
        joined without any separator.
    """
    # The context manager closes the document even if a page fails to extract,
    # and join() avoids rebuilding the string once per page.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") for page in doc)

In [18]:
def chunk_text_func(text, max_tokens=300, overlap=50):
    """Split text into sentence-aligned chunks of roughly ``max_tokens`` words.

    Sentences are detected by whitespace following ``.``, ``!`` or ``?`` and are
    never split: a chunk is closed as soon as adding the next sentence would
    push it past ``max_tokens``. A single sentence longer than ``max_tokens``
    therefore becomes (part of) an oversized chunk.

    Args:
        text: The text to split.
        max_tokens: Soft upper bound on chunk size, counted in
            whitespace-separated words (not model tokens).
        overlap: Number of trailing words of a closed chunk that are repeated
            at the start of the next one. ``0`` disables overlap.

    Returns:
        A list of chunk strings; empty if ``text`` contains no words.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    chunks, chunk = [], []
    total_words = 0
    for sentence in sentences:
        words = sentence.split()
        # Only flush a non-empty buffer: an oversized sentence arriving while
        # the buffer is empty must not produce an empty chunk.
        if chunk and total_words + len(words) > max_tokens:
            chunks.append(' '.join(chunk))
            # Seed the next chunk with the tail of the one just closed.
            chunk = chunk[-overlap:] if overlap > 0 else []
            total_words = len(chunk)
        chunk.extend(words)
        total_words += len(words)
    if chunk:
        chunks.append(' '.join(chunk))
    return chunks


In [19]:
# Source document for the retrieval corpus (relative path).
pdf_path = "A Comprehensive Analysis of Liver Disease Detection Using Advanced Machine Learning Algorithms.pdf"
# Pull the text of every page into one string.
extracted_text = extracted_pdf(pdf_path)
# Split into chunks of at most ~300 words, repeating the last 50 words of each chunk in the next.
chunk_text = chunk_text_func(extracted_text, max_tokens=300, overlap=50)
# Sentence-embedding model; semantic_search uses this same global to embed queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# NOTE: despite its name, `response` holds the embeddings of the chunks in `chunk_text`;
# it is what gets passed to semantic_search as `embeddings` further down.
response = embedding_model.encode(chunk_text)

In [20]:
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two 1-D vectors.

    Args:
        vec1: First vector (array-like accepted by NumPy).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (|vec1| * |vec2|)``, or ``0.0`` when either vector
        has zero norm (the similarity is undefined there; returning 0 keeps
        rankings free of NaN).
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        return 0.0
    return np.dot(vec1, vec2) / denominator

def semantic_search(query, chunk_text, embeddings, k=2):
    """Return the ``k`` chunks whose embeddings are most similar to the query.

    The query is embedded with the module-level ``embedding_model`` and compared
    against every entry of ``embeddings`` using ``cosine_similarity``.

    Args:
        query: The question text to search for.
        chunk_text: Sequence of chunk strings, index-aligned with ``embeddings``.
        embeddings: Iterable of chunk embedding vectors.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` entries of ``chunk_text``, ordered from most to least similar.
    """
    query_vector = embedding_model.encode([query])[0]
    similarities = [cosine_similarity(query_vector, vector) for vector in embeddings]
    # Rank chunk positions by similarity, highest first (stable for ties).
    ranked_positions = sorted(
        range(len(similarities)),
        key=lambda position: similarities[position],
        reverse=True,
    )
    return [chunk_text[position] for position in ranked_positions[:k]]

In [21]:
def generate_response_gemini(system_prompt, top_chunks, query, max_chunk_chars=1200, max_retries=3):
    """Ask the Gemini model to answer ``query`` using the retrieved chunks as context.

    The prompt is the system prompt, followed by each chunk (truncated to
    ``max_chunk_chars`` characters) under a numbered "Context" header, followed
    by the question.

    Args:
        system_prompt: Instruction text placed at the top of the prompt.
        top_chunks: Retrieved context strings, most relevant first.
        query: The user's question.
        max_chunk_chars: Character limit applied to each chunk.
        max_retries: Number of attempts made when the API reports
            ``ResourceExhausted``.

    Returns:
        The stripped model reply. Failures are reported in-band rather than
        raised: ``"ERROR: <exception>"`` for any other exception, or a fixed
        failure message once all retries are exhausted.
    """
    context_sections = [
        f"Context {i+1}:\n{c[:max_chunk_chars]}\n====================="
        for i, c in enumerate(top_chunks)
    ]
    user_prompt = "\n".join(context_sections) + f"\n\nQuestion: {query}"
    full_prompt = f"{system_prompt}\n\n{user_prompt}"

    for attempt in range(1, max_retries + 1):
        try:
            response = model.generate_content(full_prompt)
            return response.text.strip()
        except google.api_core.exceptions.ResourceExhausted:
            if attempt == max_retries:
                # No further attempt will be made, so do not waste time sleeping.
                print(f"⚠️ ResourceExhausted: giving up after {max_retries} attempts")
                break
            # Linear back-off: 10s, 20s, ...
            wait = 10 * attempt
            print(f"⚠️ ResourceExhausted: Waiting {wait}s (Attempt {attempt}/{max_retries})")
            time.sleep(wait)
        except Exception as e:
            # Anything else (including a reply with no usable text) is not retried.
            print(f"❌ Unexpected error: {e}")
            return f"ERROR: {e}"
    return "❌ ERROR: Failed after multiple retries"


In [22]:
# Load the validation set and keep only its first 10 entries.
# Later cells read the "question" and "ideal_answer" keys of each entry.
with open("val.json", "r") as f:
    full_data = json.load(f)
    data = full_data[:10]  # only 10 questions

# Collects one {"question", "answer"} record per entry of `data`.
all_responses = []

In [23]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every answer (which would misalign it with `data`).
all_responses = []

for item in tqdm(data, desc="Generating Gemini answers"):
    query = item['question']
    # `response` holds the chunk embeddings computed when the corpus was built.
    top_chunks = semantic_search(query, chunk_text, response, k=2)
    gemini_reply = generate_response_gemini(system_prompt, top_chunks, query)

    all_responses.append({
        "question": query,
        "answer": gemini_reply
    })

    # Checkpoint every 5 answers so a crash does not lose completed work.
    if len(all_responses) % 5 == 0:
        with open("gemini_partial.json", "w") as f:
            json.dump(all_responses, f, indent=2)
        print(f"💾 Saved {len(all_responses)} partial responses")

    time.sleep(1.2)  # to avoid rate limit

with open("gemini_responses_10.json", "w") as f:
    json.dump(all_responses, f, indent=2)

# Report the actual count instead of assuming it is always 10.
print(f"✅ Saved all {len(all_responses)} answers to gemini_responses_10.json")

Generating Gemini answers:  40%|████      | 4/10 [00:09<00:13,  2.21s/it]

💾 Saved 5 partial responses


Generating Gemini answers:  90%|█████████ | 9/10 [00:19<00:02,  2.03s/it]

💾 Saved 10 partial responses


Generating Gemini answers: 100%|██████████| 10/10 [00:21<00:00,  2.15s/it]

✅ Saved all 10 answers to gemini_responses_10.json





In [24]:
# Grading instructions appended to every evaluation prompt. The model is asked
# to reply with a bare number: 1 (matches), 0.5 (partially matches) or 0 (wrong).
evaluate_system_prompt = (
    "You are an intelligent evaluation system tasked with assessing the AI assistant's responses. "
    "If the AI assistant's response is very close to the true response, assign a score of 1. "
    "If the response is incorrect or unsatisfactory in relation to the true response, assign a score of 0. "
    "If the response is partially aligned with the true response, assign a score of 0.5.\n\n"
    "Only return the number (0, 0.5, or 1)."
)

def evaluate_response(query, gemini_reply, true_answer, max_retries=3):
    """Ask the model to grade a generated answer against the reference answer.

    The prompt contains the query, the generated reply, the reference answer and
    the module-level ``evaluate_system_prompt`` grading instructions.

    Args:
        query: The original user question.
        gemini_reply: The answer produced by the assistant.
        true_answer: The reference ("ideal") answer.
        max_retries: Number of attempts before giving up.

    Returns:
        The model's reply with surrounding whitespace stripped (not validated
        here), or the string ``"ERROR"`` if every attempt raised.
    """
    eval_prompt = (
        f"User Query: {query}\n"
        f"AI Response:\n{gemini_reply}\n"
        f"True Response:\n{true_answer}\n\n"
        f"{evaluate_system_prompt}"
    )

    for attempt in range(1, max_retries + 1):
        try:
            result = model.generate_content(eval_prompt)
            return result.text.strip()
        except Exception as e:
            # Show the real retry budget rather than a hard-coded "/3".
            print(f"⚠️ Eval retry ({attempt}/{max_retries}): {e}")
            # Pause only if another attempt follows.
            if attempt < max_retries:
                time.sleep(5)
    return "ERROR"

In [25]:
# Pause between evaluation requests. A previous run of this cell hit a 429 whose
# payload reported a limit of 15 requests per minute, i.e. one request every 4 s.
EVAL_DELAY_SECONDS = 4.5

evaluations = []

# Pair each validation item with its generated answer by position.
for position, (item, generated) in enumerate(zip(data, all_responses)):
    if position:
        time.sleep(EVAL_DELAY_SECONDS)

    question = item["question"]
    ideal_answer = item["ideal_answer"]
    generated_answer = generated["answer"]

    score = evaluate_response(question, generated_answer, ideal_answer)

    evaluations.append({
        "question": question,
        "ideal_answer": ideal_answer,
        "generated_answer": generated_answer,
        "score": score
    })

with open("gemini_evaluations_10.json", "w") as f:
    json.dump(evaluations, f, indent=2)

print("✅ Evaluations saved to gemini_evaluations_10.json")

⚠️ Eval retry (1/3): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 48
}
]
✅ Evaluations saved to gemini_evaluations_10.json


In [None]:
import json

# Load evaluation results from file
EVAL_RESULTS_PATH = "gemini_evaluations_10.json"
with open(EVAL_RESULTS_PATH, "r") as f:
    eval_data = json.load(f)

# Parse numeric scores only (ignore invalid ones such as "ERROR" or free text)
VALID_SCORES = (0, 0.5, 1.0)
scores = []
for item in eval_data:
    try:
        score = float(item["score"])
    except (KeyError, TypeError, ValueError):
        # Missing or non-numeric score: skip it, but let unrelated errors surface.
        continue
    if score in VALID_SCORES:
        scores.append(score)

# Compute average score
if scores:
    average_score = sum(scores) / len(scores)
    print(f"✅ Average Evaluation Score: {average_score:.2f} (from {len(scores)} responses)")
else:
    # Name the file that was actually read.
    print(f"⚠️ No valid scores found in {EVAL_RESULTS_PATH}")


FileNotFoundError: [Errno 2] No such file or directory: 'evaluation_results.json'