### 1.SETUP

In [1]:
!pip install PyPDF2 sentence-transformers chromadb fastapi uvicorn ollama numpy scikit-learn
import PyPDF2
import re
import json
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils import embedding_functions
import ollama
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from fastapi import FastAPI, HTTPException
import uvicorn
import asyncio
from pydantic import BaseModel
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


[notice] A new release of pip available: 22.3 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
     ------------------------------------ 232.6/232.6 kB 712.6 kB/s eta 0:00:00
Collecting chromadb
  Using cached chromadb-1.0.15-cp39-abi3-win_amd64.whl (19.5 MB)
Collecting fastapi
  Using cached fastapi-0.116.1-py3-none-any.whl (95 kB)
Collecting uvicorn
  Using cached uvicorn-0.35.0-py3-none-any.whl (66 kB)
Collecting ollama
  Downloading ollama-0.5.1-py3-none-any.whl (13 kB)
Collecting build>=1.0.3
  Using cached build-1.2.2.post1-py3-none-any.whl (22 kB)
Collecting pybase64>=1.4.1
  Using cached pybase64-1.4.1-cp311-cp311-win_amd64.whl (36 kB)
Collecting posthog<6.0.0,>=2.4.0
  Using cached posthog-5.4.0-py3-none-any.whl (105 kB)
Collecting onnxruntime>=1.14.1
  Using cached onnxruntime-1.22.1-cp311-cp311-win_amd64.whl (12.7 MB)
Collecting opentelemetry-api>=1.2.0
  Using cached opentelemetry_api-1.35.0-py3-none-any.whl (65 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0
  Using cached openteleme

### 2: Text Extraction and Dataset Loading

In [2]:
def extract_text_from_pdf(pdf_path):
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            text = ''
            for page in reader.pages:
                page_text = page.extract_text() or ''
                # Clean text: remove repetitive characters and normalize whitespace
                cleaned_text = re.sub(r'(\w)\1{2,}', '', page_text)
                cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
                text += cleaned_text + ' '
        return text.strip()
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        return ''

# Simulate extracted text due to garbled PDF content
sample_pdf_text = """
অমর সেনের থিসিসে জলবায়ু পরিবর্তন এবং এর অর্থনৈতিক প্রভাব নিয়ে আলোচনা করা হয়েছে। 
তিনি ম্যাথমেটিক্যাল মডেলিং ব্যবহার করে বিশ্লেষণ করেছেন। 
তার কাজের জন্য ২০১৮ সালে তিনি নোবেল পুরস্কার পান।
Amartya Sen's thesis discusses climate change and its economic impacts. 
He used mathematical modeling for analysis. 
He received the Nobel Prize in 2018 for his work.
"""

# Load QA dataset
def load_qa_dataset(json_path):
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Combine Bengali and English QA pairs
        qa_texts = []
        for pair in data['bangla_qa_pairs'] + data['english_qa_pairs']:
            qa_texts.append(f"Context: {pair['context']}\nQuestion: {pair['question']}\nAnswer: {pair['answer']}")
        return qa_texts
    except Exception as e:
        logger.error(f"Error loading QA dataset: {e}")
        return []

# Combine PDF text and QA dataset
pdf_text = sample_pdf_text  # Replace with extract_text_from_pdf('HSC26-Bangla1st-Paper.pdf') if PDF is available
qa_texts = load_qa_dataset('bangla_english_qa_dataset.json')
corpus_text = pdf_text + '\n' + '\n'.join(qa_texts)
logger.info("Text extraction and QA dataset loading completed.")

INFO:__main__:Text extraction and QA dataset loading completed.


###  3: Document Chunking

In [3]:
def chunk_text(text, max_chunk_size=200):
    sentences = re.split(r'(?<=[।.!?])\s+', text)
    chunks = []
    current_chunk = ''
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk_size:
            current_chunk += sentence + ' '
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
                current_chunk = sentence + ' '
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

chunks = chunk_text(corpus_text)
logger.info(f"Created {len(chunks)} chunks.")
for i, chunk in enumerate(chunks[:5]):  # Show first 5 chunks for brevity
    print(f"Chunk {i+1}: {chunk}")

INFO:__main__:Created 123 chunks.


Chunk 1: অমর সেনের থিসিসে জলবায়ু পরিবর্তন এবং এর অর্থনৈতিক প্রভাব নিয়ে আলোচনা করা হয়েছে। তিনি ম্যাথমেটিক্যাল মডেলিং ব্যবহার করে বিশ্লেষণ করেছেন। তার কাজের জন্য ২০১৮ সালে তিনি নোবেল পুরস্কার পান।
Chunk 2: Amartya Sen's thesis discusses climate change and its economic impacts. He used mathematical modeling for analysis. He received the Nobel Prize in 2018 for his work. Context: আমার বয়স সাতার মাত্র।
Chunk 3: এই জীবনটা না দদীঘিযি হাসাবে ব্যে, না গুনি হাসাবে। তবু ইহার একটু বিশেষ মূল্য আছে।
Chunk 4: ইহা যেই ফুলের মতা যাহার বুক্কি উপরি ভ্রমর আর্স া ব্র্স ারিল, এবং যেই পদক্ষেপি ইতিহাস তাহার জীবনের মাঝখানে ফুলের মতা গুটি ধরিয়া উঠি াছে। Question: অনুপম তার জীবনের মূল্য সম্পর্কে কী বলেছেন?
Chunk 5: Answer: অনুপম বলেছেন যে তার জীবন দীর্ঘ বা গুণে হাসাবে নয়, তবে এর একটু বিশেষ মূল্য আছে, যেমন একটি ফুলের মতো যার উপর ভ্রমর এসেছে এবং যার জীবনের মাঝখানে ইতিহাস ফুলের মতো গুটি ধরেছে।


###  4: Vectorization and Storage

In [4]:
# Initialize embedding model
embedding_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

# Initialize Chroma client
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(
    name="rag_corpus",
    embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name='paraphrase-multilingual-MiniLM-L12-v2'
    )
)

# Embed and store chunks
for i, chunk in enumerate(chunks):
    collection.add(
        documents=[chunk],
        ids=[f"chunk_{i}"],
        metadatas=[{"source": "HSC26-Bangla1st-Paper.pdf_and_qa_dataset", "chunk_id": i}]
    )
logger.info("Chunks vectorized and stored in Chroma.")

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: paraphrase-multilingual-MiniLM-L12-v2


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: paraphrase-multilingual-MiniLM-L12-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Chunks vectorized and stored in Chroma.


### 5: Simulated Fine-Tuning

In [5]:
# Simulated fine-tuning: Prepare QA pairs as additional context
def prepare_fine_tune_context(qa_texts, max_examples=5):
    return '\n'.join(qa_texts[:max_examples])  # Limit to avoid overwhelming prompt

fine_tune_context = prepare_fine_tune_context(qa_texts)
logger.info("Simulated fine-tuning context prepared.")

INFO:__main__:Simulated fine-tuning context prepared.


### 6: Query Processing and Answer Generation

In [6]:
# Short-term memory (conversation history)
conversation_history = []

def process_query(query, max_results=3):
    # Embed query
    query_embedding = embedding_model.encode(query)
    
    # Retrieve relevant chunks
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=max_results
    )
    
    # Extract relevant documents and scores
    retrieved_docs = results['documents'][0]
    retrieved_scores = results['distances'][0]
    context = ' '.join(retrieved_docs)
    
    # Prepare prompt with fine-tuned context and conversation history
    history_prompt = '\n'.join([f"User: {h['query']}\nAssistant: {h['response']}" for h in conversation_history[-3:]])
    prompt = f"""
    Fine-Tuned Context:
    {fine_tune_context}
    
    Retrieved Context:
    {context}
    
    Recent Conversation:
    {history_prompt}
    
    User Query: {query}
    
    Provide a concise answer based on the context in the same language as the query.
    """
    
    # Generate answer using Ollama
    try:
        response = ollama.generate(model='mistral', prompt=prompt)['response']
    except Exception as e:
        logger.error(f"Error generating response: {e}")
        response = "Sorry, I couldn't generate a response."
    
    # Update conversation history
    conversation_history.append({"query": query, "response": response})
    
    return response, retrieved_docs, retrieved_scores

# Test queries
test_queries = [
    "অমর সেনের থিসিসে মূল বিষয় কী বর্ণনা করা হয়েছে?",
    "অনুপম তার জীবনের মূল্য সম্পর্কে কী বলেছেন?",
    "What is the main topic of Amartya Sen's thesis?",
    "What does Anupam say about the value of his life?"
]

for query in test_queries:
    response, docs, scores = process_query(query)
    print(f"Query: {query}")
    print(f"Response: {response}")
    print(f"Retrieved Docs: {docs}")
    print(f"Scores: {scores}\n")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Query: অমর সেনের থিসিসে মূল বিষয় কী বর্ণনা করা হয়েছে?
Response:  অমর সেনের থিসিসে বর্ণনা করা হয়েছে, যে তিনি একটি সংজ্ঞাযুলী হিসেবে অপরিচিতা গল্পের মূল প্রtagonist ছিলেন।
Retrieved Docs: ['Question: অনুপমের মামা কেন হরিশের সঙ্গে কল্যাণীর বিয়ের প্রস্তাব প্রত্যাখ্যান করেন?', 'Question: ‘অপরিচিতা’ গল্পে রবীন্দ্রনাথ ঠাকুর কোন বিষয়ের পক্ষে বার্তা প্রদান করেছেন?', 'Question: অনুপমের মামা গহনা পরীক্ষার সময় কী বলেন এবং এটি কী প্রকাশ করে?']
Scores: [2.1590614318847656, 2.246817111968994, 2.4049301147460938]



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Query: অনুপম তার জীবনের মূল্য সম্পর্কে কী বলেছেন?
Response:  অনুপম বলেছেন, তার জীবন দীর্ঘ বা গুণে হাসাবে নয়, তবে এর একটু বিশেষ মূল্য আছে, যেমন একটি ফুলের মতো যার উপর ভ্রমর এসেছে এবং যার জীবনের মাঝখানে ইতিহাস ফুলের মতো গুটি ধরেছে।
Retrieved Docs: ['Answer: উদ্দীপকে পলিশ স্বাধীন মত প্রকাশের সাহস দেখিয়ে বিয়েতে অসম্মতি জানান এবং এর মাধ্যমে তার ব্যক্তিত্বের প্রকাশ ঘটে।', 'Answer: অনুপমের মায়ের অতিরিক্ত স্নেহের ফলে সে একজন ব্যক্তিত্বহীন ও নির্ভরশীল ব্যক্তি হয়ে উঠেছে।', 'Answer: অনুপমের মামা হরিশের সঙ্গে কল্যাণীর বিয়ের প্রস্তাব প্রত্যাখ্যান করেন কারণ তিনি হরিশের যৌতুকপ্রথার প্রতি লোভী মানসিকতা পছন্দ করেননি।']
Scores: [2.0259430408477783, 2.44244647026062, 2.634687662124634]



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Query: What is the main topic of Amartya Sen's thesis?
Response:  The main topic of Amartya Sen's thesis is not explicitly mentioned in the provided context, but it is discussed that he used mathematical modeling for analysis in his work related to climate change and its economic impacts.
Retrieved Docs: ["Amartya Sen's thesis discusses climate change and its economic impacts. He used mathematical modeling for analysis. He received the Nobel Prize in 2018 for his work. Context: আমার বয়স সাতার মাত্র।", 'Question: Who is the author of the story ‘Aparichita’? Answer: Rabindranath Tagore\nContext: Kalyani’s father, Shambhunath Sen, worked in the railway department.', 'Answer: রমা যৌতুকের দাবি প্রত্যাখ্যান করে স্বাধীনভাবে নিজের জীবন গড়ার সিদ্ধান্ত নেন। Context: ‘অপরিচিতা’ গল্পে রবীন্দ্রনাথ ঠাকুর নারীর আত্মমর্যাদা ও স্বাধীনতার পক্ষে শক্তিশালী বার্তা প্রদান করেছেন।']
Scores: [16.19892120361328, 19.140451431274414, 20.663990020751953]



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Query: What does Anupam say about the value of his life?
Response:  Anupam says his life has special value, like a flower visited by a bee, with history forming like a bud in its life.
Retrieved Docs: ['Answer: Anupam says his life is neither long nor full of qualities, but it has a special value, like a flower visited by a bee, with history forming like a bud in its life.', 'It is like a flower on which a bee has landed, and in the middle of whose life history has begun to form like a bud. Question: What does Anupam say about the value of his life?', 'Question: Where did Anupam work? Answer: In a bank\nContext: Anupam was an honest, educated, and confident suitor, though his mother’s excessive affection made him personality-less.']
Scores: [9.333261489868164, 11.786201477050781, 15.223621368408203]



### 7: REST API

In [7]:
app = FastAPI(title="Multilingual RAG API with QA Dataset")

class QueryRequest(BaseModel):
    query: str

@app.post("/query")
async def query_rag(request: QueryRequest):
    try:
        response, retrieved_docs, scores = process_query(request.query)
        return {
            "query": request.query,
            "response": response,
            "retrieved_documents": retrieved_docs,
            "similarity_scores": scores
        }
    except Exception as e:
        logger.error(f"API error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Run the API (execute in a separate terminal or script)
# uvicorn.run(app, host="0.0.0.0", port=8000)

### 8: RAG Evaluation

In [8]:
def evaluate_rag(query, response, retrieved_docs, expected_answer=None):
    # Embed query and response
    query_embedding = embedding_model.encode(query)
    response_embedding = embedding_model.encode(response)
    
    # Calculate relevance (query vs retrieved docs)
    doc_embeddings = embedding_model.encode(retrieved_docs)
    relevance_scores = cosine_similarity([query_embedding], doc_embeddings)[0]
    avg_relevance = np.mean(relevance_scores)
    
    # Calculate groundedness (response vs retrieved docs)
    groundedness_scores = cosine_similarity([response_embedding], doc_embeddings)[0]
    avg_groundedness = np.mean(groundedness_scores)
    
    # Calculate accuracy if expected answer is provided
    accuracy = None
    if expected_answer:
        expected_embedding = embedding_model.encode(expected_answer)
        accuracy = cosine_similarity([response_embedding], [expected_embedding])[0][0]
    
    return {
        "relevance_score": avg_relevance,
        "groundedness_score": avg_groundedness,
        "accuracy": accuracy
    }

# Evaluate test queries with expected answers from QA dataset
qa_pairs = load_qa_dataset('bangla_english_qa_dataset.json')
test_cases = [
    {"query": "অনুপম তার জীবনের মূল্য সম্পর্কে কী বলেছেন?", 
     "expected": "অনুপম বলেছেন যে তার জীবন দীর্ঘ বা গুণে হাসাবে নয়, তবে এর একটু বিশেষ মূল্য আছে, যেমন একটি ফুলের মতো যার উপর ভ্রমর এসেছে এবং যার জীবনের মাঝখানে ইতিহাস ফুলের মতো গুটি ধরেছে।"},
    {"query": "What does Anupam say about the value of his life?", 
     "expected": "Anupam says his life is neither long nor full of qualities, but it has a special value, like a flower visited by a bee, with history forming like a bud in its life."}
]

for case in test_cases:
    response, retrieved_docs, _ = process_query(case['query'])
    metrics = evaluate_rag(case['query'], response, retrieved_docs, case['expected'])
    print(f"Query: {case['query']}")
    print(f"Response: {response}")
    print(f"Expected: {case['expected']}")
    print(f"Metrics: {metrics}\n")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query: অনুপম তার জীবনের মূল্য সম্পর্কে কী বলেছেন?
Response:  Anupam has said that his life has special value, like a flower visited by a bee, with history forming like a bud in its life.
Expected: অনুপম বলেছেন যে তার জীবন দীর্ঘ বা গুণে হাসাবে নয়, তবে এর একটু বিশেষ মূল্য আছে, যেমন একটি ফুলের মতো যার উপর ভ্রমর এসেছে এবং যার জীবনের মাঝখানে ইতিহাস ফুলের মতো গুটি ধরেছে।
Metrics: {'relevance_score': np.float32(0.90325814), 'groundedness_score': np.float32(0.27362576), 'accuracy': np.float32(0.15280285)}



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query: What does Anupam say about the value of his life?
Response:  Anupam says his life has special value, like a flower visited by a bee, with history forming like a bud in its life.
Expected: Anupam says his life is neither long nor full of qualities, but it has a special value, like a flower visited by a bee, with history forming like a bud in its life.
Metrics: {'relevance_score': np.float32(0.65749913), 'groundedness_score': np.float32(0.7085655), 'accuracy': np.float32(0.92067456)}



### 9: Sample Test Case Evaluation

In [9]:
# Define sample test cases
sample_test_cases = [
    {
        "query": "অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?",
        "expected": "শুম্ভুনাথ"
    },
    {
        "query": "কাকে অনুপমের ভাগ্যদেবতা বলে উল্লেখ করা হয়েছে?",
        "expected": "মামাকে"
    },
    {
        "query": "বিয়ের সময় কল্যাণীর প্রকৃত বয়স কত ছিল?",
        "expected": "১৫ বছর"
    }
]

# Evaluate each test case
for case in sample_test_cases:
    # Process query
    response, retrieved_docs, scores = process_query(case['query'])
    
    # Evaluate metrics
    metrics = evaluate_rag(case['query'], response, retrieved_docs, case['expected'])
    
    # Log results
    print(f"Query: {case['query']}")
    print(f"Response: {response}")
    print(f"Expected Answer: {case['expected']}")
    print(f"Metrics: {metrics}")
    print(f"Retrieved Documents: {retrieved_docs}")
    print(f"Similarity Scores: {scores}\n")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query: অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?
Response:  অনুপমের ভাষায় এই মতারমত বলা হয়েছিল, 'একজন সুপুরুষ কাকে আমি বলতে চাই'।
(Based on the context: "Anupam has said that he wants to call someone a supurush.")
Expected Answer: শুম্ভুনাথ
Metrics: {'relevance_score': np.float32(0.9030021), 'groundedness_score': np.float32(0.5454555), 'accuracy': np.float32(0.48164445)}
Retrieved Documents: ['Answer: উদ্দীপকে পলিশ স্বাধীন মত প্রকাশের সাহস দেখিয়ে বিয়েতে অসম্মতি জানান এবং এর মাধ্যমে তার ব্যক্তিত্বের প্রকাশ ঘটে।', 'Answer: অনুপমের মায়ের অতিরিক্ত স্নেহের ফলে সে একজন ব্যক্তিত্বহীন ও নির্ভরশীল ব্যক্তি হয়ে উঠেছে।', 'Answer: অনুপমের মামা হরিশের সঙ্গে কল্যাণীর বিয়ের প্রস্তাব প্রত্যাখ্যান করেন কারণ তিনি হরিশের যৌতুকপ্রথার প্রতি লোভী মানসিকতা পছন্দ করেননি।']
Similarity Scores: [2.020740032196045, 2.261037826538086, 2.5281548500061035]



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query: কাকে অনুপমের ভাগ্যদেবতা বলে উল্লেখ করা হয়েছে?
Response:  অনুপমের ভাগ্যদেবতা বলে উল্লেখ করা হয়েছে সার্বজনিন আব্জীত ভূমিপুরীর বৈঠক্যে।
(Based on the context: "Anupam's fate has been referred to as Sovereign Ajit Vohra in a meeting with Sarvajanin Abhijit BhumiPuri.")
Expected Answer: মামাকে
Metrics: {'relevance_score': np.float32(0.9308326), 'groundedness_score': np.float32(0.38387933), 'accuracy': np.float32(0.37980494)}
Retrieved Documents: ['Question: অনুপমের মামা কেন হরিশের সঙ্গে কল্যাণীর বিয়ের প্রস্তাব প্রত্যাখ্যান করেন?', 'Question: অনুপমের মামা গহনা পরীক্ষার সময় কী বলেন এবং এটি কী প্রকাশ করে?', 'Answer: উদ্দীপকে পলিশ স্বাধীন মত প্রকাশের সাহস দেখিয়ে বিয়েতে অসম্মতি জানান এবং এর মাধ্যমে তার ব্যক্তিত্বের প্রকাশ ঘটে।']
Similarity Scores: [2.1366825103759766, 2.274994373321533, 2.3612942695617676]



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query: বিয়ের সময় কল্যাণীর প্রকৃত বয়স কত ছিল?
Response:  কল্যাণীর প্রকৃত বয়সটা দেখানো হয়নি।
Expected Answer: ১৫ বছর
Metrics: {'relevance_score': np.float32(0.87211365), 'groundedness_score': np.float32(0.4458154), 'accuracy': np.float32(0.445535)}
Retrieved Documents: ['Answer: উদ্দীপকে পলিশ স্বাধীন মত প্রকাশের সাহস দেখিয়ে বিয়েতে অসম্মতি জানান এবং এর মাধ্যমে তার ব্যক্তিত্বের প্রকাশ ঘটে।', 'Question: অনুপমের মামা কেন হরিশের সঙ্গে কল্যাণীর বিয়ের প্রস্তাব প্রত্যাখ্যান করেন?', 'Answer: অনুপমের মায়ের অতিরিক্ত স্নেহের ফলে সে একজন ব্যক্তিত্বহীন ও নির্ভরশীল ব্যক্তি হয়ে উঠেছে।']
Similarity Scores: [3.0673365592956543, 3.310023069381714, 3.5558202266693115]

