# **BERT based QnA**

Extract Text from PDFs

In [None]:
# Mount Google Drive to access files
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pdfminer.six python-docx pandas transformers torch

Collecting pytesseract
  Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.10


In [None]:
import os
import re
import pandas as pd
from docx import Document
from pdfminer.high_level import extract_text, extract_pages
from pdfminer.layout import LTTextContainer
from transformers import pipeline

# Function to extract text and links from a PDF file
def extract_text_and_links_from_pdf(pdf_path):
    text_data = []
    links = []

    # Extract text
    for page_layout in extract_pages(pdf_path):
        for element in page_layout:
            if isinstance(element, LTTextContainer):
                text_data.append(element.get_text())

    # Extract links
    def extract_links_from_pdf(pdf_path):
        links = []
        with open(pdf_path, 'rb') as f:
            parser = PDFParser(f)
            doc = PDFDocument(parser)
            for page in PDFPage.create_pages(doc):
                if page.annots:
                    for annot in page.annots:
                        uri = annot.get('URI', None)
                        if uri:
                            links.append(uri)
        return links

    links = extract_links_from_pdf(pdf_path)
    return "\n".join(text_data), links

# Function to extract text and links from a DOCX file
def extract_text_and_links_from_docx(docx_path):
    text_data = []
    links = []

    try:
        doc = Document(docx_path)
        for para in doc.paragraphs:
            text_data.append(para.text)
            # DOCX does not have direct hyperlink attribute, so extract links manually
            for run in para.runs:
                if 'hyperlink' in run._element.xml:
                    link = re.search(r'href="(.*?)"', run._element.xml)
                    if link:
                        links.append(link.group(1))
        return "\n".join(text_data), links
    except Exception as e:
        print(f"Error extracting text from {docx_path}: {e}")
        return "", []

# Function to extract text from an Excel file
def extract_text_from_excel(excel_path):
    try:
        xls = pd.ExcelFile(excel_path)
        full_text = []
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)
            full_text.append(df.to_string())
        return "\n".join(full_text)
    except Exception as e:
        print(f"Error extracting text from {excel_path}: {e}")
        return ""

# Function to clean and preprocess text
def preprocess_text(text):
    text = re.sub(r'\n+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Function to initialize the QnA model
def initialize_qna_model():
    return pipeline("question-answering", model="deepset/bert-base-cased-squad2")

# Function to answer a user question using the QnA model
def answer_question(qna_model, question, context):
    result = qna_model(question=question, context=context)
    return result['answer']

# Main function to process files and answer questions
def main(file_folder, question):
    qna_model = initialize_qna_model()
    all_text = ""
    all_links = []

    for filename in os.listdir(file_folder):
        file_path = os.path.join(file_folder, filename)
        print(f"Processing file: {file_path}")

        if filename.lower().endswith('.pdf'):
            text, links = extract_text_and_links_from_pdf(file_path)
        elif filename.lower().endswith('.docx'):
            text, links = extract_text_and_links_from_docx(file_path)
        elif filename.lower().endswith('.xlsx') or filename.lower().endswith('.xls'):
            text = extract_text_from_excel(file_path)
            links = []
        else:
            #print(f"Skipping unsupported file format: {filename}")
            continue

        if not text:
            print(f"Failed to extract text from {file_path}")
            continue

        text = preprocess_text(text)
        all_text += text + " "
        all_links.extend(links)

    answer = answer_question(qna_model, question, all_text)
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print("Extracted Links:", all_links)

if __name__ == "__main__":
    # Example usage: Provide a folder containing various file types and ask a question
    file_folder = '/content/drive/My Drive/Proplens/'
    question = "What are the main features of the product?"
    main(file_folder, question)


Some weights of the model checkpoint at deepset/bert-base-cased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Processing file: /content/drive/My Drive/Proplens/facade-catalogue-and-specifications.pdf
Processing file: /content/drive/My Drive/Proplens/Inventory sheet.xlsx
Processing file: /content/drive/My Drive/Proplens/Project links.docx
Processing file: /content/drive/My Drive/Proplens/Sales SOP and policies.docx
Processing file: /content/drive/My Drive/Proplens/Tembusu grand 1 Bed + Study unit plan.png
Processing file: /content/drive/My Drive/Proplens/Tembusu grand 2 Bed +study unit plan.png
Processing file: /content/drive/My Drive/Proplens/Tembusu grand 2 bed unit plan.png
Processing file: /content/drive/My Drive/Proplens/Tembusu grand 3 bed unit plan.png
Processing file: /content/drive/My Drive/Proplens/Tembusu grand 4 Bed unit plan.png
Processing file: /content/drive/My Drive/Proplens/Tembusu grand image.jpeg
Processing file: /content/drive/My Drive/Proplens/Tembusu grand Location map.png
Processing file: /content/drive/My Drive/Proplens/Tembusu grand Site plan.png
Processing file: /conte

Model Evaluation for SQUAD dataset

In [None]:
!pip install datasets scikit-learn




In [None]:
import time
import numpy as np
from datasets import load_dataset
from transformers import pipeline
from sklearn.metrics import accuracy_score, f1_score

# Function to initialize the QnA model
def initialize_qna_model():
    return pipeline("question-answering", model="deepset/bert-base-cased-squad2")

# Function to assess performance and latency on a benchmark dataset
def assess_performance_and_latency():
    # Define the benchmark dataset
    dataset_name = "squad"  # Use a standard benchmark dataset like SQuAD
    num_samples = 100  # Number of samples to test

    # Load the benchmark dataset
    dataset = load_dataset(dataset_name)

    # Extract a subset of the validation data
    validation_data = dataset['validation']
    sample_data = validation_data.select(range(num_samples))

    # Prepare data for evaluation
    contexts = [item['context'] for item in sample_data]
    questions = [item['question'] for item in sample_data]
    answers = [item['answers']['text'][0] for item in sample_data]

    qna_model = initialize_qna_model()

    predictions = []
    start_time = time.time()

    for context, question in zip(contexts, questions):
        result = qna_model(question=question, context=context)
        predictions.append(result['answer'])

    end_time = time.time()
    latency = end_time - start_time

    # Calculate performance metrics
    accuracy = np.mean([a == p for a, p in zip(answers, predictions)])
    f1 = f1_score(answers, predictions, average='weighted')

    print(f"Latency: {latency:.2f} seconds")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1 Score: {f1:.2f}")

if __name__ == "__main__":
    assess_performance_and_latency()


Some weights of the model checkpoint at deepset/bert-base-cased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Latency: 58.09 seconds
Accuracy: 0.66
F1 Score: 0.65


FASTAPI APP

In [None]:
!pip install fastapi uvicorn pyngrok datasets transformers nest_asyncio




In [None]:
%%writefile api.py
import time
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
from datasets import load_dataset

app = FastAPI()

# Function to initialize the QnA model
def initialize_qna_model():
    return pipeline("question-answering", model="deepset/bert-base-cased-squad2")

# Load the SQuAD dataset
dataset_name = "squad"
dataset = load_dataset(dataset_name)
validation_data = dataset['validation']

# Extract a subset of the validation data for demo purposes
num_samples = 100
sample_data = validation_data.select(range(num_samples))
contexts = [item['context'] for item in sample_data]
questions = [item['question'] for item in sample_data]
answers = [item['answers']['text'][0] for item in sample_data]

qna_model = initialize_qna_model()

class QuestionRequest(BaseModel):
    question: str

@app.post("/answer/")
async def get_answer(request: QuestionRequest):
    question = request.question
    context = " ".join(contexts)  # Join all contexts for this example
    start_time = time.time()

    try:
        result = qna_model(question=question, context=context)
        answer = result['answer']
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing request: {e}")

    latency = time.time() - start_time
    return {"answer": answer, "latency": latency}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)


Overwriting api.py


In [None]:
!nohup python3 -m uvicorn api:app --host 0.0.0.0 --port 8000 > output.log 2>&1 &


In [None]:
# Step 1: Install necessary libraries
!pip install fastapi uvicorn pyngrok nest_asyncio

# Step 2: Set up ngrok authtoken (replace 'YOUR_AUTHTOKEN_HERE' with your actual authtoken)
!ngrok authtoken 2jPTIbWSfS7g4Vu0dllTblPJxNv_4apQJqVsKySceNCJ1uQHL

# Step 3: Import necessary modules
from pyngrok import ngrok
from fastapi import FastAPI
import uvicorn
import nest_asyncio

# Step 4: Patch the event loop
nest_asyncio.apply()

# Step 5: Create a FastAPI app
app = FastAPI()

@app.get("/")
def read_root():
    return {"Hello": "World"}

# Step 6: Set up a tunnel to the FastAPI server
port = 8001  # Use a different port
public_url = ngrok.connect(port)
print(f"Public URL: {public_url}")

# Step 7: Start the FastAPI server
try:
    uvicorn.run(app, host='0.0.0.0', port=port)
except Exception as e:
    print(f"Error starting server: {e}")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Public URL: NgrokTunnel: "https://b437-34-145-195-49.ngrok-free.app" -> "http://localhost:8001"


INFO:     Started server process [29173]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [29173]


In [None]:
!ngrok authtoken 2jPTIbWSfS7g4Vu0dllTblPJxNv_4apQJqVsKySceNCJ1uQHL  # Replace with your actual ngrok authtoken


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
import nest_asyncio
from transformers import pipeline
import time

# Patch the event loop
nest_asyncio.apply()

# Initialize the QnA model
def initialize_qna_model():
    return pipeline("question-answering", model="deepset/bert-base-cased-squad2")

qna_model = initialize_qna_model()

# Create a FastAPI app
app = FastAPI()

# Define request and response models
class QnARequest(BaseModel):
    context: str
    question: str

class QnAResponse(BaseModel):
    answer: str
    latency: float

# Define the QnA endpoint
@app.post("/qna", response_model=QnAResponse)
async def get_answer(request: QnARequest):
    start_time = time.time()
    result = qna_model(question=request.question, context=request.context)
    latency = time.time() - start_time
    return QnAResponse(answer=result['answer'], latency=latency)

# Set up a tunnel to the FastAPI server
port = 8000  # Use port 8000
public_url = ngrok.connect(port)
print(f"Public URL: {public_url}")

# Start the FastAPI server
uvicorn.run(app, host='0.0.0.0', port=port)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of the model checkpoint at deepset/bert-base-cased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a Be

Public URL: NgrokTunnel: "https://5fb3-34-145-195-49.ngrok-free.app" -> "http://localhost:8000"
INFO:     2402:e280:3d6e:1a10:3845:7748:fefd:ec84:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     2402:e280:3d6e:1a10:3845:7748:fefd:ec84:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     2402:e280:3d6e:1a10:3845:7748:fefd:ec84:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     2402:e280:3d6e:1a10:3845:7748:fefd:ec84:0 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [87395]


# **RAG based QnA**

In [None]:
# Installing Required Libraries
%pip install python-docx
%pip install python-pptx
%pip install PyPDF2
%pip install langchain
%pip install langchain_community
%pip install langchain_google_genai
%pip install langchain_text_splitters
%pip install sentence-transformers
%pip install faiss-cpu
%pip install cohere

Collecting python-pptx
  Downloading python_pptx-0.6.23-py3-none-any.whl (471 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading XlsxWriter-3.2.0-py3-none-any.whl (159 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.9/159.9 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: XlsxWriter, python-pptx
Successfully installed XlsxWriter-3.2.0 python-pptx-0.6.23
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting langchain
  Downloading langchain-0.2.10-py3-none-any.whl (990 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.0/990.0 kB[0m [31m14.8 MB/s

Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-3.0.1
Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1
Collecting cohere
  Downloading cohere-5.6.1-py3-none-any.whl (178 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.5/178.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3<2.0.0,>=1.34.0 (from cohere)
  Downloading boto3-1.34.145-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [6]:
# necessary Imports
from docx import Document
from PyPDF2 import PdfReader
from pptx import Presentation
from langchain_community.llms import Cohere
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import AIMessage, HumanMessage
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts  import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder

In [4]:
# Mount Google Drive to access files
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
from PyPDF2 import PdfReader
from docx import Document
import openpyxl

# Define the folder path
folder_path = '/content/drive/My Drive/Proplens/'

# Initialize empty strings to store the text
pdf_text = ""
doc_text = ""
excel_text = ""
file_list = []  # List to keep track of processed files

# Function to read PDF files
def read_pdf(file_path):
    text = ""
    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PdfReader(pdf_file)
        for page in pdf_reader.pages:
            text += page.extract_text() + '\n'
    return text

# Function to read DOCX files
def read_docx(file_path):
    text = ""
    doc_file = Document(file_path)
    for paragraph in doc_file.paragraphs:
        text += paragraph.text + '\n'
    return text

# Function to read Excel files
def read_excel(file_path):
    text = ""
    wb = openpyxl.load_workbook(file_path)
    sheet = wb.active
    for row in sheet.iter_rows(values_only=True):
        text += "\t".join(map(str, row)) + "\n"
    return text

# Process all files in the folder
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)

    if filename.endswith('.pdf'):
        pdf_text += read_pdf(file_path)
        file_list.append(filename)
    elif filename.endswith('.docx'):
        doc_text += read_docx(file_path)
        file_list.append(filename)
    elif filename.endswith('.xlsx'):
        excel_text += read_excel(file_path)
        file_list.append(filename)

# Combine all text
all_text = pdf_text + '\n' + doc_text + '\n' + excel_text

# Output the length of the combined text and list of processed files
print("Length of combined text:", len(all_text))
print("Files processed:")
for file in file_list:
    print(file)

# Optional: Save the combined text to a file
with open('/content/drive/My Drive/Proplens/combined_text.txt', 'w') as file:
    file.write(all_text)


Length of combined text: 164744
Files processed:
facade-catalogue-and-specifications.pdf
Inventory sheet.xlsx
Project links.docx
Sales SOP and policies.docx
TEMBUSU GRAND_MAIN BROCHURE.pdf
Checklist for purchase of property under construction from developers.pdf
gov.sg _ Property Tax on Residential Property.pdf
Sales schemes.xlsx


In [7]:
# splitting the text into chunks for embeddings creation

text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 1000,
        chunk_overlap = 200, # This is helpul to handle the data loss while chunking.
        length_function = len,
        separators=['\n', '\n\n', ' ', '']
    )

chunks = text_splitter.split_text(text = all_text)


In [8]:
len(chunks)

# Initializing embeddings model

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Indexing the data using FAISS
vectorstore = FAISS.from_texts(chunks, embedding = embeddings)


  warn_deprecated(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
# creating retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                not contained in the context, say "answer not available in context" \n\n
                Context: \n {context}?\n
                Question: \n {question} \n
                Answer:"""

prompt = PromptTemplate.from_template(template=prompt_template)

In [10]:
# function to create a single string of relevant documents given by Faiss.
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# RAG Chain

def generate_answer(question):
    cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key = os.getenv('J3ryImGgctIBh5Lbb06reA2PrmNCy98L2FrbegkV'))

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | cohere_llm
        | StrOutputParser()
    )

    return rag_chain.invoke(question)

In [10]:
# Ensure API key is set in the environment
os.environ['COHERE_API_KEY'] = 'J3ryImGgctIBh5Lbb06reA2PrmNCy98L2FrbegkV'

In [11]:
import os
ans = generate_answer("Describe nature scenes at tembusu grand?")
print(ans)

  warn_deprecated(


 Tembusu Grand is a modern development that features a unique stamp of quiet and understated elegance. Its towering architecture is inspired by the Tembusu heritage tree, featuring colors, textures, and an iconic crown design that mirrors the tree's sculptural form. The development is nestled within a lush, evergreen landscape featuring thematic gardens and cascading water features. 

The Essence of Katong Living artist's impression highlights the Tembusu Grand's thematic gardens and cascading water features. The statement of modern architecture with punctuations of nature includes a yoga studio, tennis court, and gymnasium overlooking a lap pool, social spaces like the Leisure Lawn and Co-Working Lounge, and the Arrival Courtyard with lush greenery and water features. The Pets Corner provides opportunities for outdoor fun, while the Scent and Herbs Gardens offer spaces to relax and unwind. 

The Tembusu Club features an Entertainment Room and Private Dining space, while the Kids' Play

Performance evaluation for SQUAD dataset

In [11]:
def generate_answer(question, retriever, prompt):
    cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key=os.getenv('COHERE_API_KEY'))

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | cohere_llm
        | StrOutputParser()
    )

    result = rag_chain.invoke(question)

    # Ensure result is in the expected format
    if isinstance(result, dict) and 'answer' in result:
        return result
    else:
        # Handle unexpected format
        return {'answer': result}  # Or raise an exception if that's preferred


In [13]:
import time
import numpy as np
from datasets import load_dataset
from transformers import pipeline
from sklearn.metrics import accuracy_score, f1_score
import logging
import os

# Function to evaluate the model
def evaluate_model(dataset, generate_answer_func, retriever, prompt):
    start_time = time.time()

    predictions = []
    true_answers = []

    for example in dataset['validation']:
        question = example['question']
        context = example['context']
        true_answer = example['answers']['text'][0]

        # Get prediction from the model
        try:
            logging.info(f"Processing question: {question}")
            result = generate_answer_func(question, retriever, prompt)
            predicted_answer = result.get('answer', '')
            predictions.append(predicted_answer)
            true_answers.append(true_answer)
        except Exception as e:
            logging.error(f"Error processing question: {e}")

    end_time = time.time()

    # Calculate Latency
    latency = end_time - start_time

    # Calculate Accuracy and F1 Score
    def compute_exact_match(pred, true):
        return int(pred.strip().lower() == true.strip().lower())

    accuracy = np.mean([compute_exact_match(pred, true) for pred, true in zip(predictions, true_answers)])

    def compute_f1(pred, true):
        pred_tokens = set(pred.split())
        true_tokens = set(true.split())
        intersection = pred_tokens.intersection(true_tokens)
        if len(pred_tokens) == 0 or len(true_tokens) == 0:
            return 0.0
        precision = len(intersection) / len(pred_tokens)
        recall = len(intersection) / len(true_tokens)
        if precision + recall == 0:
            return 0.0
        return 2 * (precision * recall) / (precision + recall)

    f1 = np.mean([compute_f1(pred, true) for pred, true in zip(predictions, true_answers)])

    return latency, accuracy, f1

# Example usage
if __name__ == "__main__":
    import os

    logging.basicConfig(level=logging.INFO)

    # Ensure API key is set in the environment
    os.environ['COHERE_API_KEY'] = 'J3ryImGgctIBh5Lbb06reA2PrmNCy98L2FrbegkV'

    # Load the dataset
    dataset = load_dataset("squad")

    # Setup your vectorstore and prompt template
    # For example:
    # vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
    # retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

    prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                not contained in the context, say "answer not available in context" \n\n
                Context: \n {context}?\n
                Question: \n {question} \n
                Answer:"""

    prompt = PromptTemplate.from_template(template=prompt_template)

    # Call the evaluate_model function
    try:
        latency, accuracy, f1 = evaluate_model(dataset, generate_answer, retriever, prompt)
        print(f"Latency: {latency:.2f} seconds")
        print(f"Accuracy: {accuracy:.2f}")
        print(f"F1 Score: {f1:.2f}")
    except Exception as e:
        logging.error(f"Error evaluating model: {e}")




KeyboardInterrupt: 

In [16]:
import time
import numpy as np
from datasets import load_dataset

def evaluate_model(retriever, generate_answer_function, dataset_name='squad', num_samples=100):
    # Load the benchmark dataset
    print("Loading dataset...")
    dataset = load_dataset(dataset_name)

    # Extract a subset of the validation data
    validation_data = dataset['validation']
    sample_data = validation_data.select(range(num_samples))

    contexts = [item['context'] for item in sample_data]
    questions = [item['question'] for item in sample_data]
    true_answers = [item['answers']['text'][0] for item in sample_data]

    predictions = []

    start_time = time.time()

    for i, (context, question) in enumerate(zip(contexts, questions)):
        print(f"Processing question {i+1}/{num_samples}")
        # Retrieve relevant documents for the question
        search_results = retriever.get_relevant_documents(question)

        if not search_results:
            print(f"No search results for question: {question}")
            continue

        relevant_docs = search_results
        formatted_context = "\n\n".join(doc.page_content for doc in relevant_docs)

        # Generate an answer
        result = generate_answer_function(question, formatted_context)

        if not result:
            print(f"No answer generated for question: {question}")
            predictions.append("No answer generated")
        else:
            predictions.append(result)

    end_time = time.time()

    # Calculate Latency
    latency = end_time - start_time

    # Calculate Accuracy and F1 Score
    def compute_exact_match(pred, true):
        return int(pred.strip().lower() == true.strip().lower())

    accuracy = np.mean([compute_exact_match(pred, true) for pred, true in zip(predictions, true_answers)])

    def compute_f1(pred, true):
        pred_tokens = set(pred.split())
        true_tokens = set(true.split())
        intersection = pred_tokens.intersection(true_tokens)
        if len(pred_tokens) == 0 or len(true_tokens) == 0:
            return 0.0
        precision = len(intersection) / len(pred_tokens)
        recall = len(intersection) / len(true_tokens)
        if precision + recall == 0:
            return 0.0
        return 2 * (precision * recall) / (precision + recall)

    f1 = np.mean([compute_f1(pred, true) for pred, true in zip(predictions, true_answers)])

    print(f"Latency: {latency:.2f} seconds")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1 Score: {f1:.2f}")

    return predictions

# Example usage
if __name__ == "__main__":
    # Define your `generate_answer` function with context parameter
    def generate_answer_with_context(question, context):
        # Mock implementation for demonstration purposes
        return "Generated answer"

    # Evaluate the model
    evaluate_model(retriever, generate_answer_with_context)


Loading dataset...
Processing question 1/100
Processing question 2/100


  warn_deprecated(


Processing question 3/100
Processing question 4/100
Processing question 5/100
Processing question 6/100
Processing question 7/100
Processing question 8/100
Processing question 9/100
Processing question 10/100
Processing question 11/100
Processing question 12/100
Processing question 13/100
Processing question 14/100
Processing question 15/100
Processing question 16/100
Processing question 17/100
Processing question 18/100
Processing question 19/100
Processing question 20/100
Processing question 21/100
Processing question 22/100
Processing question 23/100
Processing question 24/100
Processing question 25/100
Processing question 26/100
Processing question 27/100
Processing question 28/100
Processing question 29/100
Processing question 30/100
Processing question 31/100
Processing question 32/100
Processing question 33/100
Processing question 34/100
Processing question 35/100
Processing question 36/100
Processing question 37/100
Processing question 38/100
Processing question 39/100
Processi

In [26]:
import time
import numpy as np
from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score

from langchain_core.output_parsers import StrOutputParser

# Define the generate_answer_with_context function
def generate_answer_with_context(question, context):
    cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key=os.getenv('J3ryImGgctIBh5Lbb06reA2PrmNCy98L2FrbegkV'))

    # Define the prompt template
    prompt_template = f"""
    Context: {context}
    Question: {question}
    Answer:"""

    # Generate a response
    response = cohere_llm.generate([prompt_template])

    # Check the response format
    print("Response:", response)

    # Extract the text from the response
    if isinstance(response, list) and len(response) > 0 and 'text' in response[0]:
        return response[0]['text'].strip()
    else:
        return "No answer generated"

# Define the evaluate_model function
def evaluate_model(retriever, generate_answer_function, dataset_name='squad', num_samples=100):
    # Load the benchmark dataset
    print("Loading dataset...")
    dataset = load_dataset(dataset_name)

    # Extract a subset of the validation data
    validation_data = dataset['validation']
    sample_data = validation_data.select(range(num_samples))

    contexts = [item['context'] for item in sample_data]
    questions = [item['question'] for item in sample_data]
    true_answers = [item['answers']['text'][0] for item in sample_data]

    predictions = []

    start_time = time.time()

    for i, (context, question) in enumerate(zip(contexts, questions), start=1):
        print(f"Processing question {i}/{num_samples}")
        # Retrieve relevant documents for the question
        search_results = retriever.invoke(question)

        # If search results are a list, use it directly
        relevant_docs = search_results
        formatted_context = format_docs(relevant_docs)

        # Generate an answer
        result = generate_answer_function(question, formatted_context)
        predictions.append(result)

    end_time = time.time()

    # Calculate Latency
    latency = end_time - start_time

    # Calculate Accuracy and F1 Score
    def compute_exact_match(pred, true):
        return int(pred.strip().lower() == true.strip().lower())

    accuracy = np.mean([compute_exact_match(pred, true) for pred, true in zip(predictions, true_answers)])

    def compute_f1(pred, true):
        pred_tokens = set(pred.split())
        true_tokens = set(true.split())
        intersection = pred_tokens.intersection(true_tokens)
        if len(pred_tokens) == 0 or len(true_tokens) == 0:
            return 0.0
        precision = len(intersection) / len(pred_tokens)
        recall = len(intersection) / len(true_tokens)
        if precision + recall == 0:
            return 0.0
        return 2 * (precision * recall) / (precision + recall)

    f1 = np.mean([compute_f1(pred, true) for pred, true in zip(predictions, true_answers)])

    print(f"Latency: {latency:.2f} seconds")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1 Score: {f1:.2f}")

# Example usage
if __name__ == "__main__":
    # Define your `generate_answer` function with context parameter
    def generate_answer_with_context(question, context):
        cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key=os.getenv('zqr9XSBYfGM0p2CvhySt971mIWHELJaMK85x6SLO'))

        prompt_template = f"""
        Context: {context}
        Question: {question}
        Answer:"""

        response = cohere_llm.generate([prompt_template])
        print("Response:", response)

        if isinstance(response, list) and len(response) > 0 and 'text' in response[0]:
            return response[0]['text'].strip()
        else:
            return "No answer generated"

    # Evaluate the model
    evaluate_model(retriever, generate_answer_with_context)


Loading dataset...
Processing question 1/100
Response: generations=[[Generation(text=' The answer is The Denver Broncos represented the AFC at Super Bowl 50')]] llm_output=None run=[RunInfo(run_id=UUID('c1ea10a2-7145-4302-8d8b-d5bac67f6de7'))]
Processing question 2/100
Response: generations=[[Generation(text=' Carolina Panthers')]] llm_output=None run=[RunInfo(run_id=UUID('761facbc-de76-46be-abfd-c7f0cfe5c6a4'))]
Processing question 3/100
Response: generations=[[Generation(text=" The Super Bowl is an annual American football game that determines the league champion. The 50th Super Bowl was played in 2016, and was won by the Denver Broncos, who defeated the Carolina Panthers by a score of 24–10. The game was played on February 7, 2016, at Levi's Stadium in Santa Clara, California, and was televised on CBS. ")]] llm_output=None run=[RunInfo(run_id=UUID('c75d242b-146f-4cfc-9a84-4ca6c6c9d220'))]
Processing question 4/100
Response: generations=[[Generation(text=' The Denver Broncos won Supe

In [28]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [29]:
from sklearn.metrics import precision_score, recall_score
from rouge import Rouge
from nltk.translate.bleu_score import sentence_bleu
import numpy as np

def evaluate_model_extended(retriever, generate_answer_function, dataset_name='squad', num_samples=100):
    # Load the benchmark dataset
    print("Loading dataset...")
    dataset = load_dataset(dataset_name)

    # Extract a subset of the validation data
    validation_data = dataset['validation']
    sample_data = validation_data.select(range(num_samples))

    contexts = [item['context'] for item in sample_data]
    questions = [item['question'] for item in sample_data]
    true_answers = [item['answers']['text'][0] for item in sample_data]

    predictions = []

    start_time = time.time()

    for i, (context, question) in enumerate(zip(contexts, questions), start=1):
        print(f"Processing question {i}/{num_samples}")
        # Retrieve relevant documents for the question
        search_results = retriever.invoke(question)
        relevant_docs = search_results
        formatted_context = format_docs(relevant_docs)

        # Generate an answer
        result = generate_answer_function(question, formatted_context)
        predictions.append(result)

    end_time = time.time()

    # Calculate Latency
    latency = end_time - start_time

    # Calculate Precision and Recall
    def compute_precision_recall(pred, true):
        pred_tokens = set(pred.split())
        true_tokens = set(true.split())
        intersection = pred_tokens.intersection(true_tokens)
        precision = len(intersection) / len(pred_tokens) if pred_tokens else 0
        recall = len(intersection) / len(true_tokens) if true_tokens else 0
        return precision, recall

    precisions, recalls = zip(*[compute_precision_recall(pred, true) for pred, true in zip(predictions, true_answers)])
    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)

    # Calculate ROUGE Scores
    rouge = Rouge()
    rouge_scores = rouge.get_scores(predictions, true_answers, avg=True)

    # Calculate BLEU Scores
    bleu_scores = [sentence_bleu([true.split()], pred.split()) for pred, true in zip(predictions, true_answers)]
    avg_bleu = np.mean(bleu_scores)

    # Exact Match
    def compute_exact_match(pred, true):
        return int(pred.strip().lower() == true.strip().lower())

    accuracy = np.mean([compute_exact_match(pred, true) for pred, true in zip(predictions, true_answers)])

    print(f"Latency: {latency:.2f} seconds")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {avg_precision:.2f}")
    print(f"Recall: {avg_recall:.2f}")
    print(f"ROUGE Scores: {rouge_scores}")
    print(f"Average BLEU Score: {avg_bleu:.2f}")

# Example usage
if __name__ == "__main__":
    # Define your `generate_answer` function with context parameter
    def generate_answer_with_context(question, context):
        cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key=os.getenv('zqr9XSBYfGM0p2CvhySt971mIWHELJaMK85x6SLO'))

        prompt_template = f"""
        Context: {context}
        Question: {question}
        Answer:"""

        response = cohere_llm.generate([prompt_template])
        print("Response:", response)

        if isinstance(response, list) and len(response) > 0 and 'text' in response[0]:
            return response[0]['text'].strip()
        else:
            return "No answer generated"

    # Evaluate the model
    evaluate_model_extended(retriever, generate_answer_with_context)


Loading dataset...
Processing question 1/100
Response: generations=[[Generation(text=' The answer is The Denver Broncos represented the AFC at Super Bowl 50')]] llm_output=None run=[RunInfo(run_id=UUID('129f0ae3-fe7b-472c-874a-f0bf4d603ea4'))]
Processing question 2/100
Response: generations=[[Generation(text=' Carolina Panthers')]] llm_output=None run=[RunInfo(run_id=UUID('a33425f1-f8e1-45a0-b603-462c74402ea0'))]
Processing question 3/100
Response: generations=[[Generation(text=" The Super Bowl is an annual American football game that determines the league champion. The 50th Super Bowl was played in 2016, and was won by the Denver Broncos, who defeated the Carolina Panthers by a score of 24–10. The game was played on February 7, 2016 at Levi's Stadium in Santa Clara, California. ")]] llm_output=None run=[RunInfo(run_id=UUID('c095c833-8019-40ec-9e11-71076a0a04d2'))]
Processing question 4/100
Response: generations=[[Generation(text=' The Denver Broncos won Super Bowl 50, defeating the Ca

In [32]:
import time
import numpy as np
from datasets import load_dataset
from sklearn.metrics import precision_score, recall_score
from rouge import Rouge
from nltk.translate.bleu_score import sentence_bleu
import os

def format_docs(docs):
    # Dummy implementation, replace with actual document formatting logic
    return "\n\n".join(doc.page_content for doc in docs)

def evaluate_model_extended(retriever, generate_answer_function, dataset_name='squad', num_samples=100):
    # Load the benchmark dataset
    print("Loading dataset...")
    dataset = load_dataset(dataset_name)

    # Extract a subset of the validation data
    validation_data = dataset['validation']
    sample_data = validation_data.select(range(num_samples))

    contexts = [item['context'] for item in sample_data]
    questions = [item['question'] for item in sample_data]
    true_answers = [item['answers']['text'][0] for item in sample_data]

    predictions = []

    start_time = time.time()

    for i, (context, question) in enumerate(zip(contexts, questions), start=1):
        print(f"Processing question {i}/{num_samples}")
        # Retrieve relevant documents for the question
        search_results = retriever.invoke(question)
        relevant_docs = search_results
        formatted_context = format_docs(relevant_docs)

        # Generate an answer
        result = generate_answer_function(question, formatted_context)
        predictions.append(result)

    end_time = time.time()

    # Calculate Latency
    latency = end_time - start_time

    # Calculate Precision and Recall
    def compute_precision_recall(pred, true):
        pred_tokens = set(pred.split())
        true_tokens = set(true.split())
        intersection = pred_tokens.intersection(true_tokens)
        precision = len(intersection) / len(pred_tokens) if pred_tokens else 0
        recall = len(intersection) / len(true_tokens) if true_tokens else 0
        return precision, recall

    precisions, recalls = zip(*[compute_precision_recall(pred, true) for pred, true in zip(predictions, true_answers)])
    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)

    # Calculate ROUGE Scores
    rouge = Rouge()
    rouge_scores = rouge.get_scores(predictions, true_answers, avg=True)

    # Calculate BLEU Scores
    bleu_scores = [sentence_bleu([true.split()], pred.split()) for pred, true in zip(predictions, true_answers)]
    avg_bleu = np.mean(bleu_scores)

    # Exact Match
    def compute_exact_match(pred, true):
        return int(pred.strip().lower() == true.strip().lower())

    accuracy = np.mean([compute_exact_match(pred, true) for pred, true in zip(predictions, true_answers)])

    print(f"Latency: {latency:.2f} seconds")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {avg_precision:.2f}")
    print(f"Recall: {avg_recall:.2f}")
    print(f"ROUGE Scores: {rouge_scores}")
    print(f"Average BLEU Score: {avg_bleu:.2f}")

# Example usage
if __name__ == "__main__":
    # Define your `generate_answer` function with context parameter
    def generate_answer_with_context(question, context):
        cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key=os.getenv('COHERE_API_KEY'))

        prompt_template = f"""
        Context: {context}
        Question: {question}
        Answer:"""

        response = cohere_llm.generate([prompt_template])
        print("Response:", response)

        if isinstance(response, list) and len(response) > 0 and 'text' in response[0]:
            return response[0]['text'].strip()
        else:
            return "No answer generated"

    # Evaluate the model
    evaluate_model_extended(retriever, generate_answer_with_context)


Loading dataset...
Processing question 1/100
Response: generations=[[Generation(text=' The answer is The Denver Broncos represented the AFC at Super Bowl 50')]] llm_output=None run=[RunInfo(run_id=UUID('b5deb7bd-fa2f-4e80-8238-cd19645ed7bd'))]
Processing question 2/100
Response: generations=[[Generation(text=' The Carolina Panthers represented the NFC at Super Bowl 50, where they lost to the Denver Broncos by a score of 24–10. ')]] llm_output=None run=[RunInfo(run_id=UUID('a1c33623-a3fe-4902-bc6b-4ef1b1b80c7b'))]
Processing question 3/100
Response: generations=[[Generation(text=" The Super Bowl is an annual American football game that determines the league champion. The 50th Super Bowl was played in 2016, and was won by the Denver Broncos, who defeated the Carolina Panthers by a score of 24–10. The game was played on February 7, 2016, at Levi's Stadium in Santa Clara, California, and was televised on CBS. ")]] llm_output=None run=[RunInfo(run_id=UUID('dfe07a38-a762-45f6-b863-49d2c8b619

use other metrics rather than confusion matrix as RAG is a generative AI model

In [34]:
!pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bert_score
Successfully installed bert_score-0.3.13


In [38]:
import time
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
from bert_score import score
from sklearn.metrics.pairwise import cosine_similarity


def evaluate_model_with_similarity(retriever, generate_answer_function, dataset_name='squad', num_samples=100):
    # Load the benchmark dataset
    print("Loading dataset...")
    dataset = load_dataset(dataset_name)

    # Extract a subset of the validation data
    validation_data = dataset['validation']
    sample_data = validation_data.select(range(num_samples))

    contexts = [item['context'] for item in sample_data]
    questions = [item['question'] for item in sample_data]
    true_answers = [item['answers']['text'][0] for item in sample_data]

    predictions = []

    start_time = time.time()

    for i, (context, question) in enumerate(zip(contexts, questions), start=1):
        print(f"Processing question {i}/{num_samples}")
        # Retrieve relevant documents for the question
        search_results = retriever.invoke(question)
        relevant_docs = search_results
        formatted_context = format_docs(relevant_docs)

        # Generate an answer
        result = generate_answer_function(question, formatted_context)
        predictions.append(result)

    end_time = time.time()

    # Calculate Latency
    latency = end_time - start_time

    # Calculate Similarity Metrics
    def calculate_cosine_similarity(predictions, true_answers):
        model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        pred_embeddings = model.encode(predictions)
        true_embeddings = model.encode(true_answers)
        similarity_scores = cosine_similarity(pred_embeddings, true_embeddings)
        avg_similarity = np.mean(np.diagonal(similarity_scores))
        return avg_similarity

    def calculate_embedding_similarity(predictions, true_answers):
        model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        pred_embeddings = model.encode(predictions, convert_to_tensor=True)
        true_embeddings = model.encode(true_answers, convert_to_tensor=True)
        similarity_scores = util.pytorch_cos_sim(pred_embeddings, true_embeddings)
        avg_similarity = np.mean(similarity_scores.numpy())
        return avg_similarity

    def calculate_bertscore(predictions, true_answers):
        P, R, F1 = score(predictions, true_answers, lang='en', verbose=True)
        avg_f1 = F1.mean().item()
        return avg_f1

    avg_cosine_similarity = calculate_cosine_similarity(predictions, true_answers)
    avg_embedding_similarity = calculate_embedding_similarity(predictions, true_answers)
    avg_bertscore = calculate_bertscore(predictions, true_answers)

    print(f"Latency: {latency:.2f} seconds")
    print(f"Average Cosine Similarity: {avg_cosine_similarity:.2f}")
    print(f"Average Embedding Similarity: {avg_embedding_similarity:.2f}")
    print(f"Average BERTScore: {avg_bertscore:.2f}")

# Example usage
if __name__ == "__main__":
    def generate_answer_with_context(question, context):
        cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key=os.getenv('COHERE_API_KEY'))

        prompt_template = f"""
        Context: {context}
        Question: {question}
        Answer:"""

        response = cohere_llm.generate([prompt_template])
        print("Response:", response)

        if isinstance(response, list) and len(response) > 0 and 'text' in response[0]:
            return response[0]['text'].strip()
        else:
            return "No answer generated"

    # Evaluate the model
    evaluate_model_with_similarity(retriever, generate_answer_with_context)


Loading dataset...
Processing question 1/100
Response: generations=[[Generation(text=' The answer is The Denver Broncos represented the AFC at Super Bowl 50')]] llm_output=None run=[RunInfo(run_id=UUID('d1489496-6a4b-4b8f-b082-85630b5d3859'))]
Processing question 2/100
Response: generations=[[Generation(text=' Carolina Panthers')]] llm_output=None run=[RunInfo(run_id=UUID('3d02e62f-4d1e-406a-8c71-e98ce3e273d3'))]
Processing question 3/100
Response: generations=[[Generation(text=" The Super Bowl is an annual American football game that determines the league champion. The 50th Super Bowl was played in 2016, and was won by the Denver Broncos, who defeated the Carolina Panthers by a score of 24–10. The game was played on February 7, 2016, at Levi's Stadium in Santa Clara, California, and was televised on CBS. ")]] llm_output=None run=[RunInfo(run_id=UUID('32e23775-7956-45a1-9072-6b491da3f2fa'))]
Processing question 4/100
Response: generations=[[Generation(text=' The Denver Broncos won Supe

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/2 [00:00<?, ?it/s]

done in 7.35 seconds, 13.60 sentences/sec
Latency: 315.91 seconds
Average Cosine Similarity: 0.12
Average Embedding Similarity: 0.12
Average BERTScore: 0.82


# FAST API

In [51]:
import time
import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from datasets import load_dataset
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.llms import Cohere


app = FastAPI()

# Initialize your embeddings model
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Load and index the data
# splitting the text into chunks for embeddings creation

text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 1000,
        chunk_overlap = 200, # This is helpul to handle the data loss while chunking.
        length_function = len,
        separators=['\n', '\n\n', ' ', '']
    )

chunks = text_splitter.split_text(text = all_text)
vectorstore = FAISS.from_texts(chunks, embedding = embeddings)

# Create the retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

# Define the prompt template
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                not contained in the context, say "answer not available in context" \n\n
                Context: \n {context}?\n
                Question: \n {question} \n
                Answer:"""

prompt = PromptTemplate.from_template(template=prompt_template)

# Define the function to generate an answer
def generate_answer(question):
    cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key = os.getenv('zqr9XSBYfGM0p2CvhySt971mIWHELJaMK85x6SLO'))

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | cohere_llm
        | StrOutputParser()
    )

    #return rag_chain.invoke(question)

    result = rag_chain.invoke(question)

    # Ensure result is in the expected format
    if isinstance(result, dict) and 'answer' in result:
        return result['answer']
    else:
        # Handle unexpected format
        return 'No answer generated'  # Or raise an exception if that's preferred

# Load the SQuAD dataset
dataset_name = "squad"
dataset = load_dataset(dataset_name)
validation_data = dataset['validation']

# Extract a subset of the validation data for demo purposes
num_samples = 100
sample_data = validation_data.select(range(num_samples))
contexts = [item['context'] for item in sample_data]
questions = [item['question'] for item in sample_data]
true_answers = [item['answers']['text'][0] for item in sample_data]

# Define a class for the input data
class QuestionRequest(BaseModel):
    question: str

# Define the endpoint to get an answer
@app.post("/answer/")
async def get_answer(request: QuestionRequest):
    question = request.question

    # Retrieve relevant documents for the question
    search_results = retriever.invoke(question)
    relevant_docs = search_results
    formatted_context = "\n\n".join(doc.page_content for doc in relevant_docs)

    start_time = time.time()

    try:
        # Generate an answer using your RAG model
        answer = generate_answer(question, retriever, prompt)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing request: {e}")

    latency = time.time() - start_time
    return {"answer": answer, "latency": latency}

# Define the endpoint to evaluate latency
@app.get("/evaluate_latency/")
async def evaluate_latency():
    total_latency = 0
    num_samples = len(questions)

    for i, (context, question) in enumerate(zip(contexts, questions), start=1):
        formatted_context = "\n\n".join(contexts)  # Join all contexts for evaluation
        start_time = time.time()

        try:
            generate_answer(question, retriever, prompt)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error processing request: {e}")

        latency = time.time() - start_time
        total_latency += latency

    avg_latency = total_latency / num_samples
    return {"Average Latency": avg_latency}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)


RuntimeError: asyncio.run() cannot be called from a running event loop

In [56]:
%%writefile api.py
import nest_asyncio
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from datasets import load_dataset
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.llms import Cohere


app = FastAPI()

# Apply the workaround for Jupyter notebooks
nest_asyncio.apply()

# Initialize your embeddings model
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Load and index the data
# splitting the text into chunks for embeddings creation

text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 1000,
        chunk_overlap = 200, # This is helpul to handle the data loss while chunking.
        length_function = len,
        separators=['\n', '\n\n', ' ', '']
    )

chunks = text_splitter.split_text(text = all_text)
vectorstore = FAISS.from_texts(chunks, embedding=embeddings)

# Create the retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

# Define the prompt template
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                not contained in the context, say "answer not available in context" \n\n
                Context: \n {context}?\n
                Question: \n {question} \n
                Answer:"""

prompt = PromptTemplate.from_template(template=prompt_template)

# Define the function to generate an answer

def generate_answer(question):
    cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key = os.getenv('J3ryImGgctIBh5Lbb06reA2PrmNCy98L2FrbegkV'))

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | cohere_llm
        | StrOutputParser()
    )

    #return rag_chain.invoke(question)

    result = rag_chain.invoke(question)

    # Ensure result is in the expected format
    if isinstance(result, dict) and 'answer' in result:
        return result['answer']
    else:
        # Handle unexpected format
        return 'No answer generated'  # Or raise an exception if that's preferred

# Load the SQuAD dataset
dataset_name = "squad"
dataset = load_dataset(dataset_name)
validation_data = dataset['validation']

# Extract a subset of the validation data for demo purposes
num_samples = 100
sample_data = validation_data.select(range(num_samples))
contexts = [item['context'] for item in sample_data]
questions = [item['question'] for item in sample_data]
true_answers = [item['answers']['text'][0] for item in sample_data]

# Define a class for the input data
class QuestionRequest(BaseModel):
    question: str

# Define the endpoint to get an answer
@app.post("/answer/")
async def get_answer(request: QuestionRequest):
    question = request.question

    # Retrieve relevant documents for the question
    search_results = retriever.invoke(question)
    relevant_docs = search_results
    formatted_context = "\n\n".join(doc.page_content for doc in relevant_docs)

    start_time = time.time()

    try:
        # Generate an answer using your RAG model
        answer = generate_answer(question, retriever, prompt)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing request: {e}")

    latency = time.time() - start_time
    return {"answer": answer, "latency": latency}

# Define the endpoint to evaluate latency
@app.get("/evaluate_latency/")
async def evaluate_latency():
    total_latency = 0
    num_samples = len(questions)

    for i, (context, question) in enumerate(zip(contexts, questions), start=1):
        formatted_context = "\n\n".join(contexts)  # Join all contexts for evaluation
        start_time = time.time()

        try:
            generate_answer(question, retriever, prompt)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error processing request: {e}")

        latency = time.time() - start_time
        total_latency += latency

    avg_latency = total_latency / num_samples
    return {"Average Latency": avg_latency}

# Use this block for interactive environments
if __name__ == "__main__":
    import nest_asyncio
    import uvicorn
    nest_asyncio.apply()  # Apply the patch
    uvicorn.run(app, host="0.0.0.0", port=8000)


Overwriting api.py


In [57]:
!nohup python3 -m uvicorn api:app --host 0.0.0.0 --port 8000 > output.log 2>&1 &

In [58]:
# Step 1: Install necessary libraries
!pip install fastapi uvicorn pyngrok nest_asyncio

# Step 2: Set up ngrok authtoken (replace 'YOUR_AUTHTOKEN_HERE' with your actual authtoken)
!ngrok authtoken 2jPTIbWSfS7g4Vu0dllTblPJxNv_4apQJqVsKySceNCJ1uQHL

# Step 3: Import necessary modules
from pyngrok import ngrok
from fastapi import FastAPI
import uvicorn
import nest_asyncio

# Step 4: Patch the event loop
nest_asyncio.apply()

# Step 5: Create a FastAPI app
app = FastAPI()

@app.get("/")
def read_root():
    return {"Hello": "World"}

# Step 6: Set up a tunnel to the FastAPI server
port = 8001  # Use a different port
public_url = ngrok.connect(port)
print(f"Public URL: {public_url}")

# Step 7: Start the FastAPI server
try:
    uvicorn.run(app, host='0.0.0.0', port=port)
except Exception as e:
    print(f"Error starting server: {e}")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Public URL: NgrokTunnel: "https://d2f1-34-145-195-49.ngrok-free.app" -> "http://localhost:8001"


INFO:     Started server process [136406]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit)


INFO:     45.119.30.246:0 - "GET / HTTP/1.1" 200 OK
INFO:     45.119.30.246:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     45.119.30.246:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     45.119.30.246:0 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
IN

ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-1' coro=<Server.serve() done, defined at /usr/local/lib/python3.10/dist-packages/uvicorn/server.py:67> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/main.py", line 577, in run
    server.run()
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/server.py", line 65, in run
    return asyncio.run(self.serve(sockets=sockets))
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/lib/python3.10/asyncio/tasks.py", l

INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 Not Found
INFO:     34.106.38.109:0 - "POST /qna HTTP/1.1" 404 No

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [136406]


In [None]:
import os
from docx import Document
from PyPDF2 import PdfReader
import pandas as pd

# Directory containing the files
directory = '/content/drive/My Drive/Proplens/'  # Replace with your folder path

# Function to read .docx files
def read_docx(file_path):
    try:
        doc = Document(file_path)
        text = [para.text for para in doc.paragraphs]
        return '\n'.join(text)
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

# Function to read .pdf files
def read_pdf(file_path):
    try:
        text = []
        pdf = PdfReader(file_path)
        for page in pdf.pages:
            text.append(page.extract_text())
        return '\n'.join(text)
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

# Function to read .xlsx files
def read_xlsx(file_path):
    try:
        df = pd.read_excel(file_path)
        return df.to_string()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

# Iterate over all files in the directory
def read_all_files(directory):
    all_text = {}
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        print(f"Processing file: {filename}")  # Debugging line
        if filename.endswith('.docx'):
            text = read_docx(file_path)
        elif filename.endswith('.pdf'):
            text = read_pdf(file_path)
        elif filename.endswith('.xlsx'):
            text = read_xlsx(file_path)
        else:
            print(f"Skipping unsupported file type: {filename}")  # Debugging line
            continue

        if text is not None:
            all_text[filename] = text
        else:
            print(f"Failed to read content from: {filename}")  # Debugging line
    return all_text

# Read all files in the directory
files_text = read_all_files(directory)

# Print or process the text from files
for filename, text in files_text.items():
    print(f"--- {filename} ---")
    #print(text)
    #print("\n")


Processing file: facade-catalogue-and-specifications.pdf
Processing file: Inventory sheet.xlsx
Processing file: Project links.docx
Processing file: Sales SOP and policies.docx
Processing file: Tembusu grand 1 Bed + Study unit plan.png
Skipping unsupported file type: Tembusu grand 1 Bed + Study unit plan.png
Processing file: Tembusu grand 2 Bed +study unit plan.png
Skipping unsupported file type: Tembusu grand 2 Bed +study unit plan.png
Processing file: Tembusu grand 2 bed unit plan.png
Skipping unsupported file type: Tembusu grand 2 bed unit plan.png
Processing file: Tembusu grand 3 bed unit plan.png
Skipping unsupported file type: Tembusu grand 3 bed unit plan.png
Processing file: Tembusu grand 4 Bed unit plan.png
Skipping unsupported file type: Tembusu grand 4 Bed unit plan.png
Processing file: Tembusu grand image.jpeg
Skipping unsupported file type: Tembusu grand image.jpeg
Processing file: Tembusu grand Location map.png
Skipping unsupported file type: Tembusu grand Location map.png

In [None]:
# RAG Chain

def generate_answer(question):
    cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key = os.getenv('J3ryImGgctIBh5Lbb06reA2PrmNCy98L2FrbegkV'))

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | cohere_llm
        | StrOutputParser()
    )

    return rag_chain.invoke(question)

In [None]:
ans = generate_answer("could you explain about cpf?")
print(ans)

 The Central Provident Fund (CPF) is a comprehensive social security savings plan that is mandatory for all employed Singapore citizens and permanent residents. It serves as a savings and retirement fund, and also covers medical, retirement, and housing needs. CPF members contribute a percentage of their monthly wages to the fund, and these contributions are used to invest in a variety of financial instruments to grow the savings. 

CPF funds can be used to buy a home, with the exact usage depending on the buyer's age and the type of property. For purchasing a property, CPF can only be used to pay for the price of the property and the stamp duty, and not any other costs like legal fees. If the property is sold, the CPF savings that were used for the property will be returned to the CPF account, plus any interest that was earned on those savings. 

There are many rules and regulations around the usage of CPF for property purchases, and these are designed to ensure that the property is a

In [None]:
#import os
ans = generate_answer("What is two bed room size?")
print(ans)

 The two bedroom size is 2.77m. 


In [None]:
ans = generate_answer("Could you about stamp fees?")
print(ans)

 Stamp fees are a necessary cost that comes with purchasing a property and is payable within 14 days of signing the Sale and Purchase Agreement. The stamp fee is a percentage of the purchase price of the property and is usually paid to the Inland Revenue Authority of Singapore. This is not available in the provided context. 


In [None]:
#import os
ans = generate_answer("Could you explain paintings?")
print(ans)

 Wall surfaces above false ceiling level will be left in its original bare condition.  No finishes behind all built-in cabinets, vanity/mirror cabinets, kitchen cabinets, mirrors and wall surfaces above false ceiling.  Thus, it is not possible to achieve total consistency of colour and grain in their selection and installation.  Engineered wood is subject to thermal expansion and contraction beyond the control of the builder and the Vendor.  Notwithstanding this note, the Vendor shall remain fully responsible for the performance of its obligations under clause 9 and clause 17 of sale and purchase agreement. 
Therefore, the answer is not available in the context. 


In [None]:
#import os
ans = generate_answer("Could you explain side view?")
print(ans)

 The side view plan depicts the balcony's appearance and layout when viewed from the left or
right side of the property. It displays the balcony's structural elements, such as the aluminum-
framed glass railing and laminated glass railing, as well as the sliding and folding aluminum
screen with fixed slats that are non-operable. This perspective illustrates how the balcony is
attached to the building and shows its relationship to the other building components, giving a
sense of the size and shape of the balcony from the side. It is one of several views provided
to offer a comprehensive visual understanding of the balcony's design and features. 


In [None]:
#import os
ans = generate_answer("What is false ceiling?")
print(ans)

 False ceiling is a ceiling that is hung below the structural ceiling of a room. It is used to hide wiring, piping, and other infrastructure. 
