In [1]:
import torch

# Report the CUDA devices visible to PyTorch. The device-name lookup is guarded
# because torch.cuda.get_device_name() raises when no CUDA device is present,
# which would otherwise break this cell on CPU-only machines.
gpu_count = torch.cuda.device_count()
print("Number of GPUs available:", gpu_count)
if gpu_count > 0:
    print("GPU Name:", torch.cuda.get_device_name())
else:
    print("GPU Name: none (no CUDA device detected)")

Number of GPUs available: 1
GPU Name: NVIDIA GeForce RTX 3050 Laptop GPU


In [2]:
import os

from flask import Flask, request, jsonify
from flask_cors import CORS
from flask_ngrok import run_with_ngrok
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.document import Document
from langchain_chroma import Chroma
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from openai import OpenAI

In [3]:
def load_documents(data_dir: str = "./DATA2"):
    """Load every PDF found in ``data_dir`` as LangChain documents.

    Args:
        data_dir: Directory handed to ``PyPDFDirectoryLoader``. Defaults to
            ``"./DATA2"``, the folder this notebook was written against.

    Returns:
        Whatever ``PyPDFDirectoryLoader.load()`` returns for that directory.
    """
    document_loader = PyPDFDirectoryLoader(data_dir)
    return document_loader.load()

In [4]:
# Load the PDF pages once and keep them in `docs` for the cells below.
docs = load_documents()
# Bare last expression: previews the first loaded document as the cell output.
docs[0]

Document(metadata={'source': 'DATA2\\10.1177_20533691231216162.pdf', 'page': 0}, page_content='Original article\nPost Reproductive Health\n2023, Vol. 29(4) 201– 221\n© The Author(s) 2023\nArticle reuse guidelines:\nsagepub.com/journals-permissions\nDOI: 10.1177/20533691231216162\njournals.sagepub.com/home/min\nPerimenopausal women’s voices: How does\ntheir period at the end of reproductive life\naffect wellbeing?\nElizabeth Ray1, Jacqueline A Maybin2 and Joyce C Harper1\ue840\nAbstract\nObjective: To explore perimenopausal women’s feelings towards their periods, the impact on their wellbeing and how we\ncan support them.\nStudy design:Participants were recruited for focus groups through social media advertisements. In 6 online focus groups,\n31 perimenopausal women aged 40– 55 living in the UK were asked 5 questions relating to periods and perimenopause,\nsupport and education.\nMain outcome measures:Content analysis.\nResults: When asked How do you feel about having a period? The part

In [5]:
def split_document(
    documents: list[Document],
    chunk_size: int = 800,
    chunk_overlap: int = 80,
):
    """Split documents into overlapping character-based chunks.

    Args:
        documents: Documents to split.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared between neighbouring chunks.

    Returns:
        The chunk list produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    # Split the argument itself. The previous version read the notebook-level
    # `docs` global, so the parameter was silently ignored.
    return text_splitter.split_documents(documents)

In [6]:
# Split the loaded documents into chunks and keep them in `chunks`.
chunks = split_document(docs)
# Bare last expression: previews the first chunk as the cell output.
chunks[0]

Document(metadata={'source': 'DATA2\\10.1177_20533691231216162.pdf', 'page': 0}, page_content='Original article\nPost Reproductive Health\n2023, Vol. 29(4) 201– 221\n© The Author(s) 2023\nArticle reuse guidelines:\nsagepub.com/journals-permissions\nDOI: 10.1177/20533691231216162\njournals.sagepub.com/home/min\nPerimenopausal women’s voices: How does\ntheir period at the end of reproductive life\naffect wellbeing?\nElizabeth Ray1, Jacqueline A Maybin2 and Joyce C Harper1\ue840\nAbstract\nObjective: To explore perimenopausal women’s feelings towards their periods, the impact on their wellbeing and how we\ncan support them.\nStudy design:Participants were recruited for focus groups through social media advertisements. In 6 online focus groups,\n31 perimenopausal women aged 40– 55 living in the UK were asked 5 questions relating to periods and perimenopause,\nsupport and education.')

In [7]:
# Read the NVIDIA API key from the environment instead of embedding it in the
# notebook: a key stored in a cell is exposed to everyone who can read the file.
# NOTE(review): any key that was ever committed in this cell should be treated
# as compromised and revoked.
nvidia_api_key = os.environ.get("NVIDIA_API_KEY")
if not nvidia_api_key:
    raise RuntimeError(
        "NVIDIA_API_KEY is not set; export it before starting the notebook kernel."
    )

# OpenAI-compatible client pointed at the NVIDIA integration endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=nvidia_api_key,
)

In [8]:
def get_embedding_function():
    """Return a freshly constructed ``NVIDIAEmbeddings`` client.

    The client is configured for the ``NV-Embed-QA`` model, authenticated with
    the module-level ``nvidia_api_key`` and created with ``truncate="NONE"``.
    """
    settings = {
        "model": "NV-Embed-QA",
        "api_key": nvidia_api_key,
        "truncate": "NONE",
    }
    return NVIDIAEmbeddings(**settings)

In [9]:
def add_to_chroma(chunks: list[Document]):
    """Add chunks that are not yet stored to the Chroma collection in ``./db``.

    Each chunk is given an ID by ``calculate_chunk_ids``; chunks whose ID is
    already present in the collection are skipped, so calling this again with
    the same input adds nothing new.

    Args:
        chunks: Chunks to index. Their ``metadata`` gains an ``"id"`` entry.
    """
    db = Chroma(
        persist_directory="./db", embedding_function=get_embedding_function()
    )

    # IDs must be assigned before they can be compared against the store. The
    # previous version referenced `chunks_with_ids` without ever creating it.
    chunks_with_ids = calculate_chunk_ids(chunks)

    # Add or update DB: only the "ids" entry of the result is used below.
    existing_items = db.get(include=[])
    existing_ids = set(existing_items["ids"])
    print(f"Number of Existing documents in DB: {len(existing_ids)}")

    # Only add docs that don't exist in DB.
    new_chunks = [
        chunk for chunk in chunks_with_ids
        if chunk.metadata["id"] not in existing_ids
    ]
    if not new_chunks:
        # Nothing to embed; avoid sending an empty batch to the vector store.
        return

    new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
    db.add_documents(new_chunks, ids=new_chunk_ids)

    # Call persist() only where the wrapper provides it, so the function works
    # with Chroma wrappers that do not expose this method.
    if hasattr(db, "persist"):
        db.persist()

def calculate_chunk_ids(chunks):
    """Stamp every chunk with an ID of the form ``source:page:index``.

    ``index`` counts consecutive chunks that share the same ``source`` and
    ``page`` metadata, starting at 0 and resetting whenever that pair changes.
    The ID is written to ``chunk.metadata["id"]`` in place.

    Returns:
        The same ``chunks`` object that was passed in.
    """
    previous_key = None
    position = 0

    for item in chunks:
        meta = item.metadata
        origin, page_no = meta.get("source"), meta.get("page")
        page_key = f"{origin}:{page_no}"

        # Keep counting while we stay on the same page, otherwise restart.
        position = position + 1 if page_key == previous_key else 0

        meta["id"] = f"{page_key}:{position}"
        previous_key = page_key

    return chunks

In [10]:
def query_rag(query, k: int = 3):
    """Answer ``query`` using context retrieved from the Chroma store in ``./db``.

    The ``k`` closest chunks are fetched, their text is joined with newlines,
    inserted into the prompt together with the query, and the prompt is passed
    to ``nvidia_bot``.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve as context (default 3).

    Returns:
        The value returned by ``nvidia_bot`` for the assembled prompt.
    """
    embedding_function = get_embedding_function()
    db = Chroma(
        persist_directory = "./db",
        embedding_function = embedding_function,
    )

    results = db.similarity_search_with_score(query, k = k)
    # similarity_search_with_score yields (document, score) pairs, so each entry
    # has to be unpacked before reading page_content; the score is not used.
    context_data = "\n".join([doc.page_content for doc, _score in results])

    # NOTE(review): the prompt text is kept byte-for-byte, including the
    # "<assistant|>" marker, which looks like a typo for "<|assistant|>" — confirm.
    prompt_template = f"""
    <|context|>
    You are a compassionate, knowledgeable virtual medical assistant focused solely on women’s health.
    Your goal is to provide precise, empathetic, and accurate information related to women’s health, conditions, and wellness.And also
    provide the imformation in a polite girly manner(accroding to the user's way of questioning), and when needed act faminine and make the user feel
    safe.
    You should only answer questions about women's health issues.

    If a question is asked that pertains to men’s or male's health or any other unrelated topic, respond with:
    "I am specifically designed to address topics related to women’s health. Please feel free to ask any questions you may have on this subject.😄"

    You can refer to the following data for your answers:
    {context_data}

    Be clear, direct, and supportive in your answers.
    </s>
    <|user|>
    {query}
    </s>
    <assistant|>
    """
    return nvidia_bot(prompt_template)


In [11]:
def nvidia_bot(prompt_template):
    """Send ``prompt_template`` to the chat model and return the full reply.

    The completion is requested in streaming mode as a single user message;
    the streamed text fragments are collected and joined into one string.

    Args:
        prompt_template: The complete prompt text.

    Returns:
        The concatenated reply with leading and trailing whitespace removed.
    """
    completion = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=[{"role": "user", "content": prompt_template}],
        temperature=0.2,
        top_p=1,
        max_tokens=2840,
        stream=True,
    )

    pieces = []
    for chunk in completion:
        # A streamed chunk may carry an empty `choices` list; indexing it
        # unconditionally would raise IndexError and lose the whole reply.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text is not None:
            pieces.append(text)
    return "".join(pieces).strip()

In [None]:
app = Flask(__name__)
# Enable CORS so a browser front end on another origin can call the API.
# NOTE(review): presumably this allows every origin by default — restrict it
# before exposing the server beyond localhost.
CORS(app)


@app.route("/api/query", methods=["POST"])
def handle_query():
    """Handle ``POST /api/query``.

    Expects a JSON object with a non-empty string ``"query"`` field.

    Returns:
        ``{"response": <answer>}`` on success, or ``{"error": "Query is required"}``
        with status 400 when the body is missing, is not a JSON object, or has
        no usable query.
    """
    # silent=True yields None instead of an error response for a missing or
    # malformed JSON body, so every bad request gets the same JSON error below.
    data = request.get_json(silent=True)
    query = data.get("query") if isinstance(data, dict) else None
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "Query is required"}), 400

    response = query_rag(query)
    return jsonify({"response": response})


# Start the server. This call blocks the cell until the server is stopped.
app.run()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [03/May/2025 00:13:34] "OPTIONS /api/query HTTP/1.1" 200 -
127.0.0.1 - - [03/May/2025 00:13:49] "POST /api/query HTTP/1.1" 200 -
127.0.0.1 - - [03/May/2025 10:49:31] "OPTIONS /api/query HTTP/1.1" 200 -
127.0.0.1 - - [03/May/2025 10:49:37] "POST /api/query HTTP/1.1" 200 -
