## Medical RAG Chatbot

In [1]:
import os
os.chdir('../')
# %pwd

In [2]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
#extract text from pdf files   #load all the pdf files from data folder
def load_pdf_files(data):
    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )
    return loader.load()

In [4]:
# --- MLflow Integration Start ---
import mlflow
import time
import json
import os
import dagshub
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Assume load_pdf_files is defined elsewhere, if not, here's an example:
def load_pdf_files(data_path):
    loader = DirectoryLoader(
        data_path,
        glob="*.pdf", #load all the pdf files from data folder
        loader_cls=PyPDFLoader
    )
    return loader.load()

# Set MLflow Tracking URI (where your MLflow server is running)
# For local file-based tracking (default):
# mlflow.set_tracking_uri("file:./mlruns")
# For a local server you started with 'mlflow server --host 0.0.0.0 --port 5000':


# mlflow.set_tracking_uri("http://127.0.0.1:5000")
dagshub.init(repo_owner='264Gaurav', repo_name='medical-chatbot', mlflow=True)

# Set the experiment name
mlflow.set_experiment("PDF_Processing_Experiment")

# Start an MLflow run to log this execution
with mlflow.start_run() as run:
    run_id = run.info.run_id
    print(f"MLflow Run ID: {run_id}")

    data_dir = 'data' # This is the path to your data directory
    mlflow.log_param("data_directory", data_dir)
    mlflow.log_param("pdf_loader_class", "PyPDFLoader")

    start_time = time.time()
    extracted_data = load_pdf_files(data_dir) # extracted_data will be a list of Document objects
    end_time = time.time()
    loading_duration = end_time - start_time

    mlflow.log_metric("num_documents_loaded", len(extracted_data))
    mlflow.log_metric("pdf_loading_duration_seconds", loading_duration)

    # Log the number of documents loaded - CORRECTED
    # 'extracted_data' is the list of loaded documents, so its length gives the number of documents
    num_documents = len(extracted_data)
    mlflow.log_param("num_documents", num_documents)

    # Log text splitter parameters
    chunk_size = 1000
    chunk_overlap = 50
    mlflow.log_param("chunk_size", chunk_size)
    mlflow.log_param("chunk_overlap", chunk_overlap)
    mlflow.log_param("text_splitter_class", "RecursiveCharacterTextSplitter")


    # Split the documents into chunks - CORRECTED
    # You should split 'extracted_data' (the list of Document objects), not 'data_dir' (the string path)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = text_splitter.split_documents(extracted_data) # Use extracted_data here

    # Log the number of chunks created
    num_chunks = len(chunks)
    mlflow.log_metric("num_chunks_created", num_chunks) # Changed to metric as it's an output count

    # Optional: Log a sample of the first few chunks as an artifact
    if chunks:
        chunks_sample_path = "chunks_sample.json"
        with open(chunks_sample_path, "w") as f:
            json.dump([chunk.dict() for chunk in chunks[:5]], f, indent=4) # Log first 5 chunks
        mlflow.log_artifact(chunks_sample_path, artifact_path="data_processing_artifacts")
        os.remove(chunks_sample_path) # Clean up local file

    print(f"MLflow Run ID: {run_id}")
    print(f"Loaded {num_documents} documents in {loading_duration:.2f} seconds.")
    print(f"Split into {num_chunks} chunks.")
    print(f"MLflow run finished. View at {mlflow.get_tracking_uri()}")


2025/07/28 03:21:14 INFO mlflow.tracking.fluent: Experiment with name 'PDF_Processing_Experiment' does not exist. Creating a new experiment.


MLflow Run ID: 89395cd4497548b782c2745f6aca6fb6


/var/folders/38/7jcjyd5s1cd09qhrzns_rny80000gn/T/ipykernel_33631/3889639866.py:74: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  json.dump([chunk.dict() for chunk in chunks[:5]], f, indent=4) # Log first 5 chunks


MLflow Run ID: 89395cd4497548b782c2745f6aca6fb6
Loaded 637 documents in 8.54 seconds.
Split into 3027 chunks.
MLflow run finished. View at https://dagshub.com/264Gaurav/medical-chatbot.mlflow
🏃 View run rare-pug-418 at: https://dagshub.com/264Gaurav/medical-chatbot.mlflow/#/experiments/0/runs/89395cd4497548b782c2745f6aca6fb6
🧪 View experiment at: https://dagshub.com/264Gaurav/medical-chatbot.mlflow/#/experiments/0


In [5]:
extracted_data[:5] #show first 5 pages extracted

[Document(metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15:31-06:00', 'source': 'data/Medical_book.pdf', 'total_pages': 637, 'page': 0, 'page_label': '1'}, page_content=''),
 Document(metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15:31-06:00', 'source': 'data/Medical_book.pdf', 'total_pages': 637, 'page': 1, 'page_label': '2'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION'),
 Document(metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15:31-06:00', 'source': 'data/Medical_book.pdf', 'total_pages': 637, 'page': 2, 'page_label': '3'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION\nJACQUELINE L. LONGE, EDITOR\nDEIRDRE S. BLANCHFIELD, ASSOCIATE EDITOR\nVOLUME\nA-B\n1'),
 Doc

In [6]:
# from typing import List
# from langchain.schema import Document

# ##DATA cleaning and filtering
# def filter_docs(docs: List[Document]) -> List[Document]:
#     """
#     Given a list of Document objects, filter out those with new list of Document objects
#     containing only 'source' in metadata and the original page_content.
#     """
#     minimal_docs: List[Document] = []
#     for doc in docs:
#         src=doc.metadata.get('source')
#         minimal_docs.append(
#             Document(
#                 page_content=doc.page_content,
#                 metadata={"source": src}
#             )
#         )
#     return minimal_docs


import mlflow
from typing import List
from langchain.schema import Document

##DATA cleaning and filtering
def filter_docs(docs: List[Document]) -> List[Document]:
    """
    Given a list of Document objects, filter out those with new list of Document objects
    containing only 'source' in metadata and the original page_content.

    MLflow Tracking:
    - Logs 'input_documents_count' as a metric.
    - Logs 'output_documents_count' as a metric.
    - Logs 'documents_filtered_count' as a metric.
    - Logs 'filter_function_name' as a parameter.
    """



    # Log initial state
    input_documents_count = len(docs)
    mlflow.log_metric("input_documents_count_for_filtering", input_documents_count)
    mlflow.log_param("filter_function_name", "filter_docs")

    minimal_docs: List[Document] = []
    for doc in docs:
        src = doc.metadata.get('source')
        # Here, you might add more complex filtering logic if needed
        # For this specific function, it always adds the document, just with minimal metadata
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source": src}
            )
        )

    output_documents_count = len(minimal_docs)
    documents_filtered_count = input_documents_count - output_documents_count # Will be 0 if all are kept

    # Log metrics after processing
    mlflow.log_metric("output_documents_count_after_filtering", output_documents_count)
    mlflow.log_metric("documents_filtered_count", documents_filtered_count)

    # Optional: Log a sample of filtered documents as an artifact
    if minimal_docs:
        sample_docs_path = "filtered_docs_sample.json"
        # Log metadata from first few documents as a sample
        sample_data = []
        for i, doc in enumerate(minimal_docs[:5]): # Log first 5 documents
            sample_data.append({
                "page_content_preview": doc.page_content[:200] + "...",
                "metadata": doc.metadata
            })
        with open(sample_docs_path, "w") as f:
            json.dump(sample_data, f, indent=4)
        mlflow.log_artifact(sample_docs_path, artifact_path="data_filtering_artifacts")
        os.remove(sample_docs_path) # Clean up local file

    return minimal_docs

In [7]:
minimal_docs = filter_docs(extracted_data)

In [8]:
minimal_docs[:5]

[Document(metadata={'source': 'data/Medical_book.pdf'}, page_content=''),
 Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION'),
 Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION\nJACQUELINE L. LONGE, EDITOR\nDEIRDRE S. BLANCHFIELD, ASSOCIATE EDITOR\nVOLUME\nA-B\n1'),
 Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='STAFF\nJacqueline L. Longe, Project Editor\nDeirdre S. Blanchfield, Associate Editor\nChristine B. Jeryan, Managing Editor\nDonna Olendorf, Senior Editor\nStacey Blachford, Associate Editor\nKate Kretschmann, Melissa C. McDade, Ryan\nThomason, Assistant Editors\nMark Springer, Technical Specialist\nAndrea Lopeman, Programmer/Analyst\nBarbara J. Yarrow,Manager, Imaging and Multimedia\nContent\nRobyn V . Young,Project Manager, Imaging and\nMultimedia Content\nDean Dauphinais, Senior Editor, Imaging and\nMultimedia C

In [9]:
## split the documents into smaller chunks
def text_split(minimal_docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(minimal_docs)

In [10]:
text_chunks = text_split(minimal_docs)
print(f"Number of text chunks: {len(text_chunks)}")

Number of text chunks: 3027


In [11]:
text_chunks[:5]  # show first 5 text chunks

[Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION'),
 Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION\nJACQUELINE L. LONGE, EDITOR\nDEIRDRE S. BLANCHFIELD, ASSOCIATE EDITOR\nVOLUME\nA-B\n1'),
 Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='STAFF\nJacqueline L. Longe, Project Editor\nDeirdre S. Blanchfield, Associate Editor\nChristine B. Jeryan, Managing Editor\nDonna Olendorf, Senior Editor\nStacey Blachford, Associate Editor\nKate Kretschmann, Melissa C. McDade, Ryan\nThomason, Assistant Editors\nMark Springer, Technical Specialist\nAndrea Lopeman, Programmer/Analyst\nBarbara J. Yarrow,Manager, Imaging and Multimedia\nContent\nRobyn V . Young,Project Manager, Imaging and\nMultimedia Content\nDean Dauphinais, Senior Editor, Imaging and\nMultimedia Content\nKelly A. Quin, Editor, Imaging and Multimedia Content\nLeitha Ethe

In [12]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

def download_embeddings():
    """
    Download and return the HuggingFace embeddings model.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"

     # Log embedding model parameters
    mlflow.log_param("embedding_model_name", model_name)
    mlflow.log_param("embedding_provider", "HuggingFace")
    mlflow.log_param("embedding_class", "HuggingFaceBgeEmbeddings")


    start_time = time.time()
    embeddings = HuggingFaceBgeEmbeddings(
        model_name=model_name
    )
    end_time = time.time()
    loading_duration = end_time - start_time

    # Log metrics
    mlflow.log_metric("embedding_loading_duration_seconds", loading_duration)

    print(f"Loaded embedding model '{model_name}' in {loading_duration:.2f} seconds.")



    return embeddings

embedding = download_embeddings()

  embeddings = HuggingFaceBgeEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


Loaded embedding model 'sentence-transformers/all-MiniLM-L6-v2' in 6.37 seconds.


In [13]:
vector = embedding.embed_query("Hello to medical chatbot.")
vector

  return forward_call(*args, **kwargs)


[-0.046040285378694534,
 0.05801551416516304,
 0.025460267439484596,
 -0.018886055797338486,
 -0.056887563318014145,
 -0.08692731708288193,
 0.0589798241853714,
 0.11981295794248581,
 0.02426879107952118,
 -0.05556875094771385,
 -0.018091533333063126,
 0.016502052545547485,
 -0.02637430652976036,
 0.011251488700509071,
 0.07773943245410919,
 -0.020143186673521996,
 0.03203606978058815,
 -0.0955556184053421,
 -0.016210833564400673,
 0.05219794809818268,
 0.08739541471004486,
 0.13612855970859528,
 0.054607000201940536,
 -0.007297117728739977,
 -0.03621230646967888,
 0.0024328413419425488,
 0.003911045845597982,
 -0.029571056365966797,
 0.07670431584119797,
 0.025621410459280014,
 0.0118556534871459,
 -0.010180271230638027,
 0.07361406832933426,
 0.07227780669927597,
 0.030669499188661575,
 0.0720134899020195,
 -0.042889174073934555,
 -0.0035438723862171173,
 -0.010086719878017902,
 0.02317650057375431,
 -0.01849573850631714,
 -0.07412372529506683,
 -0.009275458753108978,
 -0.01035922951

In [14]:
print(f"Vector length: {len(vector)}")  # Check the length of the vector  ## Dimentions of the vector
mlflow.log_param("vector_embedding_size", len(vector))

Vector length: 384


384

In [15]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [16]:
## Pinecone is the leading vector database for building accurate and performant AI applications

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [17]:
## Pinecone is the leading vector database for building accurate and performant AI applications
from pinecone import Pinecone
pinecone_api = PINECONE_API_KEY

pc = Pinecone(api_key=pinecone_api)


In [18]:
pc

<pinecone.pinecone.Pinecone at 0x169c93c20>

In [19]:
# from pinecone import ServerlessSpec

# index_name = 'medical-chatbot'

# if not pc.has_index(index_name):
#     pc.create_index(
#         name=index_name,
#         dimension=384,  # Dimension of the embeddings
#         metric='cosine',  # Similarity metric - cosine similarity
#         spec=ServerlessSpec(cloud="aws", region="us-east-1")
#     )

# index = pc.Index(index_name)





import mlflow
import time
from pinecone import Pinecone, ServerlessSpec



# Log stage name
mlflow.log_param("stage", "pinecone_index_setup")

index_name = 'medical-chatbot'
dimension = 384  # Dimension of the embeddings (should match your embedding model output)
metric = 'cosine' # Similarity metric

# Log Pinecone index parameters
mlflow.log_param("pinecone_index_name", index_name)
mlflow.log_param("pinecone_index_dimension", dimension)
mlflow.log_param("pinecone_index_metric", metric)

# Define the serverless spec parameters
cloud_provider = "aws"
region = "us-east-1"
mlflow.log_param("pinecone_cloud_provider", cloud_provider)
mlflow.log_param("pinecone_region", region)


# Check if index exists and create if not
start_check_time = time.time()
index_exists = pc.has_index(index_name)
end_check_time = time.time()
check_duration = end_check_time - start_check_time

mlflow.log_metric("pinecone_index_exists_check_duration_seconds", check_duration)
mlflow.log_param("pinecone_index_existed_before_run", index_exists)

if not index_exists:
    mlflow.log_param("pinecone_index_action", "created_new_index")
    print(f"Pinecone index '{index_name}' does not exist. Creating...")
    start_create_time = time.time()
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=metric,
        spec=ServerlessSpec(cloud=cloud_provider, region=region)
    )
    end_create_time = time.time()
    creation_duration = end_create_time - start_create_time
    mlflow.log_metric("pinecone_index_creation_duration_seconds", creation_duration)
    print(f"Pinecone index '{index_name}' created in {creation_duration:.2f} seconds.")
else:
    mlflow.log_param("pinecone_index_action", "connected_to_existing_index")
    print(f"Pinecone index '{index_name}' already exists. Connecting...")
    mlflow.log_metric("pinecone_index_creation_duration_seconds", 0) # Log 0 if not created

# Connect to the index
start_connect_time = time.time()
index = pc.Index(index_name)
end_connect_time = time.time()
connect_duration = end_connect_time - start_connect_time

mlflow.log_metric("pinecone_index_connection_duration_seconds", connect_duration)
print(f"Connected to Pinecone index '{index_name}' in {connect_duration:.4f} seconds.")

# Optional: Log some basic info about the connected index (e.g., number of vectors)
# This might require querying the index, which adds time, so consider if needed for every run.
try:
    index_info = index.describe_index_stats()
    mlflow.log_metric("pinecone_total_vector_count", index_info.dimension) # This gives dimension, not vector count directly
    # To get actual vector count:
    if index_info.namespaces:
        total_vectors_in_index = sum(ns.vector_count for ns_name, ns in index_info.namespaces.items())
        mlflow.log_metric("pinecone_total_vectors_in_index", total_vectors_in_index)
        mlflow.log_param("pinecone_namespaces", list(index_info.namespaces.keys()))
except Exception as e:
    print(f"Could not get Pinecone index stats: {e}")
    mlflow.log_param("pinecone_index_stats_error", str(e))


Pinecone index 'medical-chatbot' already exists. Connecting...
Connected to Pinecone index 'medical-chatbot' in 0.9872 seconds.


In [20]:
# from langchain_pinecone import PineconeVectorStore

# docsearch = PineconeVectorStore.from_documents(
#     documents=text_chunks,
#     embedding=embedding,
#     index_name=index_name
# )



import mlflow
import time
from langchain_pinecone import PineconeVectorStore



# Log input parameters for this step
mlflow.log_param("vector_store_type", "PineconeVectorStore")
mlflow.log_param("index_name_for_upsertion", index_name) # Re-log index name for this specific step
mlflow.log_param("num_text_chunks_for_upsertion", len(text_chunks))

# Assume embedding model name was already logged, if not, you might log it here again
# mlflow.log_param("embedding_model_used_for_upsertion", embedding.model_name) # if embedding object has a model_name attribute

print(f"Starting upsertion to Pinecone index '{index_name}' with {len(text_chunks)} chunks...")

start_time = time.time()
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embedding,
    index_name=index_name
)
end_time = time.time()
upsertion_duration = end_time - start_time

# Log metrics
mlflow.log_metric("pinecone_upsertion_duration_seconds", upsertion_duration)

print(f"Upsertion to Pinecone completed in {upsertion_duration:.2f} seconds.")

Starting upsertion to Pinecone index 'medical-chatbot' with 3027 chunks...


  return forward_call(*args, **kwargs)


Upsertion to Pinecone completed in 25.67 seconds.


In [21]:
# Load Existing index

from langchain_pinecone import PineconeVectorStore
# Embed each chunk and upsert the embeddings into your Pinecone index.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding
)

## Adding more data to the existing Pinecone vector DB

In [22]:
swe = Document(
    page_content="Hi, My name is Gaurav singh. I am a software engineer. I am working on a project related to medical chatbot.",
    metadata={"source": "gaurav"}
)

In [23]:
docsearch.add_documents(documents=[swe])

['d581ca8c-ca42-48a0-a7ec-736ecffafb0e']

In [24]:
# ## Retrieve documents from Pinecone index as per the similarity

# retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

# retrieved_docs = retriever.invoke("What is Cancer?")
# retrieved_docs



import mlflow
import time
import json
import os # For cleaning up temporary artifact files


# Define parameters for retrieval
retrieval_search_type = "similarity"
retrieval_k = 3
test_query = "What is Cancer?"

# 1. Log Retrieval Parameters
mlflow.log_param("retriever_search_type", retrieval_search_type)
mlflow.log_param("retriever_k_value", retrieval_k)
mlflow.log_param("retrieval_query", test_query) # Log the specific query used

# Configure the retriever
retriever = docsearch.as_retriever(search_type=retrieval_search_type, search_kwargs={"k": retrieval_k})

# 2. Measure and Log Retrieval Time
start_time = time.time()
retrieved_docs = retriever.invoke(test_query)
end_time = time.time()
retrieval_duration = end_time - start_time
mlflow.log_metric("retrieval_duration_seconds", retrieval_duration)

# 3. Log Retrieved Document Count
num_retrieved = len(retrieved_docs)
mlflow.log_metric("num_retrieved_documents", num_retrieved)

# 4. Log a Sample of Retrieved Documents as an Artifact
if retrieved_docs:
    sample_docs = []
    # Log details of the first 3 documents as a sample
    for i, doc in enumerate(retrieved_docs[:3]):
        sample_docs.append({
            "index": i + 1,
            "source": doc.metadata.get('source', 'N/A'),
            "page": doc.metadata.get('page', 'N/A'),
            "content_preview": doc.page_content[:200] + "..." # Log first 200 chars
        })

    temp_file_path = "retrieved_docs_sample.json"
    with open(temp_file_path, "w") as f:
        json.dump(sample_docs, f, indent=4)

    mlflow.log_artifact(temp_file_path, artifact_path="retrieval_output_samples")
    os.remove(temp_file_path) # Clean up the local temporary file

print(f"Retrieved {num_retrieved} documents for '{test_query}' in {retrieval_duration:.4f} seconds.")
# You can also print a preview of retrieved_docs here if needed for immediate feedback.

  return forward_call(*args, **kwargs)


Retrieved 3 documents for 'What is Cancer?' in 0.4099 seconds.


In [25]:
# from langchain_google_genai import ChatGoogleGenerativeAI

# chatModel = ChatGoogleGenerativeAI(
#     model="gemini-2.0-flash",
#     temperature=0,
#     max_tokens=None,
#     timeout=None,
#     max_retries=2,
# )






import mlflow
import time
from langchain_google_genai import ChatGoogleGenerativeAI


# Define LLM parameters
llm_model_name = "gemini-2.0-flash"
llm_temperature = 0
llm_max_tokens = None
llm_timeout = None
llm_max_retries = 2

# Log LLM parameters
mlflow.log_param("llm_provider", "Google_GenAI")
mlflow.log_param("llm_class", "ChatGoogleGenerativeAI")
mlflow.log_param("llm_model_name", llm_model_name)
mlflow.log_param("llm_temperature", llm_temperature)
mlflow.log_param("llm_max_tokens", llm_max_tokens if llm_max_tokens is not None else "None") # Handle None gracefully
mlflow.log_param("llm_timeout", llm_timeout if llm_timeout is not None else "None") # Handle None gracefully
mlflow.log_param("llm_max_retries", llm_max_retries)


print(f"Initializing ChatGoogleGenerativeAI model: {llm_model_name}...")
start_time = time.time()
chatModel = ChatGoogleGenerativeAI(
    model=llm_model_name,
    temperature=llm_temperature,
    max_tokens=llm_max_tokens,
    timeout=llm_timeout,
    max_retries=llm_max_retries,
)
end_time = time.time()
initialization_duration = end_time - start_time

# Log metrics
mlflow.log_metric("llm_initialization_duration_seconds", initialization_duration)

print(f"ChatGoogleGenerativeAI model initialized in {initialization_duration:.4f} seconds.")


Initializing ChatGoogleGenerativeAI model: gemini-2.0-flash...
ChatGoogleGenerativeAI model initialized in 0.0363 seconds.


In [26]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [27]:
# system_prompt = (
#     "You are an Medical assistant for question-answering tasks. "
#     "Use the following pieces of retrieved context to answer "
#     "the question. If you don't know the answer, say that you "
#     "don't know. Use five sentences maximum and keep the "
#     "answer concise."
#     "\n\n"
#     "{context}"
# )


# prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", system_prompt),
#         ("human", "{input}"),
#     ]
# )









import mlflow
import hashlib # For creating a hash of the prompt string
from langchain_core.prompts import ChatPromptTemplate


system_prompt = (
    "You are an Medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use five sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Create the ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# 1. Log the system prompt string as a parameter
mlflow.log_param("system_prompt_template", system_prompt)

# 2. Log the overall prompt template structure or its string representation
# You can convert the ChatPromptTemplate to a string for logging
full_prompt_template_str = str(prompt)
mlflow.log_param("full_chat_prompt_template", full_prompt_template_str)

# 3. Log a hash of the full prompt template string for easy version comparison
prompt_hash = hashlib.sha256(full_prompt_template_str.encode('utf-8')).hexdigest()
mlflow.log_param("full_prompt_template_hash", prompt_hash)

# 4. Log the input variables used in the prompt
mlflow.log_param("prompt_input_variables", prompt.input_variables)


# Optional: Save the prompt template to a file as an artifact
# This can be useful if your prompts become very long or complex
prompt_file_name = "chat_prompt_template.txt"
with open(prompt_file_name, "w") as f:
    f.write(full_prompt_template_str)
mlflow.log_artifact(prompt_file_name, artifact_path="prompt_templates")
os.remove(prompt_file_name) # Clean up local file

print("Prompt template defined and logged to MLflow.")
# --- MLflow Integration End ---



Prompt template defined and logged to MLflow.


In [28]:
# question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
# rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# response = rag_chain.invoke({"input": "What is Acne?"})
# print(response)






# Define the input query for this run
rag_input_query = "What is Acne?"

# Log the input query for the RAG chain
mlflow.log_param("rag_chain_input_query", rag_input_query)


print("Creating question-answer chain...")
Youtube_chain = create_stuff_documents_chain(chatModel, prompt)

print("Creating retrieval chain...")
rag_chain = create_retrieval_chain(retriever, Youtube_chain)

print(f"Invoking RAG chain for query: '{rag_input_query}'...")
start_time = time.time()
response = rag_chain.invoke({"input": rag_input_query})
end_time = time.time()
rag_chain_duration = end_time - start_time

# Log overall RAG chain execution duration
mlflow.log_metric("rag_chain_execution_duration_seconds", rag_chain_duration)

# Log the generated answer
generated_answer = response.get("answer", "No answer found.")
mlflow.log_param("generated_answer", generated_answer) # Log the full answer

# Log the retrieved source documents as an artifact
source_documents = response.get("context", []) # 'context' typically holds source documents
if source_documents:
    retrieved_context_details = []
    for i, doc in enumerate(source_documents):
        retrieved_context_details.append({
            "index": i + 1,
            "source": doc.metadata.get('source', 'N/A'),
            "page": doc.metadata.get('page', 'N/A'),
            "content_preview": doc.page_content[:300] + "..." # Log first 300 chars
        })

    context_artifact_path = "rag_chain_retrieved_context.json"
    with open(context_artifact_path, "w") as f:
        json.dump(retrieved_context_details, f, indent=4)
    mlflow.log_artifact(context_artifact_path, artifact_path="rag_chain_outputs")
    os.remove(context_artifact_path) # Clean up local file

    mlflow.log_metric("num_context_documents_in_chain", len(source_documents))
else:
    mlflow.log_metric("num_context_documents_in_chain", 0)


print(f"RAG Chain Response for '{rag_input_query}':")
print(generated_answer)
print(f"\nTime taken for RAG chain: {rag_chain_duration:.4f} seconds.")




Creating question-answer chain...
Creating retrieval chain...
Invoking RAG chain for query: 'What is Acne?'...


  return forward_call(*args, **kwargs)


RAG Chain Response for 'What is Acne?':
Acne vulgaris is a skin condition, and there are therapies for it.

Time taken for RAG chain: 2.5657 seconds.


In [29]:
response = rag_chain.invoke({"input": "Wwhat is Acromegaly and gigantism?"})
print(response["answer"])

  return forward_call(*args, **kwargs)


Acromegaly and gigantism are conditions related to growth hormone excess.


In [30]:
response = rag_chain.invoke({"input": "What is the treatment of Acromegaly and gigantism?"})
print(response["answer"])

  return forward_call(*args, **kwargs)


I am sorry, but the provided text does not contain information about the treatment of Acromegaly and gigantism.


In [31]:
##Since my RAG have some info about myself. So, I can ask about myself
response = rag_chain.invoke({"input": "Who is Gaurav?"})
print(response["answer"])

  return forward_call(*args, **kwargs)


I'm sorry, but the provided text does not contain any information about a person named Gaurav. The text focuses on Alexander's journey to improve his coordination and overcome vocal problems through self-observation and redirecting his thoughts.
