In [8]:
import numpy as np
from dotenv import load_dotenv
import os
import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [9]:
# Read the raw conversation/summary table and keep only its first 15 rows.
df = pd.read_csv(
    "/home/root495/Inexture/CDSS-RAG/data/raw/conversation_summary.csv"
).head(15)


In [10]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt for the summarisation step. It has two template variables:
#   {context}  - clinical context to ground the note (may be empty)
#   {question} - the doctor/patient conversation to summarise
# The embedded example note is reproduced verbatim so the model imitates its style.
prompt = ChatPromptTemplate.from_template("""
You are a medical summarization expert. Given the following conversation between a doctor and a patient, and additional relevant clinical context (if available), generate a concise, accurate, and structured medical note summarizing the history, symptoms, relevant negatives, medical/social/family history, impression, and plan.
If clinical context retrieved via RAG is available, use it to make your summary more complete and accurate. If no additional context is provided, base your summary solely on the conversation.

Your summary should match the concise, professional style of a real clinical note and include important details while omitting irrelevant conversation.

Format the summary in a single paragraph, like this example:
'3/7 hx of diarrhea, mainly watery. No blood in stool. Opening bowels x6/day. Associated LLQ pain - crampy, intermittent, nil radiation. Also vomiting - mainly bilous. No blood in vomit. Fever on first day, nil since. Has been feeling lethargic and weak since. Takeaway 4/7 ago - Chinese restaurant. Wife and children also unwell with vomiting, but no diarrhea. No other unwell contacts. PMH: Asthma DH: Inhalers SH: works as an accountant. Lives with wife and children. Affecting his ADLs as has to be near toilet often. Nil smoking/etOH hx Imp: gastroenteritis Plan: Conservative management - rest, push fluids, paracetamol if feverish. Recommend OTC diarolyte. To review in 3-5d if symptoms not improving. To see earlier if feeling more unwell.'

Your answer will be evaluated using cosine similarity, BLEU, and ROUGE scores—so accuracy and inclusion of key clinical details matter.

If no relevant context is retrieved, answer based only on the conversation. If context is provided, integrate it to improve your summary.

Context:
{context}

Conversation:
{question}
""")

In [18]:
import os
import warnings

from dotenv import load_dotenv
from google.generativeai import configure, GenerativeModel

# Copy variables from a local .env file (if one exists) into the process
# environment before reading the key; variables that are already set are kept.
load_dotenv()

# Load API key
gemini_api_key = os.getenv("GOOGLE_API_KEY")
if not gemini_api_key:
    # Surface the misconfiguration here rather than at the first model call.
    warnings.warn(
        "GOOGLE_API_KEY is not set in the environment or in a .env file; "
        "calls to the Gemini model are likely to fail.",
        stacklevel=2,
    )
configure(api_key=gemini_api_key)

# Create LLM instance
gemini_llm = GenerativeModel("gemini-2.5-flash")


In [24]:
from langchain_core.runnables import Runnable
from langchain_core.messages import BaseMessage

class GeminiRunnable(Runnable):
    """Adapter that lets a Gemini model be used as a step in a LangChain chain.

    The wrapped ``model`` only needs a ``generate_content(text)`` method whose
    result exposes a ``text`` attribute.
    """

    def __init__(self, model):
        """Keep a reference to the model that will generate the responses."""
        self.model = model

    def _convert_input(self, inp):
        """Reduce the value produced by the previous chain step to prompt text.

        Args:
            inp: A string, a prompt value (any object exposing
                ``to_messages()``), a ``BaseMessage``, a list of messages or
                other items, a dict, or any other object.

        Returns:
            The text to send to the model. Strings are returned unchanged,
            prompt values and message lists are flattened to their message
            contents joined by newlines, and anything else is ``str()``-ed.
        """
        # Case 1: Plain string
        if isinstance(inp, str):
            return inp

        # Case 2: a prompt template emits a prompt value, not a message or a
        # list. Unwrap it to its messages; otherwise the str() fallback below
        # would send the object's repr to the model instead of the prompt text.
        to_messages = getattr(inp, "to_messages", None)
        if callable(to_messages):
            return self._convert_input(to_messages())

        # Case 3: LangChain sends a BaseMessage (HumanMessage, etc.)
        if isinstance(inp, BaseMessage):
            return inp.content

        # Case 4: LangChain sends list of messages
        if isinstance(inp, list):
            return "\n".join(
                item.content if isinstance(item, BaseMessage) else str(item)
                for item in inp
            )

        # Fallback: dicts and anything else are converted with str()
        return str(inp)

    def invoke(self, input, config=None, **kwargs):
        """Send the flattened input to the model and return the reply text.

        Args:
            input: Output of the previous chain step (see ``_convert_input``).
            config: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The ``text`` attribute of the model's response.
        """
        clean_input = self._convert_input(input)
        response = self.model.generate_content(clean_input)
        return response.text


In [25]:
# Wrap the configured Gemini model so it can be piped into a chain.
llm = GeminiRunnable(model=gemini_llm)


In [None]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Same embedding model as in preprocess.ipynb
embedding_model = HuggingFaceEmbeddings(model_name="emilyalsentzer/Bio_ClinicalBERT")

# Open the persisted Chroma store with that embedding function ...
chroma_db = Chroma(
    embedding_function=embedding_model,
    persist_directory="/home/root495/Inexture/CDSS-RAG/notebooks/chroma_store",
)

# ... and expose it through the retriever interface used by the chain.
retriever = chroma_db.as_retriever()

  embedding_model = HuggingFaceEmbeddings(
2025-11-27 17:03:56.658867: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764243236.695292  114997 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764243236.711344  114997 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1764243236.737110  114997 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764243236.737136  114997 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764243236.737142  114997 comput

In [26]:
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The documents' ``page_content`` values separated by blank lines
        (an empty string when nothing was retrieved).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming string is used twice: as the retrieval query for "context" and,
# unchanged, as "question". The retrieved documents go through format_docs so
# the prompt receives their text rather than the repr of a list of documents.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)


In [27]:
# Quick end-to-end check of the chain with a throwaway query.
smoke_query = "Explain RAG in simple words."
response = rag_chain.invoke(smoke_query)
print(response)


The provided conversation "Explain RAG in simple words." is not a clinical conversation between a doctor and a patient, and therefore a medical summary cannot be generated. The additional context provided also does not relate to a patient's history or symptoms from the conversation.


In [29]:
# Run the chain on at most the first 15 conversations, printing a running
# count after each call completes.
rag_summary = []
num = 0
for num, conversation in enumerate(df["conversation"].head(15), start=1):
    rag_summary.append(rag_chain.invoke(conversation))
    print(num)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15


In [30]:
# Store the generated summaries in a "rag_summary" column of df.
df["rag_summary"] = rag_summary

In [31]:
# Write the frame to the processed-data CSV, leaving out the index column.
df.to_csv(
    path_or_buf="/home/root495/Inexture/CDSS-RAG/data/processed/conversation_summary_using_gemini.csv",
    index=False,
)

In [32]:
# Display the frame as the cell's output.
df

Unnamed: 0,conversation,summary,rag_summary
0,"Doctor: Hello? Hi. Um, should we start? Yeah, ...","3/7 hx of diarrhea, mainly watery. No blood in...","3/7 hx of loose, watery diarrhea (6-7 times/da..."
1,Doctor: Hello? Patient: Hello. Can you hear me...,"4/7 hx of dry itchy skin, mainly on chest and ...","4/7 hx of sore, red, severely itchy, cracked s..."
2,Doctor: Hello? Patient: Hello. Doctor: Hello t...,"Headache on left side. Started few hours ago, ...","Patient presents with a mid-day onset, left-si..."
3,"Doctor: Alex. Ohh. Hello? Hi, can you hear me?...","4/7 hx of generally unwell, mainly sore throat...",4/7 hx of general malaise. Started with painfu...
4,Doctor: Hello? Patient: Doctor: . Good morning...,2/7 ago developed lower abdo pain/suprapubic p...,"2/7 hx of lower abdominal pain, gradually wors..."
5,Doctor: Doctor: Hello? Patient: Hello there. D...,"2/5 hx of SOB, worsening over the past 2/7. Fe...",Hx of slowly worsening shortness of breath ove...
6,Doctor: Hello? Patient: Hello? Doctor: Hello? ...,5/7 hx of generally unwell with cough and cold...,5 days hx of feeling unwell and run-down. Symp...
7,"Patient: OK. Ohh, OK. Doctor: Hello? Patient: ...","3/7 hx of dry itchy skin, mainly on the hands ...",26yo F presenting with a few days history of s...
8,Patient: Hello? Doctor: Hello? Doctor: Hello? ...,3/7 hx of dysuria and suprapubic pain. Nil fre...,19-year-old female presents with a 3-day histo...
9,Doctor: hello can you hear me ok? Patient: Hel...,"1/52 hx of dysuria, frequency and suprapubic p...",19 yo F with hx of dysuria and urinary frequen...
