In [None]:
import os
EURI_API_KEY = os.environ["EURI_API_KEY"]   

In [3]:
#from pypdf import pdf, PdfReader, PdfWriter
from pypdf import PdfReader, PdfWriter
from typing import List, Optional
from io import BytesIO
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
def read_pdfs_from_dir(pdf_dir: str):
    pdf_files = [f for f in os.listdir(pdf_dir) if f.lower().endswith(".pdf")]
    all_texts = {}

    for pdf_file in pdf_files:
        pdf_path = os.path.join(pdf_dir, pdf_file)
        reader = PdfReader(pdf_path)
        
        text = ""
        for page in reader.pages:
            text += page.extract_text() or ""
        
        all_texts[pdf_file] = text
    
    return all_texts



In [None]:

pdf_dir = r"C:\Data Science\Chatboat_31_08_2025\medical_report"
pdf_texts = read_pdfs_from_dir(pdf_dir)
all_content = []
for filename, content in pdf_texts.items():
    #print(f"\n--- {filename} ---\n")
    #print(content[:500])  # print first 500 chars
    all_content.append(content)
print(all_content)
print(len(all_content))     


['Medical History Report #1\nName: John Doe\nAge: 45\nHistory: John Doe, a 45-year-old male, has a history of hypertension diagnosed at age\n38. He has been on antihypertensive medication since then. In 2018, he\nsuffered a mild myocardial infarction and underwent angioplasty. He is a\nnon-smoker, exercises regularly, and follows a low-sodium diet. Family history\nis significant for cardiovascular disease. Recent labs show controlled blood\npressure and normal cholesterol levels. He is compliant with medications and\nattends regular follow-ups.\nAllergies: None\nMedications: Amlodipine 5mg, Atorvastatin 20mg\nSurgeries: Angioplasty (2018)\nNotes: Patient is stable. Continue current management.', 'Medical History Report #2\nName: Jane Smith\nAge: 60\nHistory: Jane Smith, a 60-year-old female, has type 2 diabetes mellitus diagnosed at\nage 50. She also has osteoarthritis of the knees. She underwent left knee\nreplacement in 2020. She monitors her blood glucose daily and is on oral\nhypog

In [6]:
# Split texts into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,)

In [7]:
chunks = []
for text in all_content:
    chunks.extend(text_splitter.split_text(text))
print(chunks)

['Medical History Report #1\nName: John Doe\nAge: 45\nHistory: John Doe, a 45-year-old male, has a history of hypertension diagnosed at age\n38. He has been on antihypertensive medication since then. In 2018, he\nsuffered a mild myocardial infarction and underwent angioplasty. He is a\nnon-smoker, exercises regularly, and follows a low-sodium diet. Family history\nis significant for cardiovascular disease. Recent labs show controlled blood\npressure and normal cholesterol levels. He is compliant with medications and\nattends regular follow-ups.\nAllergies: None\nMedications: Amlodipine 5mg, Atorvastatin 20mg\nSurgeries: Angioplasty (2018)\nNotes: Patient is stable. Continue current management.', 'Medical History Report #2\nName: Jane Smith\nAge: 60\nHistory: Jane Smith, a 60-year-old female, has type 2 diabetes mellitus diagnosed at\nage 50. She also has osteoarthritis of the knees. She underwent left knee\nreplacement in 2020. She monitors her blood glucose daily and is on oral\nhypog

In [None]:
# Code to create/store the index for FAISS and retreive the relevant documents

# langchain vectorstores documentation: https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/faiss
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from typing import List

def create_faiss_index(texts):
    # Create embeddings
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    
    # Create the FAISS index
    faiss_index = FAISS.from_texts(texts, embeddings)
    
    return faiss_index

def retrive_relevant_docs(vectorstore: FAISS, query: str, k: int = 4):
    return vectorstore.similarity_search(query, k=k)

    


In [9]:
# Create FAISS index
vectorstore = create_faiss_index(chunks)
print(vectorstore)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
  from .autonotebook import tqdm as notebook_tqdm


<langchain_community.vectorstores.faiss.FAISS object at 0x000001F61DEC9F10>


In [10]:
from euriai.langchain import create_chat_model # Import the function to create a chat model - this is a wrapper around Langchain's ChatOpenAI built by EURON

def get_chat_model(api_key: str):

    return create_chat_model(api_key=api_key, 
                             model="gpt-4.1-nano", 
                             temperature=0.7)
    
def ask_chat_model(chat_model, question: str):

    response = chat_model.invoke(question)
    return response.content

In [11]:
# Initialize chat model
chat_model = get_chat_model(EURI_API_KEY)

In [12]:
chat_model

EuriaiChatModel(api_key='euri-cf9524cd168f919e0d809fc31399177ff083add13efac86b9e7faf30229f78f3')

In [33]:
prompt = "John Doe "
relevant_docs = retrive_relevant_docs(vectorstore, prompt)
relevant_docs

[Document(id='5bbf7b0d-a3d2-400f-8f03-de865d519422', metadata={}, page_content='Medical History Report #1\nName: John Doe\nAge: 45\nHistory: John Doe, a 45-year-old male, has a history of hypertension diagnosed at age\n38. He has been on antihypertensive medication since then. In 2018, he\nsuffered a mild myocardial infarction and underwent angioplasty. He is a\nnon-smoker, exercises regularly, and follows a low-sodium diet. Family history\nis significant for cardiovascular disease. Recent labs show controlled blood\npressure and normal cholesterol levels. He is compliant with medications and\nattends regular follow-ups.\nAllergies: None\nMedications: Amlodipine 5mg, Atorvastatin 20mg\nSurgeries: Angioplasty (2018)\nNotes: Patient is stable. Continue current management.'),
 Document(id='4ebef349-5ed9-4c4a-b95e-c8e52b3ceb99', metadata={}, page_content='Medical History Report #4\nName: Emily Clark\nAge: 28\nHistory: Emily Clark, a 28-year-old female, has hypothyroidism diagnosed at age 2

In [35]:
# Create context from relevant documents
context = "\n\n".join([doc.page_content for doc in relevant_docs])
# Create prompt with context
system_prompt = f"""You are MediChat Pro, an intelligent medical document assistant. 
                Based on the following medical documents, provide accurate and helpful answers. 
                If the information is not in the documents, clearly state that.
                While giving answers, make sure to cite the source document for reference. also,
                when you are giving an answer make sure to take help of LLM and give a full diagnosis of the problem.
                
Medical Documents:
{context}

User Question: {prompt}

Answer:"""
                
response = ask_chat_model(chat_model, system_prompt)
print(response)

Based on the provided medical history, John Doe is a 45-year-old male with a longstanding history of hypertension diagnosed at age 38, which has been well managed with medication (Amlodipine 5mg). He has a history of mild myocardial infarction in 2018, for which he underwent angioplasty, indicating some degree of coronary artery disease. His recent labs show controlled blood pressure and normal cholesterol levels, suggesting good adherence to his management plan.

**Full Diagnostic Summary:**
John's clinical profile indicates stable hypertensive cardiovascular disease with prior myocardial infarction, now well-controlled through antihypertensive medication and lifestyle measures. His history of MI and angioplasty places him at increased risk for future cardiovascular events, necessitating ongoing risk management. The absence of current symptoms and normal labs suggest his condition is stable, but continued vigilance is essential.

**Full Diagnosis:**
- Stable hypertension with previous