In [None]:
# MedXpert/src/ui/pages/compare.py


import streamlit as st
import os
import tempfile

from src.pipeline.blip_captioning import generate_blip_captions
from src.pipeline.llm_report_generation import generate_report
from src.llm_providers import llm_fn

# Page title, followed by two side-by-side upload slots (one per X-ray).
st.header("🔍 Compare Two X-ray Images")

col1, col2 = st.columns(2)

# Calling the widget method on a column object renders it inside that column.
img1 = col1.file_uploader("Upload first image", type=["png", "jpg", "jpeg"], key="img1")
img2 = col2.file_uploader("Upload second image", type=["png", "jpg", "jpeg"], key="img2")

def save_temp_image(uploaded_file):
    """Write an uploaded file to a fresh temporary file and return its path.

    Args:
        uploaded_file: Object exposing ``.name`` (the original file name) and
            ``.getbuffer()`` (the raw bytes), e.g. the value returned by
            ``st.file_uploader``.

    Returns:
        str: Path of the newly created file in the system temp directory.

    The client-supplied name is used only for its extension. Every call gets
    a unique file, so two uploads that share a name (e.g. both "scan.png")
    no longer overwrite each other, and a name containing path components
    cannot place the file outside the temp directory. The caller owns the
    file and is responsible for deleting it.
    """
    suffix = os.path.splitext(os.path.basename(uploaded_file.name))[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.getbuffer())
        return tmp.name

# Caption both uploads with BLIP, then ask the LLM to describe the differences.
if st.button("Compare & Analyze"):
    if not img1 or not img2:
        st.warning("Please upload both images.")
    else:
        saved_paths = []
        try:
            for upload in (img1, img2):
                saved_paths.append(save_temp_image(upload))
            captions = generate_blip_captions(saved_paths)
        finally:
            # The images are only needed for captioning; do not leave copies
            # of uploaded X-rays in the temp directory, even on failure.
            for saved_path in saved_paths:
                try:
                    os.remove(saved_path)
                except OSError:
                    # Best-effort cleanup: the file may already be gone.
                    pass

        st.subheader("🖼️ BLIP Captions")
        st.markdown(f"**Image 1:** {captions[0]}")
        st.markdown(f"**Image 2:** {captions[1]}")

        st.subheader("📋 Comparative Diagnosis (via LLM)")
        prompt = f"""
Compare the following radiology findings from two X-rays:

Image 1: {captions[0]}
Image 2: {captions[1]}

What are the differences or changes observed?
"""
        comparison_report = llm_fn(prompt)
        st.text_area("Comparative Analysis", comparison_report, height=300)

# MedXpert/src/ui/pages/diagnosis.py

import streamlit as st
import tempfile
import os

from src.pipeline.blip_captioning import generate_blip_captions
from src.pipeline.llm_report_generation import generate_report
from src.llm_providers import llm_fn  # Real LLM interface

# Page heading and the single-image upload widget for this page.
st.header("🩻 Direct Diagnosis from X-ray")

uploaded_file = st.file_uploader(
    "Upload a chest X-ray image",
    type=["png", "jpg", "jpeg"],
)

def save_temp_image(file):
    """Write an uploaded file to a fresh temporary file and return its path.

    Args:
        file: Object exposing ``.name`` (the original file name) and
            ``.getbuffer()`` (the raw bytes), e.g. the value returned by
            ``st.file_uploader``.

    Returns:
        str: Path of the newly created file in the system temp directory.

    Only the extension of the client-supplied name is kept. Each call
    creates a unique file, so uploads that share a name cannot overwrite
    each other and a name containing path components cannot place the file
    outside the temp directory. The caller is responsible for deleting it.
    """
    suffix = os.path.splitext(os.path.basename(file.name))[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(file.getbuffer())
        return tmp.name

# Caption the uploaded image with BLIP, then turn the caption into a report.
if st.button("Generate Diagnosis"):
    if not uploaded_file:
        st.warning("Please upload an image.")
    else:
        image_path = save_temp_image(uploaded_file)
        try:
            st.subheader("🧠 Image Caption (via BLIP)")
            caption = generate_blip_captions([image_path])[0]
        finally:
            # The file is only needed for captioning; do not leave a copy of
            # the uploaded X-ray in the temp directory, even on failure.
            try:
                os.remove(image_path)
            except OSError:
                # Best-effort cleanup: the file may already be gone.
                pass
        st.info(f"📝 Caption: {caption}")

        st.subheader("📋 Diagnostic Report (via LLM)")
        # No retrieved texts on this page, so the report is built from the
        # single caption only.
        report = generate_report([caption], [], llm_fn)
        st.text_area("Generated Diagnosis", report, height=300)



# MedXpert/src/ui/pages/search.py

import streamlit as st
import os
import tempfile
import json

from src.pipeline.clip_retrieval import retrieve_top_k
from src.pipeline.blip_captioning import generate_blip_captions
from src.pipeline.llm_report_generation import generate_report
from src.llm_providers import llm_fn  # your actual LLM API function

# Page heading plus the two controls that drive retrieval: direction and size.
st.header("🔎 Visual + Text Search")

mode = st.radio(
    "Choose retrieval mode:",
    ["Text → Image/Text", "Image → Text"],
)
top_k = st.slider(
    "How many results to retrieve?",
    min_value=1,
    max_value=10,
    value=3,
)

# Load test dataset
@st.cache_data
def load_dataset():
    """Load and return the parsed contents of the test-split JSON file.

    The path is relative to the process working directory. The file is read
    as UTF-8 explicitly so the result does not depend on the platform's
    default text encoding. ``st.cache_data`` memoises the result.
    """
    with open("data/processed/texts/test.json", encoding="utf-8") as f:
        return json.load(f)

dataset = load_dataset()

def save_temp_image(uploaded_file):
    """Write an uploaded file to a fresh temporary file and return its path.

    Args:
        uploaded_file: Object exposing ``.name`` (the original file name) and
            ``.getbuffer()`` (the raw bytes), e.g. the value returned by
            ``st.file_uploader``.

    Returns:
        str: Path of the newly created file in the system temp directory.

    Only the extension of the client-supplied name is kept. Each call
    creates a unique file, so uploads that share a name cannot overwrite
    each other and a name containing path components cannot place the file
    outside the temp directory. The caller is responsible for deleting it.
    """
    suffix = os.path.splitext(os.path.basename(uploaded_file.name))[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.getbuffer())
        return tmp.name

def display_results(indices):
    """Render the retrieved cases and an LLM report summarising them.

    Args:
        indices: Iterable of positions into the module-level ``dataset``.
            Each selected entry is read via its ``"image_id"`` and ``"text"``
            keys; ``"image_id"`` is passed on as an image path.

    Shows one column per retrieved image (captioned by BLIP), then a report
    generated from the captions and the retrieved texts. When nothing was
    retrieved a warning is shown instead, because a zero-column layout is
    not a valid ``st.columns`` spec.
    """
    samples = [dataset[i] for i in indices]
    if not samples:
        st.warning("No matching cases were found.")
        return

    image_paths = [sample["image_id"] for sample in samples]
    texts = [sample["text"] for sample in samples]

    st.subheader("📸 Retrieved X-ray Images + Captions")
    captions = generate_blip_captions(image_paths)

    columns = st.columns(len(image_paths))
    for column, image_path, caption in zip(columns, image_paths, captions):
        column.image(image_path, caption=caption, use_column_width=True)

    st.subheader("📝 AI-Generated Diagnostic Report")
    report = generate_report(captions, texts, llm_fn)
    st.text_area("Report Output", report, height=250)

# User input section: one branch per retrieval direction chosen in `mode`.
if mode == "Text → Image/Text":
    query = st.text_input("Enter medical query:", "What abnormality is present?")
    if st.button("Search & Generate Report"):
        # Treat a whitespace-only query the same as an empty one.
        cleaned_query = query.strip()
        if not cleaned_query:
            st.warning("Please enter a valid query.")
        else:
            indices, _ = retrieve_top_k(cleaned_query, mode="text", k=top_k)
            display_results(indices)

elif mode == "Image → Text":
    uploaded_file = st.file_uploader("Upload chest X-ray:", type=["png", "jpg", "jpeg"])
    if st.button("Search & Generate Report"):
        if not uploaded_file:
            st.warning("Please upload a file.")
        else:
            image_path = save_temp_image(uploaded_file)
            try:
                indices, _ = retrieve_top_k(image_path, mode="image", k=top_k)
            finally:
                # The query image is only needed for retrieval; do not leave
                # a copy of the upload in the temp directory.
                try:
                    os.remove(image_path)
                except OSError:
                    # Best-effort cleanup: the file may already be gone.
                    pass
            display_results(indices)


# /home/sysadm/Music/MedXpert/src/ui/app.py

import streamlit as st

# Browser-tab title/icon and a wide page layout for the whole app.
st.set_page_config(page_title="MedXpert", page_icon="🧠", layout="wide")

st.title("🧠 MedXpert: Medical Visual Question Answering & Diagnosis Assistant")

# Landing-page blurb, rendered as Markdown below the title.
intro_markdown = """
Welcome to **MedXpert**, a full-stack medical AI assistant that can:

- 🖼️ Analyze chest X-ray images
- 🔍 Retrieve most similar cases and findings
- 📝 Generate diagnostic reports
- 💬 Answer clinical questions
- 📊 Compare cases and visualize results

Navigate from the sidebar to start exploring different modules.
"""
st.markdown(intro_markdown)



In [None]:
./data/processed/texts:
total 6904
-rw-rw-r-- 1 sysadm sysadm  698113 May 18 00:09 test.json
-rw-rw-r-- 1 sysadm sysadm 5619568 May 18 00:09 train.json
-rw-rw-r-- 1 sysadm sysadm  746886 May 18 00:09 validation.json

./data/raw:
total 0

./models:
total 12
drwxrwxr-x 4 sysadm sysadm 4096 May 18 11:27 clip
-rw-rw-r-- 1 sysadm sysadm    0 May  3 13:42 __init__.py
drwxrwxr-x 2 sysadm sysadm 4096 May  3 00:16 llm
drwxrwxr-x 2 sysadm sysadm 4096 May  3 13:47 __pycache__

./models/clip:
total 44
-rw-rw-r-- 1 sysadm sysadm     0 May  3 00:16 config.yaml
-rw-rw-r-- 1 sysadm sysadm 14372 May 18 11:39 dataset.py
drwxrwxr-x 2 sysadm sysadm  4096 May 18 12:23 fine_tuned
drwxrwxr-x 2 sysadm sysadm  4096 May 18 11:39 __pycache__
-rw-rw-r-- 1 sysadm sysadm 17685 May 18 02:23 train.py
-rw-rw-r-- 1 sysadm sysadm     0 May  3 00:16 utils.py

./models/clip/fine_tuned:
total 11235608
-rw-rw-r-- 1 sysadm sysadm 1815823321 May 18 14:38 best_model.pt
-rw-rw-r-- 1 sysadm sysadm 1815836105 May 18 14:29 checkpoint_epoch_1.pt
-rw-rw-r-- 1 sysadm sysadm 1815836105 May 18 14:33 checkpoint_epoch_2.pt
-rw-rw-r-- 1 sysadm sysadm 1815836105 May 18 14:37 checkpoint_epoch_3.pt
-rw-rw-r-- 1 sysadm sysadm 1815836105 May 18 14:12 checkpoint_epoch_4.pt
-rw-rw-r-- 1 sysadm sysadm 1815836105 May 18 14:16 checkpoint_epoch_5.pt
-rw-rw-r-- 1 sysadm sysadm       1210 May 18 14:38 config.json
-rw-rw-r-- 1 sysadm sysadm     524619 May 18 14:38 merges.txt
-rw-rw-r-- 1 sysadm sysadm  605156676 May 18 14:38 model.safetensors
-rw-rw-r-- 1 sysadm sysadm        504 May 18 14:38 preprocessor_config.json
-rw-rw-r-- 1 sysadm sysadm        588 May 18 14:38 special_tokens_map.json
-rw-rw-r-- 1 sysadm sysadm        774 May 18 14:38 tokenizer_config.json
-rw-rw-r-- 1 sysadm sysadm    3642073 May 18 14:38 tokenizer.json
-rw-rw-r-- 1 sysadm sysadm     862328 May 18 14:38 vocab.json

./models/clip/__pycache__:
total 8
-rw-rw-r-- 1 sysadm sysadm 1059 May 18 11:39 dataset.cpython-310.pyc
-rw-rw-r-- 1 sysadm sysadm 4032 May 18 02:24 train.cpython-310.pyc

./results:
total 16
drwxrwxr-x 2 sysadm sysadm 4096 May 18 11:22 fine_tuned
drwxrwxr-x 2 sysadm sysadm 4096 May 18 15:47 inference_plots
drwxrwxr-x 2 sysadm sysadm 4096 May 18 12:32 plots
-rw-rw-r-- 1 sysadm sysadm  775 May 18 14:37 train_history.tsv

./results/fine_tuned:
total 595916
-rw-rw-r-- 1 sysadm sysadm      1210 May 18 02:57 config.json
-rw-rw-r-- 1 sysadm sysadm    524619 May 18 02:57 merges.txt
-rw-rw-r-- 1 sysadm sysadm 605156676 May 18 02:57 model.safetensors
-rw-rw-r-- 1 sysadm sysadm       504 May 18 02:57 preprocessor_config.json
-rw-rw-r-- 1 sysadm sysadm       588 May 18 02:57 special_tokens_map.json
-rw-rw-r-- 1 sysadm sysadm       774 May 18 02:57 tokenizer_config.json
-rw-rw-r-- 1 sysadm sysadm   3642073 May 18 02:57 tokenizer.json
-rw-rw-r-- 1 sysadm sysadm    862328 May 18 02:57 vocab.json

./results/inference_plots:
total 296
-rw-rw-r-- 1 sysadm sysadm 302287 May 18 15:47 query_0.png

./results/plots:
total 76
-rw-rw-r-- 1 sysadm sysadm 74395 May 18 12:32 loss_plot.png

./scripts:
total 52
drwxrwxr-x 3 sysadm sysadm 4096 May 18 12:48 data
-rw-rw-r-- 1 sysadm sysadm    0 May  3 00:16 deploy_app.py
-rw-rw-r-- 1 sysadm sysadm 5357 May 18 16:30 eval_retrieval.py
-rw-rw-r-- 1 sysadm sysadm    0 May  3 00:16 evaluate_model.py
-rw-rw-r-- 1 sysadm sysadm 3247 May 18 15:47 inference_plot.py
-rw-rw-r-- 1 sysadm sysadm 1814 May 18 16:22 inference_to_csv.py
-rw-rw-r-- 1 sysadm sysadm 1972 May 18 12:32 plot_losses.py
-rw-rw-r-- 1 sysadm sysadm 8728 May 14 16:53 preprocess_data.py
drwxrwxr-x 2 sysadm sysadm 4096 May 18 14:25 __pycache__
-rw-rw-r-- 1 sysadm sysadm 1144 May 18 16:48 run_inference_pipeline.py
-rw-rw-r-- 1 sysadm sysadm 2167 May 18 15:35 run_search_engine.py
-rw-rw-r-- 1 sysadm sysadm 2461 May 18 14:23 save_embedding.py


./scripts/__pycache__:
total 8
-rw-rw-r-- 1 sysadm sysadm 3048 May 14 16:54 preprocess_data.cpython-310.pyc
-rw-rw-r-- 1 sysadm sysadm 2805 May 18 14:25 save_embedding.cpython-310.pyc

./src:
total 24
drwxrwxr-x 3 sysadm sysadm 4096 May  3 14:59 core
-rw-rw-r-- 1 sysadm sysadm    0 May  3 14:57 __init__.py
-rw-rw-r-- 1 sysadm sysadm  694 May 18 17:09 llm_providers.py
drwxrwxr-x 3 sysadm sysadm 4096 May  3 16:52 pipeline
drwxrwxr-x 2 sysadm sysadm 4096 May  3 14:59 __pycache__
drwxrwxr-x 2 sysadm sysadm 4096 May  3 12:14 tests
drwxrwxr-x 3 sysadm sysadm 4096 May  3 00:16 ui

./src/core:
total 8
drwxrwxr-x 2 sysadm sysadm 4096 May 18 15:28 __pycache__
-rw-rw-r-- 1 sysadm sysadm    0 May  3 00:16 report_generator.py
-rw-rw-r-- 1 sysadm sysadm 3189 May 18 15:27 search_engine.py
-rw-rw-r-- 1 sysadm sysadm    0 May  3 00:16 utils.py

./src/core/__pycache__:
total 4
-rw-rw-r-- 1 sysadm sysadm 2809 May 18 15:28 search_engine.cpython-310.pyc

./src/pipeline:
total 16
-rw-rw-r-- 1 sysadm sysadm  803 May 18 16:49 blip_captioning.py
-rw-rw-r-- 1 sysadm sysadm  342 May 18 16:50 clip_retrieval.py
-rw-rw-r-- 1 sysadm sysadm  441 May 18 16:53 llm_report_generation.py
drwxrwxr-x 2 sysadm sysadm 4096 May  3 16:52 __pycache__

./src/pipeline/__pycache__:
total 12
-rw-rw-r-- 1 sysadm sysadm 804 May  3 16:52 blip_captioning.cpython-310.pyc
-rw-rw-r-- 1 sysadm sysadm 518 May  3 16:52 clip_retrieval.cpython-310.pyc
-rw-rw-r-- 1 sysadm sysadm 591 May  3 16:52 llm_report_generation.cpython-310.pyc

./src/__pycache__:
total 4
-rw-rw-r-- 1 sysadm sysadm 136 May  3 14:59 __init__.cpython-310.pyc


./src/ui:
total 12
-rw-rw-r-- 1 sysadm sysadm 5848 May 18 20:20 app.py
drwxrwxr-x 2 sysadm sysadm 4096 May  3 00:16 pages

./src/ui/pages:
total 32
-rw-rw-r-- 1 sysadm sysadm 5048 May 18 20:21 compare.py
-rw-rw-r-- 1 sysadm sysadm 3900 May 18 20:18 diagnosis.py
-rw-rw-r-- 1 sysadm sysadm    0 May  3 00:16 home.py
-rw-rw-r-- 1 sysadm sysadm 5753 May 18 20:17 reports.py
-rw-rw-r-- 1 sysadm sysadm 8475 May 18 20:19 search.py



In [None]:
# /home/sysadm/Music/MedXpert/src/pipeline/llm_report_generation.py
def generate_report(blip_captions, retrieved_texts, llm_fn):
    """Build a report prompt from captions and texts and send it to the LLM.

    Args:
        blip_captions: Iterable of image findings; each becomes a "- " bullet
            under the "Image Findings (via BLIP)" heading.
        retrieved_texts: Iterable of report texts; each becomes a "- " bullet
            under the "Report Texts (via CLIP)" heading.
        llm_fn: Callable that takes the prompt string.

    Returns:
        Whatever ``llm_fn`` returns for the assembled prompt.
    """
    caption_bullets = "".join(f"- {caption}\n" for caption in blip_captions)
    text_bullets = "".join(f"- {text}\n" for text in retrieved_texts)

    sections = (
        "\nBelow are findings extracted from multiple images and related radiology texts.\n",
        "\nImage Findings (via BLIP):\n",
        caption_bullets,
        "\nReport Texts (via CLIP):\n",
        text_bullets,
        "\nGenerate a summarized radiology report:",
    )
    return llm_fn("".join(sections))


# /home/sysadm/Music/MedXpert/src/pipeline/blip_captioning.py

_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# Holds the loaded (processor, model, device) so the weights are loaded once
# per process instead of on every captioning call.
_blip_cache = {}


def _load_blip():
    """Return the cached ``(processor, model, device)``, loading on first use."""
    if "bundle" not in _blip_cache:
        from transformers import BlipProcessor, BlipForConditionalGeneration
        import torch

        # Use the GPU when one is available, otherwise run on the CPU rather
        # than failing on hosts without CUDA.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
        model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT).to(device)
        _blip_cache["bundle"] = (processor, model, device)
    return _blip_cache["bundle"]


def generate_blip_captions(image_paths):
    """Generate one BLIP caption per image.

    Args:
        image_paths: Sequence of image file paths readable by PIL.

    Returns:
        list[str]: Captions in the same order as ``image_paths``. An empty
        input returns ``[]`` without loading the model.
    """
    if not image_paths:
        return []

    from PIL import Image
    import torch

    processor, model, device = _load_blip()

    captions = []
    for img_path in image_paths:
        # Convert inside the context manager so the file handle is closed as
        # soon as the pixel data has been read.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        inputs = processor(image, return_tensors="pt").to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            output = model.generate(**inputs)
        captions.append(processor.decode(output[0], skip_special_tokens=True))

    return captions


# /home/sysadm/Music/MedXpert/src/pipeline/clip_retrieval.py
def retrieve_top_k(query, mode="text", k=5):
    """Dispatch a retrieval query to the matching search-engine function.

    Args:
        query: The query passed through to the search function unchanged
            (callers in this project pass a text string for ``"text"`` and
            an image path for ``"image"``).
        mode: ``"text"`` to call ``search_image_by_text`` or ``"image"`` to
            call ``search_text_by_image``.
        k: Number of results requested, forwarded as ``k=``.

    Returns:
        Whatever the selected search function returns.

    Raises:
        ValueError: If ``mode`` is neither ``"text"`` nor ``"image"``.
    """
    # Validate first so a bad mode fails immediately, before the search
    # engine module is imported.
    if mode not in ("text", "image"):
        raise ValueError("mode must be 'text' or 'image'")

    from src.core.search_engine import search_image_by_text, search_text_by_image

    search_fn = search_image_by_text if mode == "text" else search_text_by_image
    return search_fn(query, k=k)
