<a href="https://colab.research.google.com/github/SOHAM-3T/Medical-Prescription-Analyzer-/blob/main/Vaidya_AI_alpha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: Some Kaggle data sources are private.
# Run this cell in order to import your Kaggle data sources. It also provides
# the `kagglehub` import that the dataset-download step below relies on.
import kagglehub
# NOTE(review): presumably prompts for / registers Kaggle credentials for the
# current session — confirm against the kagglehub documentation.
kagglehub.login()


In [None]:
# IMPORTANT: Run this cell in order to import your Kaggle data sources,
# then feel free to delete this cell.
# NOTE: This notebook environment differs from Kaggle's Python
# environment, so there may be missing libraries used by your
# notebook.

# Fetch the 'soham3ripathy/prescription-data-set' dataset through kagglehub.
# NOTE(review): the returned value is presumably the local path of the
# downloaded files (as the variable name suggests) — confirm.
soham3ripathy_prescription_data_set_path = kagglehub.dataset_download('soham3ripathy/prescription-data-set')

# Progress message only; it is printed unconditionally once the call returns.
print('Data source import complete.')


In [None]:
# Step 1: Install Required Libraries

print("Installing Hugging Face Transformers and other required libraries...")
# sentencepiece is needed by the tokenizer; difflib (used later for evaluation) ships with Python and needs no install
!pip install -q transformers torch sentencepiece pillow

import torch
from transformers import DonutProcessor, VisionEncoderDecoderModel, pipeline
from PIL import Image
import os
import json
from difflib import SequenceMatcher # To compare the OCR output with ground truth
import numpy as np # Import numpy to handle special data types
import re

print("Installation and imports complete.")

In [None]:
# Step 2: Define the Local OCR (Reader) Function

# Load the Donut processor/model pair once, up front. After this block both
# `ocr_processor` and `ocr_model` are always defined: on any failure they are
# both reset to None, so later code can test for the condition instead of
# tripping over an undefined name or a half-loaded pair.
try:
    print("Loading Local OCR Model (Naver-Clova Donut)...")
    ocr_processor = DonutProcessor.from_pretrained('naver-clova-ix/donut-base-finetuned-cord-v2')
    ocr_model = VisionEncoderDecoderModel.from_pretrained('naver-clova-ix/donut-base-finetuned-cord-v2')
    print("OCR Model loaded successfully.")
except Exception as e:
    print(f"Error loading OCR model: {e}")
    # A processor without its model (or the reverse) is unusable, so clear both.
    ocr_processor = None
    ocr_model = None

# Captures the text sitting between two consecutive tags of Donut's decoded,
# XML-like output, e.g. "<s_nm> Dolo 650</s_nm>" yields " Dolo 650".
_DONUT_TEXT_BETWEEN_TAGS = re.compile(r'>([^<]+)<')


def _extract_donut_text(sequence):
    """Return the plain text found between the tags of a decoded Donut sequence."""
    stripped_snippets = (
        snippet.strip() for snippet in _DONUT_TEXT_BETWEEN_TAGS.findall(sequence)
    )
    # Whitespace-only gaps between adjacent tags strip down to empty strings;
    # dropping them avoids doubled spaces in the joined result.
    return " ".join(snippet for snippet in stripped_snippets if snippet)


def run_local_ocr(image_path):
    """
    Transcribe the text in an image with the locally loaded Donut model.

    Args:
        image_path: Location of the image, passed straight to ``Image.open``.

    Returns:
        str: The text snippets found between the tags of the decoded model
        output, joined by single spaces. Failures are reported in-band
        instead of being raised: "OCR Model not loaded. Cannot process image."
        when the model is unavailable, or "Error during OCR processing: <details>"
        when anything fails while reading the image or running the model.
    """
    if not ocr_model:
        return "OCR Model not loaded. Cannot process image."

    try:
        # Image.open keeps the underlying file open; convert() produces an
        # independent RGB copy, so the source can be closed right away.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Prepare image for model
        pixel_values = ocr_processor(image, return_tensors="pt").pixel_values

        # Prepare decoder inputs: generation is seeded with the CORD-v2 task token
        task_prompt = "<s_cord-v2>"
        decoder_input_ids = ocr_processor.tokenizer(
            task_prompt, add_special_tokens=False, return_tensors="pt"
        ).input_ids

        # Move model and inputs to GPU if available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        ocr_model.to(device)
        pixel_values = pixel_values.to(device)
        decoder_input_ids = decoder_input_ids.to(device)

        # Generate output: single-beam decoding, with the unknown token banned
        outputs = ocr_model.generate(
            pixel_values,
            decoder_input_ids=decoder_input_ids,
            max_length=ocr_model.decoder.config.max_position_embeddings,
            pad_token_id=ocr_processor.tokenizer.pad_token_id,
            eos_token_id=ocr_processor.tokenizer.eos_token_id,
            use_cache=True,
            num_beams=1,
            bad_words_ids=[[ocr_processor.tokenizer.unk_token_id]],
            return_dict_in_generate=True,
        )

        # Decode the output sequence (only one image was submitted, so take item 0)
        sequence = ocr_processor.batch_decode(outputs.sequences)[0]

        # Reduce the tagged model output to plain text
        return _extract_donut_text(sequence)

    except Exception as e:
        return f"Error during OCR processing: {e}"


In [None]:
# Step 3: Define the Local NER (Analyst) Function

# Build the biomedical token-classification pipeline once; `ner_pipeline`
# ends up as None when construction fails for any reason.
try:
    print("Loading Local NER Model (Biomedical NER)...")
    ner_pipeline = pipeline(
        "token-classification",
        model="d4data/biomedical-ner-all",
        aggregation_strategy="simple",
    )
except Exception as load_error:
    print(f"Error loading NER model: {load_error}")
    ner_pipeline = None
else:
    print("NER Model loaded successfully.")

def run_local_ner(text):
    """
    Run the module-level biomedical NER pipeline over transcribed text.

    Returns whatever the pipeline produces for ``text``. Problems are
    reported in-band as a dict with a single "error" key: either the
    pipeline was never loaded, or it raised while processing the text.
    """
    if ner_pipeline:
        try:
            return ner_pipeline(text)
        except Exception as ner_failure:
            return {"error": f"Error during NER processing: {ner_failure}"}

    # Reached only when the pipeline is missing (falsy).
    return {"error": "NER Model not loaded."}

# --- FIX: Creating a robust JSON serializer to prevent crashes ---
class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    Pass it as ``cls=NpEncoder`` to ``json.dumps`` so that NumPy values are
    converted to plain Python equivalents instead of raising ``TypeError``.
    """

    def default(self, obj):
        """Convert *obj* to a JSON-serializable built-in value.

        ``json`` calls this only for objects it cannot serialize natively.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
            for NumPy floats, and a (nested) ``list`` for arrays.

        Raises:
            TypeError: For any other unsupported type (raised by the base class).
        """
        # np.bool_ is neither a Python bool nor an np.integer, so it needs its
        # own branch or json.dumps rejects it.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


In [None]:
# Step 4: Run the Full Pipeline & Evaluate


if __name__ == "__main__":
    # End-to-end demo: OCR one prescription image, extract medical entities
    # from the transcription, then compare the transcription with a
    # hand-written ground truth.

    # --- 1. SET YOUR IMAGE PATH ---
    TEST_IMAGE_PATH = "/kaggle/input/prescription-data-set/Prescription.jpg"

    # Outside Kaggle (e.g. on Colab) the /kaggle/input mount does not exist,
    # but the dataset may have been fetched by the kagglehub download cell.
    # Fall back to that copy when the configured path is missing.
    # globals().get() keeps this working if that cell was deleted or never run.
    downloaded_dataset_dir = globals().get("soham3ripathy_prescription_data_set_path")
    if not os.path.exists(TEST_IMAGE_PATH) and downloaded_dataset_dir:
        downloaded_image_path = os.path.join(
            downloaded_dataset_dir, os.path.basename(TEST_IMAGE_PATH)
        )
        if os.path.exists(downloaded_image_path):
            TEST_IMAGE_PATH = downloaded_image_path

    # --- 2. PASTE YOUR GROUND TRUTH TEXT HERE ---
    GROUND_TRUTH_TEXT = "T. Doxycycline 100mg BD (6) T. Dolo 650mg BD (6) T. Rantac OD (3) T. Cetirizine OD (5) T. Vit C (5)"

    # run_local_ocr() reports failures in-band as strings starting with one of
    # these prefixes; such a result must not be analysed or scored as text.
    OCR_FAILURE_PREFIXES = ("OCR Model not loaded", "Error during OCR processing:")

    if not os.path.exists(TEST_IMAGE_PATH):
        print(f"\nERROR: Test image not found at '{TEST_IMAGE_PATH}'")
        print("Please upload your prescription image and update the path in the script.")
    else:
        print("\n" + "="*50)
        print("         STARTING LOCAL TRANSFORMER PIPELINE")
        print("="*50)

        # Stage 1: Run the Local Reader (OCR)
        print(f"\n[Stage 1] Reading text from image: {os.path.basename(TEST_IMAGE_PATH)}")
        transcribed_text = run_local_ocr(TEST_IMAGE_PATH)
        print(f"--> Model's Transcription: {transcribed_text}")

        if transcribed_text.startswith(OCR_FAILURE_PREFIXES):
            # Feeding an error message to NER, or scoring it against the
            # ground truth, would only produce misleading numbers.
            print("\nERROR: OCR failed, so entity extraction and evaluation were skipped.")
        else:
            # Stage 2: Run the Local Analyst (NER)
            print("\n[Stage 2] Analyzing text to find medical entities...")
            structured_data = run_local_ner(transcribed_text)

            print("\n" + "="*50)
            print("         PIPELINE COMPLETE - STRUCTURED DATA")
            print("="*50)
            # We now use our custom encoder (cls=NpEncoder) to print safely
            print(json.dumps(structured_data, indent=2, cls=NpEncoder))

            # --- Stage 3: Evaluation ---
            print("\n" + "="*50)
            print("         EVALUATION vs. GROUND TRUTH")
            print("="*50)

            print("\n--- OCR Performance ---")
            print(f"Model Output: {transcribed_text}")
            print(f"Ground Truth: {GROUND_TRUTH_TEXT}")

            # Case-insensitive similarity ratio in [0, 1] between both strings
            similarity = SequenceMatcher(None, transcribed_text.lower(), GROUND_TRUTH_TEXT.lower()).ratio()
            print(f"\n--> Text Similarity Score: {similarity:.2%}")

            print("\n--- NER Performance ---")
            print("Below are the entities the model found. Compare them to what you expected from the ground truth.")
            print(json.dumps(structured_data, indent=2, cls=NpEncoder))

