In [67]:
import google.generativeai as genai
from PIL import Image
import os
from dotenv import load_dotenv
from pathlib import Path

# Load environment variables
load_dotenv()

# Configure Gemini API
genai.configure(api_key='AIzaSyCP_ooSoINwK60YQEduG5PPOP--i9KL0jg')

# Initialize the model
model = genai.GenerativeModel('gemini-1.5-flash')

def extract_prescription_text(image_path):
    """
    Extract text from handwritten medical prescription using Gemini Vision
    
    Args:
        image_path (str): Path to the prescription image
    
    Returns:
        str: Extracted text from the prescription
    """
    try:
        # Load and prepare the image
        image = Image.open(image_path)
        
        # Create prompt for better context
        prompt = """
        Please analyze this medical prescription image and extract all the handwritten and printed text. 
        Format the output in the following manner:
        1) Patient Name: [Name]
        2) Date: [Date]
        3) Age: [Age]
        4) Gender: [Gender]
        5) Weight: [Weight]
        6) Address: [Address]
        7) Diagnosis: [Diagnosis details]
        8) Prescription:
            [Medicine name] | [Dosage Instructions] | [Dosage count]
            [Medicine name] | [Dosage Instructions] | [Dosage count]
            [Medicine name] | [Dosage Instructions] | [Dosage count]
        9) Doctor Details: [Details]
        10) Miscellaneous: [Any other information]
        
        Aim to be as precise in the transcription as possible. Do not add any additional information apart from what is written.
        If any information is of the above details are not available, mention it as 'Not Available'.
        The medicine name , must include the name of the medicine, the type of medication (tablet, capsule, syrup, etc.), and the strength of the medicine (eg. mg per tablet).
        Dosage instructions may be in multiple formats, either as abbreviations (TDS etc.) or as full words (Three times a day) or as markings (1-0-1). It may also include the count or duration of the dosages.
        The dosage count first must print the number of doses each day, then print the duration (in days) of the medication. If the duration of the medication is unclear, Return the answer in the form of doses per day.
        The diagnostic details may include the patient's condition, symptoms, or any other relevant information found in the prescription.
        The address must be the address of the patient and not the clinics address mentioned in a letterhead
        """
        
        # Generate response from Gemini
        response = model.generate_content([prompt, image])
        
        return response.text
        
    except Exception as e:
        return f"Error processing image: {str(e)}"

# Example usage
result = extract_prescription_text('data/124.jpg')
print(result)

1) Patient Name: Dalia Kundu
2) Date: 19-02-2021
3) Age: 64
4) Gender: F
5) Weight: Not Available
6) Address: Not Available
7) Diagnosis: 2D Echo - (Bone/Lung/Liver)
8) Prescription:
    6# (P+H) | Not Available | Not Available
    1m Xgeva (120) | alt # | Not Available
9) Doctor Details: Tanmoy Kumar Mandal, Regn No: WBMC-63430, MD(Internal Medicine), DM(Medical Oncology), MRCP(Medical Oncology)
10) Miscellaneous:  Adv ow Dis, HMW - TDS, Closeu - TDS, T. Dexa(4) - BD D2-D4, T. Ondan(4) - BD D2-D4, T. Ultracet - BD x 5 days, CBC, Give C (P+H) ow 10/2/21, R/S ow 3/3/21 : CBC for (2


In [68]:
import csv
from fuzzywuzzy import process
import fuzzywuzzy
import re

import fuzzywuzzy.fuzz

# List of generic terms to ignore
GENERIC_TERMS = {
    'tablet', "tab", 'syrup', 'syr', 'capsule', 'cap', 'injection', 'inj', 'cream', 'gel', 'drops', 'spray',
    'mg', 'ml', 'gm', 'g', 'kg', 'l', 'iu', 'mcg', 'microgram', 'milligram', 'gram', 'kilogram', 'liter',
    'oral', 'topical', 'solution', 'suspension', 'powder', 'ointment', 'liquid', 'patch', 'inhaler'
}

def extract_drug_name(name):
    """
    Extract the meaningful drug name by removing generic terms and numbers.
    
    Args:
        name (str): The medicine name to process.
    
    Returns:
        str: The extracted drug name.
    """
    # Remove numbers and special characters
    name = re.sub(r'\d+', '', name)  # Remove numbers
    name = re.sub(r'[^\w\s]', '', name)  # Remove special characters
    
    # Split the name into words and filter out generic terms
    words = [word for word in name.lower().split() if word not in GENERIC_TERMS]
    return ' '.join(words)


def load_medicine_db(csv_file):
    """
    Load medicine database from a CSV file.
    
    Args:
        csv_file (str): Path to the CSV file containing medicine data.
    
    Returns:
        dict: A dictionary containing medicine data.
    """
    medicine_db = {}
    
    try:
        with open(csv_file, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            print("CSV file opened successfully.")  # Debugging
            
            for row in reader:
                #print("Processing row:", row)  # Debugging
                # Extract the drug name
                key = extract_drug_name(row['name'])
                medicine_db[key] = {
                    'big_name': row['name'],
                    'name': extract_drug_name(row['name']),
                    'short_composition1': row['short_composition1'].split(', ') if row['short_composition1'] else [],
                    'short_composition2': row['short_composition2'].split(', ') if row['short_composition2'] else [],
                }
        
        print("Medicine database loaded successfully.")  # Debugging
        return medicine_db
    
    except FileNotFoundError:
        print(f"Error: The file '{csv_file}' was not found.")
        return {}
    except KeyError as e:
        print(f"Error: Missing expected column in CSV file: {e}")
        return {}
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return {}


def verify_medicine(medicine_name, medicine_db, threshold=80, length_threshold=5):
    """
    Verify if a medicine exists in the database using fuzzy matching and length-based filtering.
    
    Args:
        medicine_name (str): Name of the medicine to verify.
        medicine_db (dict): Medicine database loaded from CSV.
        threshold (int): Minimum similarity score for a match (0-100).
        length_threshold (int): Maximum allowed length difference between input and matched name.
    
    Returns:
        dict: Verification result with status, info, or suggestions.
    """
    # Extract the drug name from the input medicine name
    processed_name = extract_drug_name(medicine_name)
    print(processed_name)
    
    # Use fuzzywuzzy to find the best match
    best_match, score = process.extractOne(processed_name, medicine_db.keys(), scorer= fuzzywuzzy.fuzz.ratio)
    
    # Calculate the length difference
    input_length = len(processed_name)
    match_length = len(best_match)
    length_difference = abs(input_length - match_length)
    
    # Check if the best match meets the thresholds
    if  score >= threshold and length_difference <= length_threshold:
        return {
            'status': 'found',
            'info': medicine_db[best_match],
            'match_score': score,  # Optional: Include the match score for debugging
            'length_difference': length_difference  # Optional: Include the length difference for debugging
        }
    else:
        return {
            'status': 'not found',
            'suggestions': []  # No suggestions for now
        }


# Load medicine database from CSV
medicine_db = load_medicine_db('data.csv')


prescription_section = None
for line in result.split('\n'):
    if line.strip().startswith('8) Prescription:'):
        prescription_section = line.strip()
        break

if not prescription_section:
    print("No prescription section found in the text.")
else:
    # Extract medicine names from the prescription section
    medicine_lines = result.split('8) Prescription:')[1].strip().split('\n')
    for line in medicine_lines:
        if '|' in line:  # Ensure it's a medicine line
            medicine_name = line.split('|')[0].strip()
            print(f"Verifying medicine: {medicine_name}")
            verification_result = verify_medicine(medicine_name, medicine_db)
            print(f"Verification result for {medicine_name}:")
            print(verification_result)
          

CSV file opened successfully.
Medicine database loaded successfully.
Verifying medicine: 6# (P+H)
ph
Verification result for 6# (P+H):
{'status': 'found', 'info': {'big_name': 'PAH 20 Tablet', 'name': 'pah', 'short_composition1': ['Sildenafil (20mg)'], 'short_composition2': []}, 'match_score': 80, 'length_difference': 1}
Verifying medicine: 1m Xgeva (120)
m xgeva
Verification result for 1m Xgeva (120):
{'status': 'not found', 'suggestions': []}
