In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import base64
import json
import re
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from collections import OrderedDict
import time
import cv2
import numpy as np
from PIL import Image
import os
import io
import logging

# Flask application object; the route defined further down is registered on it.
app = Flask(__name__)
# Turn on cross-origin resource sharing for every route of the app.
CORS(app)

# Root-logger setup: INFO level, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def process_local_image(image_path):
    """Load an image from disk, preprocess it, and return it JPEG-encoded.

    The file is read with OpenCV, converted to RGB, passed through
    ``preprocess_image`` and finally encoded as a JPEG.

    Args:
        image_path: Path of the image file on the local filesystem.

    Returns:
        The JPEG-encoded image as ``bytes``.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If OpenCV cannot read the file, the JPEG encoding
            fails, or the encoded buffer is empty.

    Any exception is logged and then re-raised unchanged.
    """
    try:
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # cv2.imread returns None (instead of raising) for unreadable files.
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError("Failed to read image with OpenCV")

        # OpenCV loads images as BGR; the preprocessing step works in RGB.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_preprocessed = preprocess_image(image_rgb)

        # cv2.imencode interprets 3-channel input as BGR, so convert back
        # first; encoding the RGB array directly would swap red and blue in
        # the resulting JPEG.
        image_bgr = cv2.cvtColor(image_preprocessed, cv2.COLOR_RGB2BGR)
        is_success, img_buf_arr = cv2.imencode(".jpg", image_bgr)
        if not is_success:
            raise ValueError("Failed to encode image")

        byte_im = img_buf_arr.tobytes()
        if not byte_im:
            raise ValueError("Processed image is empty")

        return byte_im

    except Exception as e:
        logging.error("Error processing image: %s", e)
        raise

def preprocess_image(image):
    """Downscale, denoise and contrast-enhance an image.

    Args:
        image: Image array whose first two dimensions are (height, width).
            The colour conversions below treat it as 3-channel RGB, and the
            OpenCV calls used here presumably require 8-bit data -- confirm
            against the caller.

    Returns:
        The processed image array in RGB channel order.
    """
    # Shrink the image so that its longest side is at most max_size pixels.
    max_size = 1600
    height, width = image.shape[:2]
    longest_side = max(height, width)
    if longest_side > max_size:
        scale = max_size / longest_side
        # Clamp to 1 px: for extreme aspect ratios int() can truncate the
        # short side to 0, which cv2.resize rejects.
        new_width = max(1, int(width * scale))
        new_height = max(1, int(height * scale))
        # INTER_AREA is the interpolation OpenCV recommends for shrinking.
        image = cv2.resize(
            image, (new_width, new_height), interpolation=cv2.INTER_AREA
        )

    # Non-local-means denoising; positional arguments are h=10, hColor=10,
    # templateWindowSize=7, searchWindowSize=21.
    image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

    # Enhance contrast with CLAHE applied to the lightness channel only, so
    # the two colour channels (a, b) are left untouched.
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
    lightness, a_channel, b_channel = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    enhanced_lightness = clahe.apply(lightness)
    merged = cv2.merge((enhanced_lightness, a_channel, b_channel))
    image = cv2.cvtColor(merged, cv2.COLOR_LAB2RGB)

    return image

def _parse_model_json(raw_text):
    """Strip Markdown code fences from a model reply and parse it as a JSON object."""
    cleaned = re.sub(r'^```json\s*|\s*```$', '', raw_text.strip())
    cleaned = re.sub(r'^```\s*|\s*```$', '', cleaned)
    cleaned = cleaned.replace('\n', ' ').replace('\r', '').strip()

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # The reply may wrap the JSON object in explanatory prose; fall back
        # to the outermost {...} span before giving up.
        start = cleaned.find('{')
        end = cleaned.rfind('}')
        if start == -1 or end <= start:
            raise
        parsed = json.loads(cleaned[start:end + 1])

    # The caller stores a key on the result, so anything but an object is
    # reported explicitly instead of failing later with a TypeError.
    if not isinstance(parsed, dict):
        raise ValueError("Model response is not a JSON object")
    return parsed


def analyze_document_from_file(image_path, api_key):
    """Analyze a local document image with Gemini and return the parsed result.

    The image is preprocessed with ``process_local_image`` and sent together
    with a fixed analysis prompt to the ``gemini-1.5-flash`` model. The reply
    is expected to be a JSON object in the layout described by the prompt.

    Args:
        image_path: Path of the image file on the local filesystem.
        api_key: API key passed to ``genai.configure``.

    Returns:
        The model's reply parsed into a ``dict``. Its ``confidence_score``
        entry is overwritten with ``1 - attempt / max_retries``, i.e. 1.0
        when the first attempt succeeded and lower after each retry.

    Raises:
        Exception: If all five attempts fail; the last underlying error is
            attached as the cause.
        FileNotFoundError, ValueError: Propagated from ``process_local_image``,
            which runs before the retry loop.
    """
    genai.configure(api_key=api_key)

    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config={
            "temperature": 0.2,
            "top_p": 1,
            "top_k": 32,
            "max_output_tokens": 4096,
        }
    )

    image_data = process_local_image(image_path)
    # Send the raw JPEG bytes: this is the documented form of an inline blob
    # and does not depend on a base64 string being decoded implicitly.
    image_part = {"mime_type": "image/jpeg", "data": image_data}

    prompt_template = """
    Analyze this document image in detail. Extract and categorize information based on the following criteria:

    1. Text Analysis:
       - Identify all text in the image.
       - Detect any underlined text and specify which words/phrases are underlined.
       - Identify segmented text (text split into separate boxes or sections).
       - Detect if any text is cut off or partially visible.
       - Identify any crossed-out or strikethrough text.

    2. Symbol Detection:
       - Detect the presence of checkboxes and whether they are checked or unchecked.
       - Identify any stars (★) or asterisks (*) near text and specify their location (before or after the text).

    3. Shape Analysis:
       - Identify any shapes present in the document (circles, squares, triangles, etc.).
       - Describe the relationship between shapes and nearby text.

    4. Medication Information (if applicable):
       - Extract all mentions of medications (e.g., med1, med2, med3, med4).
       - For each medication, provide details and any associated shapes or symbols.

    5. Form Structure:
       - Identify form fields, labels, and their corresponding values.
       - Detect any tables and describe their content.

    6. Special Cases:
       - Note any handwritten text and its location.
       - Identify any logos, stamps, or signatures.
       - Detect any QR codes or barcodes.

    7. Image Quality Assessment:
       - Evaluate the overall image quality (e.g., clear, blurry, skewed).
       - Note any issues like poor contrast, shadows, or reflections.

    Provide the analysis in the following JSON format:
    {
        "text_analysis": {
            "all_text": [],
            "underlined_text": [],
            "segmented_text": [],
            "cut_off_text": [],
            "crossed_out_text": []
        },
        "symbol_detection": {
            "checkboxes": [
                {"text": "", "status": "checked/unchecked"}
            ],
            "stars_asterisks": [
                {"text": "", "symbol": "★/*", "position": "before/after"}
            ]
        },
        "shape_analysis": [
            {"shape": "", "description": "", "associated_text": ""}
        ],
        "medication_info": {
            "med1": {"mentions": [], "details": "", "associated_elements": ""},
            "med2": {"mentions": [], "details": "", "associated_elements": ""},
            "med3": {"mentions": [], "details": "", "associated_elements": ""},
            "med4": {"mentions": [], "details": "", "associated_elements": ""}
        },
        "form_structure": {
            "fields": [
                {"label": "", "value": ""}
            ],
            "tables": [
                {"description": "", "content": ""}
            ]
        },
        "special_cases": {
            "handwritten_text": [],
            "logos_stamps_signatures": [],
            "qr_barcodes": []
        },
        "image_quality": {
            "overall_quality": "",
            "issues": []
        },
        "confidence_score": 0
    }

    Analyze the image thoroughly and fill in all relevant sections of the JSON structure. If a section is not applicable, leave it as an empty list or string as appropriate.
    """

    prompt_parts = [prompt_template, image_part]

    max_retries = 5
    last_error = None
    for attempt in range(max_retries):
        try:
            response = model.generate_content(prompt_parts)
            if not response or not response.text:
                raise ValueError("Empty response from model")

            result = _parse_model_json(response.text)

            # Confidence reflects how many attempts were needed, not anything
            # the model reported about its own answer.
            result['confidence_score'] = 1 - (attempt / max_retries)

            return result

        except json.JSONDecodeError as je:
            last_error = je
            logging.warning("JSON decoding error (attempt %d): %s", attempt + 1, je)
        except Exception as e:
            last_error = e
            logging.warning("Error during analysis (attempt %d): %s", attempt + 1, e)

        # Exponential backoff between attempts: 1, 2, 4, 8 seconds.
        if attempt < max_retries - 1:
            wait_time = 2 ** attempt
            logging.info("Retrying in %d seconds...", wait_time)
            time.sleep(wait_time)

    # Chain the last failure so the root cause is visible in the traceback.
    raise Exception("Failed to analyze document after maximum retries") from last_error

@app.route('/analyze', methods=['POST'])
def analyze_document():
    """Handle ``POST /analyze``: analyze the image named in the JSON body.

    Expects a JSON object with an ``image_path`` entry naming an image file
    on the server's filesystem.

    Returns:
        The analysis result as JSON on success; ``{"error": ...}`` with
        status 400 when the body is not a JSON object or has no
        ``image_path``; ``{"error": ...}`` with status 500 when the API key
        is not configured or the analysis fails.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body rather
        # than raising, so bad requests get a 400 and not the generic 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        image_path = data.get('image_path')
        if not image_path or not isinstance(image_path, str):
            return jsonify({"error": "No image path provided"}), 400

        # NOTE(review): image_path comes straight from the client and is
        # opened as-is, so any file readable by this process can be requested.
        # Consider restricting it to a dedicated upload directory.

        # The API key is a secret and must not live in source control; read
        # it from the environment instead.
        api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            logging.error("Gemini API key is not configured")
            return jsonify({"error": "Gemini API key is not configured"}), 500

        result = analyze_document_from_file(image_path, api_key)

        return jsonify(result)

    except Exception as e:
        # Last-resort boundary: log with traceback and report the failure.
        logging.exception("An error occurred: %s", e)
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    # Launch Flask's built-in server with debug mode on and the automatic
    # reloader switched off.
    run_options = {"debug": True, "use_reloader": False}
    app.run(**run_options)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
2024-10-12 11:45:54,844 - INFO - Press CTRL+C to quit


In [1]:
pip install opencv-python

Collecting opencv-python
Note: you may need to restart the kernel to use updated packages.

  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84
