In [1]:
from flask import Flask, request, jsonify
import cv2
import numpy as np
from paddleocr import PaddleOCR
import paddle
from flask_cors import CORS  # Enable CORS to allow mobile devices to connect
import difflib  # For similarity ratio calculation

# Force PaddleOCR to use CPU
paddle.set_device('cpu')  

app = Flask(__name__)
CORS(app)  # Enable CORS to avoid cross-origin issues

# Initialize OCR model
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)

# Helper functions for evaluation metrics

def word_accuracy(ocr_text, ground_truth):
    ocr_words = ocr_text.split()
    ground_truth_words = ground_truth.split()
    correct = sum(1 for w1, w2 in zip(ocr_words, ground_truth_words) if w1 == w2)
    return correct / len(ground_truth_words) if ground_truth_words else 0

def char_accuracy(ocr_text, ground_truth):
    ocr_chars = ''.join(ocr_text.split())
    ground_truth_chars = ''.join(ground_truth.split())
    correct = sum(1 for c1, c2 in zip(ocr_chars, ground_truth_chars) if c1 == c2)
    return correct / len(ground_truth_chars) if ground_truth_chars else 0

def similarity_ratio(ocr_text, ground_truth):
    return difflib.SequenceMatcher(None, ocr_text, ground_truth).ratio()

@app.route('/upload', methods=['POST'])
def upload_image():
    if 'image' not in request.files:
        return jsonify({'error': 'No image uploaded'}), 400

    file = request.files['image']
    image_bytes = np.frombuffer(file.read(), np.uint8)
    image = cv2.imdecode(image_bytes, cv2.IMREAD_COLOR)

    if image is None:
        return jsonify({'error': 'Invalid image file'}), 400

    # Convert to grayscale and apply adaptive thresholding for better OCR
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresholded = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
                                        cv2.THRESH_BINARY, 11, 2)
    image_rgb = cv2.cvtColor(thresholded, cv2.COLOR_GRAY2RGB)

    # Perform OCR
    result = ocr.ocr(image_rgb, cls=True)
    
    if not result:
        return jsonify({'error': 'No text detected'}), 200

    detected_text = [{'text': word[1][0], 'confidence': word[1][1]} for line in result for word in line]
    ocr_text = ' '.join([word[1][0] for line in result for word in line])

    # For evaluation, ground truth text should be provided, it can be a static string or passed with the request
    ground_truth = request.form.get('ground_truth', '').strip()  # Example of receiving ground truth with the request

    # If ground truth is provided, calculate the evaluation metrics
    metrics = {}
    if ground_truth:
        metrics = {
            'word_accuracy': word_accuracy(ocr_text, ground_truth),
            'char_accuracy': char_accuracy(ocr_text, ground_truth),
            'similarity_ratio': similarity_ratio(ocr_text, ground_truth)
        }

    print("🔍 OCR Detected Text:", detected_text)  # Debugging log

    return jsonify({
        'detected_text': detected_text,
        'metrics': metrics
    })

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000, debug=True)


[2025/04/15 10:12:45] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\USER/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\USER/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', r

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.19.2.86:5000
[2025-04-15 10:12:48,292] [    INFO] _internal.py:97 - [33mPress CTRL+C to quit[0m
[2025-04-15 10:12:48,299] [    INFO] _internal.py:97 -  * Restarting with stat


SystemExit: 1

In [5]:
from paddleocr import PaddleOCR
ocr = PaddleOCR()
result = ocr.ocr('picture.png', cls=True)
print(result)

[2025/03/13 11:02:46] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=True, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\USER/.paddleocr/whl\\det\\ch\\ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\USER/.paddleocr/whl\\rec\\ch\\ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', re

RuntimeError: (PreconditionNotMet) The third-party dynamic library (cudnn64_8.dll) that Paddle depends on is not configured correctly. (error code is 126)
  Suggestions:
  1. Check if the third-party dynamic library (e.g. CUDA, CUDNN) is installed correctly and its version is matched with paddlepaddle you installed.
  2. Configure third-party dynamic library environment variables as follows:
  - Linux: set LD_LIBRARY_PATH by `export LD_LIBRARY_PATH=...`
  - Windows: set PATH by `set PATH=XXX; (at ..\paddle\phi\backends\dynload\dynamic_loader.cc:312)
