
## OCR on Images
### OCR with pyTesseract 
#### On whole image

In [5]:
from pytesseract import pytesseract
import cv2
import numpy as np

def processImage(path_to_image, path_to_output):
    pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract.exe'
    image=cv2.imread(path_to_image)
    #Converting image to gray level
    gray=cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
   
    #Improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_gray=clahe.apply(gray)
    cv2.imwrite(path_to_output + '/1.clayTest.jpg', clahe_gray)
    
    #Apply adaptive thresholding
    treshImage=cv2.adaptiveThreshold(clahe_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    cv2.imwrite(path_to_output + '/2.treshTest.jpg', treshImage)
    
    #Extract contours
    contours, hierarchy = cv2.findContours(treshImage, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    print('Number of contours:', len(contours))
    
    #Convert back to color image
    imContour=treshImage.copy()
    imContour = cv2.cvtColor(imContour, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(imContour, contours, -1, (255,0,0), thickness=1)
    cv2.imwrite(path_to_output +'/3.contourTest.jpg', imContour)
        
    
    myImage=cv2.cvtColor(treshImage.copy(), cv2.COLOR_GRAY2BGR)
    
    #Iterating over contours to detect potential bounding boxes
    potential_boxes=[]
    for contour in contours:
        (x,y,w,h) = cv2.boundingRect(contour)
        if w>50 and h>10 and w<250 and h<20:
            cv2.rectangle(myImage, (x,y), (x+w, y+h), (0,255,0), 2)
            potential_boxes.append((x,y,w,h))
            
    #Save image with potential bounding boxes
    cv2.imwrite(path_to_output + '/4.LargeTest.jpg', myImage)
    
    
    mybox=0 
    for (x,y,w,h) in potential_boxes:
        mybox+=1
        roi=myImage[y:y+h, x:x+w]
        config=('-l eng --oem 1 --psm 7')
        cv2.putText(myImage,str(mybox), (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,255), 2)
        text=pytesseract.image_to_string(roi, config=config)
        print('Box number : ', mybox, 'Text:', text)
    cv2.imwrite(path_to_output + '/5.numTest.jpg', myImage)
    cv2.destroyAllWindows()
    return 

processImage('F:/Data_BachelorHES/3.Video_CNC/6.pyTesseractOCR/frame10075.jpg', 'F:/Data_BachelorHES/3.Video_CNC/6.pyTesseractOCR/fullImageOCR')  

Number of contours: 21533
Box number :  1 Text: 
Box number :  2 Text: ne mamas |
Box number :  3 Text: aes Seber
Box number :  4 Text: AOE ie ef
Box number :  5 Text: La Te |
Box number :  6 Text: Gi cc ae
Box number :  7 Text: fo% Se |

Box number :  8 Text: 
Box number :  9 Text: TAL
Box number :  10 Text: Pe DUREE: SOUS. TENSI OIG83: 125245,
Box number :  11 Text: 
Box number :  12 Text: oe ee et
Box number :  13 Text: EIT sma re

Box number :  14 Text: 



#### On cropped image


In [26]:
def preprocess_with_closing_opening(image, kernel_size=(3,3)):
    """
    Preprocess the image by applying morphological closing followed by opening.
    """
    # Morphological Closing: Fill gaps within characters
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT,kernel_size)
    closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=1)
    
    # Morphological Opening: Remove noise
    opening = cv2.morphologyEx(closing, cv2.MORPH_OPEN, kernel, iterations=1)
    
    return opening

In [17]:
def OCRonCroppedImages(myImagePath, myImageName, destIntermedDir):
    #Prepare image Name for testing
    myImageNameRaw=myImageName.split('.')[0]
    
    
    #Define path to tesseract
    pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract.exe'
    
    #load Image in grayScale
    originalImage = cv2.imread(myImagePath+myImageName, cv2.IMREAD_GRAYSCALE)
    
    #Apply scaling of the image
    #Apply CLAHE to improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    claheImg=clahe.apply(originalImage)
    
    #Apply adaptive thresholding
    threshImage=cv2.adaptiveThreshold(claheImg, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

    
    #Apply morphological opening / closing
    cleanedImage=preprocess_with_closing_opening(threshImage,(1,1))
    
        #Find contours and save a copy with green contours
    contours, hierarchy = cv2.findContours(cleanedImage, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    cleanedImage=cv2.cvtColor(cleanedImage, cv2.COLOR_GRAY2BGR)
    
    #Result image
    resultImage = cv2.imread(myImagePath+myImageName)
     #Iterating over contours to detect potential bounding boxes
    potential_boxes=[]
    print(f"number of contours: {len(contours)}")
    for contour in contours:
        (x,y,w,h) = cv2.boundingRect(contour)
        #Draw boundingBoxes on image in green
        cv2.rectangle(resultImage, (x,y), (x+w, y+h), (0,255,0), 1)  
        if w>20 and h>5 and w<250 and h<200: 
            #Draw relevant bounding boxes on image in red
            cv2.rectangle(resultImage, (x,y), (x+w, y+h), (0,0,255), 2)    
            area = w*h
            potential_boxes.append((x,y,w,h))
    cv2.imwrite('{}boxes_{}'.format(destIntermedDir, myImageName), resultImage)
    
    text=[]
    for idx,(x,y,w,h) in enumerate(potential_boxes):
        roi=cv2.cvtColor(cleanedImage[y:y+h, x:x+w], cv2.COLOR_BGR2GRAY)
        
        #preprocess ROI for better OCR quality
        roi=cv2.resize(roi, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
        roi=cv2.GaussianBlur(roi, (5,5), 0) #reduce noise inside the ROI
        ret, roi = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        
        #Process bounding box and display recognized characters
        config=('-l fra --oem 1 --psm 7')
        text.append(pytesseract.image_to_string(roi, config=config).strip())
        print('Box number : ', idx+1, '   Text:', text[idx], '    Box coordinate (x,y,w,h) : (', x, y, w, h, ')')

OCRonCroppedImages('F:/Data_BachelorHES/3.Video_CNC/6.pyTesseractOCR/','cropped_frame_10075.jpg', 'F:/Data_BachelorHES/3.Video_CNC/6.pyTesseractOCR/croppedImageOCR/')    



number of contours: 1868
Box number :  1    Text: fus Ka”     Box coordinate (x,y,w,h) : ( 82 235 31 9 )
Box number :  2    Text: ar 2     Box coordinate (x,y,w,h) : ( 24 188 26 10 )
Box number :  3    Text: MISE X-05. Sd 2 =     Box coordinate (x,y,w,h) : ( 12 74 82 18 )
Box number :  4    Text: môsea     Box coordinate (x,y,w,h) : ( 198 3 28 9 )
Box number :  5    Text: HEC     Box coordinate (x,y,w,h) : ( 82 0 36 10 )


## OCR using PaddleOCR

In [30]:
#To install paddle OCR, run this line in the terminal

#pip install paddleocr --user


### First Test on whole image without preprocessing

In [19]:
from paddleocr import PaddleOCR


def paddle_ocr(my_image_path, my_image_name):
    srcImg= my_image_path + my_image_name
    
    ocr=PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
    
    results=ocr.ocr(srcImg, cls=True)
    print(results)
    for line in results[0]:
        txt, confidence = line[1]
        print(f'Recognized text : {txt}, confidence: {confidence}')
    return results
    

myResult=paddle_ocr('F:/Data_BachelorHES/3.Video_CNC/7.paddleOCR/','frame10075.jpg')    

[2025/03/01 12:02:37] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\Julie/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\Julie/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=

In [22]:
import os 

os.environ["OMP_NUM_THREADS"] = "8"

#Call the OCR model and download it
ocr=PaddleOCR(use_angle_cls=True, lang='en', use_gpu="False", cpu_threads=8) 


### Second Test on cropped image with preprocessing

In [24]:

import logging

# Désactiver les logs de PaddleOCR
logging.getLogger("ppocr").setLevel(logging.ERROR)


def paddleOCRWhiteRegions (myImagePath, myImageName, destIntermedDir, whiteThresh=0.45):
   
    srcImg=myImagePath+myImageName
   
    Img=cv2.imread(srcImg, cv2.IMREAD_COLOR)
   
    Img=cv2.resize(Img,(0,0), fx=2.0, fy=2.0, interpolation=cv2.INTER_NEAREST) 
   
    grayImg=cv2.cvtColor(Img, cv2.COLOR_BGR2GRAY)
   
    threshImage=cv2.adaptiveThreshold(grayImg, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 5)
    
    ocrPath=destIntermedDir+"ocr_image.png"
    cv2.imwrite(ocrPath, Img)
    
    
    myDictInfos = {
        'ImageName': myImageName,
        'Frame_Number': int(myImageName.split('_')[-1].split('.')[0]),
        'Detected_Text': {
            'Text_Box_(x,y,w,h)': [],
            'Text': [],
            'Confidence': [],
            'White_Ratio': [],
            'FilePath': []
        }
    }

    
    results=ocr.ocr(ocrPath, cls=True)
    
    if not results[0]:
        print("No text detected")
        return myDictInfos
    for line in results[0]:
        coord, (txt, confidence) = line
        xMin=int(min([point[0] for point in coord]))
        xMax=int(max([point[0] for point in coord]))
        yMin=int(min([point[1] for point in coord]))
        yMax=int(max([point[1] for point in coord]))
        region=threshImage[yMin:yMax, xMin:xMax]
        h,w=yMax-yMin, xMax-xMin
        totalPixels = region.size
        whitePixels = cv2.countNonZero(region)
        whiteRatio = whitePixels / float(totalPixels)
        
        if whiteRatio>=whiteThresh and (xMin>300 or yMin>20):
                myRegionImagePath=destIntermedDir+'Img_{}_Region_{}_WRatio_{}_txt_{}_conf_{}.png'.format(myDictInfos['Frame_Number'],(xMin,yMin,w,h), np.round(whiteRatio,2), txt, np.round(confidence,3))
                myDictInfos['Detected_Text']['Text_Box_(x,y,w,h)'].append((xMin,yMin,w,h))
                
                myDictInfos['Detected_Text']['Text'].append(txt)
                
                myDictInfos['Detected_Text']['Confidence'].append(confidence)
                
                myDictInfos['Detected_Text']['White_Ratio'].append(whiteRatio)
                
                myDictInfos['Detected_Text']['FilePath'].append(myRegionImagePath)
                
                cv2.imwrite(myRegionImagePath,region)
                
    return myDictInfos


myResultWhite= paddleOCRWhiteRegions ('F:/Data_BachelorHES/3.Video_CNC/7.paddleOCR/','cropped_frame_10075.jpg', 'F:/Data_BachelorHES/3.Video_CNC/7.paddleOCR/croppedImageOCR/',)

In [25]:
print(myResultWhite)

{'ImageName': 'cropped_frame_10075.jpg', 'Frame_Number': 10075, 'Detected_Text': {'Text_Box_(x,y,w,h)': [(395, 6, 98, 20), (24, 147, 162, 20)], 'Text': ['NO0003390', '3390-85.322'], 'Confidence': [0.9211666584014893, 0.9037150740623474], 'White_Ratio': [0.5331632653061225, 0.557716049382716], 'FilePath': ['F:/Data_BachelorHES/3.Video_CNC/7.paddleOCR/croppedImageOCR/Img_10075_Region_(395, 6, 98, 20)_WRatio_0.53_txt_NO0003390_conf_0.921.png', 'F:/Data_BachelorHES/3.Video_CNC/7.paddleOCR/croppedImageOCR/Img_10075_Region_(24, 147, 162, 20)_WRatio_0.56_txt_3390-85.322_conf_0.904.png']}}
