In [11]:
import cv2
import pytesseract
import os
import re

In [12]:
# Location of the Tesseract executable used by pytesseract.
# The TESSERACT_CMD environment variable, when set, takes precedence so the
# notebook can run on machines with a different install location; otherwise
# the original Windows path is used.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD', r'C:\Tesseract\tesseract.exe'
)

def is_valid_container_code(text):
    """Tell whether ``text`` begins with a container-code-shaped string.

    The accepted shape is three uppercase letters, one of ``U``/``J``/``Z``,
    then seven digits. The match is anchored at the start of ``text`` only,
    so extra characters after those 11 do not cause a rejection.

    Args:
        text: String to test.

    Returns:
        bool: True when the start of ``text`` has the expected shape.
    """
    code_shape = re.compile(r'[A-Z]{3}[UJZ][0-9]{6}[0-9]')
    return bool(code_shape.match(text))

def try_extract_text_with_different_psm(rgb_image, psm_modes=[6, 5]):
    """Run OCR once per page-segmentation mode and return the first non-empty result.

    Args:
        rgb_image: Image passed unchanged to ``pytesseract.image_to_string``.
        psm_modes: Values substituted into ``--psm`` and tried in the given
            order. The default list is only read, never modified.

    Returns:
        str: The whitespace-stripped OCR output of the first mode that yields
        any text, or ``''`` when every mode yields nothing.
    """
    # Lazy generator: a later mode is only attempted when the earlier ones
    # produced nothing but whitespace.
    stripped_results = (
        pytesseract.image_to_string(
            rgb_image, lang='eng', config=f'--oem 3 --psm {mode}'
        ).strip()
        for mode in psm_modes
    )
    return next((result for result in stripped_results if result), '')

def check_and_merge_texts(boxes):
    """Pick out the strings accepted by ``is_valid_container_code``.

    Blank entries are dropped and the rest are stripped, then printed. If at
    least one stripped string is accepted on its own, all accepted strings are
    returned. Otherwise the stripped strings are concatenated without a
    separator and that single string is returned if it is accepted.

    Args:
        boxes: Iterable of text strings (despite the name, not box records).

    Returns:
        list of str: Accepted strings; empty when nothing is accepted.
    """
    cleaned = []
    for raw in boxes:
        stripped = raw.strip()
        if stripped:
            cleaned.append(stripped)
    print(f"Detected texts: {cleaned}")

    accepted = list(filter(is_valid_container_code, cleaned))
    if accepted:
        return accepted

    # Nothing passed individually: try the fragments glued together.
    joined = ''.join(cleaned)
    return [joined] if is_valid_container_code(joined) else []

def extract_text_from_video(video_path, output_folder_base, index):
    """Run OCR over every frame of a video and collect container-code matches.

    Each frame is converted from BGR to RGB and passed to
    ``pytesseract.image_to_data``. When none of the returned text entries
    passes ``is_valid_container_code``, the frame is retried through
    ``try_extract_text_with_different_psm``. The resulting texts go through
    ``check_and_merge_texts`` and every accepted string is cut to at most 11
    characters before being stored.

    Side effect: creates ``<output_folder_base>/<video basename>-<index>`` if
    it does not already exist. This function does not write anything into it.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        output_folder_base: Directory under which the per-video folder is
            created.
        index: Suffix for the per-video folder name. Any value that can be
            formatted into a string is accepted (not only ``str``).

    Returns:
        list of str: Accepted codes in frame order. Empty when the capture
        reports it is not opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    frame_number = 0  # Count of frames read; kept for the placeholders below.
    valid_codes = []  # Accepted container codes, in the order they were seen.

    # Everything after the capture is created runs under try/finally so the
    # capture is released even if folder creation or OCR raises.
    try:
        # Create a unique output folder for the video.
        video_basename = os.path.splitext(os.path.basename(video_path))[0]
        output_folder = os.path.join(output_folder_base, f'{video_basename}-{index}')
        os.makedirs(output_folder, exist_ok=True)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_number += 1
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # First, try the default extraction.
            boxes = pytesseract.image_to_data(rgb, output_type=pytesseract.Output.DICT)
            texts = [box.strip() for box in boxes['text'] if box.strip()]

            # If no valid text is detected, try with different PSM values.
            # NOTE(review): the fallback returns one string for the whole
            # frame, and validation only matches at the start of a string;
            # confirm that this is the intended behaviour.
            if not any(is_valid_container_code(text) for text in texts):
                text = try_extract_text_with_different_psm(rgb)
                if text:
                    texts = [text]

            for valid_text in check_and_merge_texts(texts):
                # Validation tolerates trailing characters, so keep only the
                # first 11 characters of each accepted string.
                valid_codes.append(valid_text[:11])
                # Additional processing...

            # Example saving code or drawing rectangles goes here
    finally:
        cap.release()
    return valid_codes

def calculate_success_rate(valid_codes, expected_code):
    """Measure how many collected codes equal the expected one.

    Args:
        valid_codes: Sized collection of detected codes.
        expected_code: Value each detected code is compared against with ``==``.

    Returns:
        tuple: ``(success_rate, successful_attempts, total_attempts)`` where
        ``success_rate`` is a percentage, or ``0`` when ``valid_codes`` is
        empty.
    """
    attempts = len(valid_codes)
    if not attempts:
        # Nothing was detected: avoid dividing by zero and report a 0 rate.
        return 0, 0, 0

    hits = len([code for code in valid_codes if code == expected_code])
    percentage = (hits / attempts) * 100
    return percentage, hits, attempts

# Driver: run the OCR pipeline on each input video and report how many of the
# detected codes equal the expected code for that video.

# Videos to process.
input_videos = [
    './input/container_trolley.mp4',
]
# Expected code per video; valid_strings[i] belongs to input_videos[i], so the
# two lists must have the same length and order.
valid_strings = [
    'FTAU1347890',
]
# Base directory handed to extract_text_from_video for its per-video folder.
output_base_directory = './output'

for index, video_path in enumerate(input_videos):
    # The position in the list is passed as a string because it is
    # concatenated into the output folder name.
    valid_codes = extract_text_from_video(video_path, output_base_directory, str(index))
    # The rate is computed over accepted detections only (one entry per
    # accepted code), not over the total number of frames.
    success_rate, successful_attempts, total_attempts = calculate_success_rate(valid_codes, valid_strings[index])
    print(f"Video: {video_path}")
    print(f"Success Rate: {success_rate}% ({successful_attempts}/{total_attempts} successful attempts)\n")


Detected texts: [': | A cet. en 3 ue a | Pek | Mth 14\n| | le oS q x San | | Is: ri Baliea\ns Pini ial | f ; &) | | ilk 1 a ae\nhoe A Ss. | Sieceliae|| Fe\nV4, Z =< = = —e mi Seelerranlaaein | i mb: - b Bre mn eel a ee\n= / To i ER iin ee\niZ — = 4 = Salas me ae | Ae me\n. Sy > [ i i so ream ee | | \\| i speller al i\n‘a | a be fel ls = AIL | Fe oo\nE ; i | | | be = a IE as Bs = < K J a e M { a |. a\nape hii >= aaa i a tN eet i aA es a i 5 MMAR, occcocoe a\nwet NN [ TAF } ewacem,.. | a al \\ in, = a tt | ; my } vWUI Fea AM\nNY) a { | cmalc TH al i | ye eC) : - 4A a | | eee i\nA oe bee fraeeta || ae ||f Ham MSY > es Aja = ‘| eeiieale\nWaT cavern iiz pee ee | WW ie\n: os et / i. cMaccm._.|: CE MAN a ——————— =e | UAT Mattel ee\nSS ie [ee ; ma Mill! = (4 L fi aor J 5 mS\nSa ee tO Ne setae = ate be\n‘iia ——— ——— * I) 3 | -\\eN cn ff me “ Zz\n= — . | PC a — a\nx = eee —— ig ; = = y Ye > = ; =\n— ee) | : pe\n2 es ie = ts ==']
Detected texts: ['. | Baie Weil mt elioal (| sla\n: on |e e& ly Eli