In [11]:
import cv2
import pytesseract
import os
import re

In [12]:
# Tell pytesseract which Tesseract OCR executable to launch. The location is
# hard-coded to a Windows-style path; edit it if Tesseract is installed elsewhere.
pytesseract.pytesseract.tesseract_cmd = r'C:\Tesseract\tesseract.exe'

def is_valid_container_code(text):
    """Return True when *text* starts with a container-code-shaped token.

    The expected shape is three uppercase letters, one of ``U``, ``J`` or
    ``Z``, and then seven digits (11 characters in total). The match is
    anchored at the beginning of *text* only, so anything following the
    11th character is ignored.
    """
    container_code_re = re.compile(r'[A-Z]{3}[UJZ][0-9]{6}[0-9]')
    return bool(container_code_re.match(text))

def check_and_merge_texts(boxes):
    """Pick container codes out of one OCR result.

    Args:
        boxes: Mapping with parallel ``'text'`` and ``'conf'`` sequences
            (the caller passes the dict returned by
            ``pytesseract.image_to_data`` with ``Output.DICT``).

    Returns:
        A list of strings. It holds every individual word with confidence
        above 60 that passes ``is_valid_container_code``. If no single word
        passes, the confident words are concatenated without separators and
        that merged string is returned as the only element when it passes.
        The list is empty when nothing passes.
    """
    def _confidence(raw):
        # Confidence may arrive as an int, a float, or a string such as
        # '96.53' or '-1'; int() alone raises ValueError on '96.53'.
        # Truncating via float keeps the same result for plain ints.
        return int(float(raw))

    texts = [
        text.strip()
        for text, conf in zip(boxes['text'], boxes['conf'])
        if _confidence(conf) > 60
    ]

    valid_texts = [text for text in texts if is_valid_container_code(text)]
    if valid_texts:
        return valid_texts

    # A code may be split across several OCR words; try them glued together.
    combined_text = ''.join(texts)
    if is_valid_container_code(combined_text):
        valid_texts.append(combined_text)
    return valid_texts

def extract_text_from_video(video_path, output_folder_base, index):
    """OCR every frame of a video and collect the container codes found.

    For each frame in which ``check_and_merge_texts`` reports at least one
    code, the first code (cut to at most 11 characters) is recorded, drawn
    in green near the top-left corner of the frame, and the annotated frame
    is written to ``<output_folder_base>/<video name>_<index>/frame_<n>.jpg``,
    where ``n`` is the 1-based frame number.

    Args:
        video_path: Path of the video file to read.
        output_folder_base: Directory under which the per-video output
            folder is created.
        index: Suffix appended to the video's base name to form the output
            folder name. Any value that formats as text is accepted.

    Returns:
        A list with one code per frame in which a code was detected, in
        frame order.
    """
    # Create a unique output folder for the video.
    video_basename = os.path.splitext(os.path.basename(video_path))[0]
    output_folder = os.path.join(output_folder_base, f"{video_basename}_{index}")
    os.makedirs(output_folder, exist_ok=True)

    valid_codes = []
    frame_number = 0
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # No more frames (or the read failed).
            frame_number += 1

            # OpenCV delivers BGR; convert to RGB before handing to Tesseract.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes = pytesseract.image_to_data(rgb, output_type=pytesseract.Output.DICT)

            valid_texts = check_and_merge_texts(boxes)
            if not valid_texts:
                continue

            # Only the first code per frame is used; a merged or over-long
            # match is cut down to the 11-character code length.
            valid_text = valid_texts[0][:11]
            valid_codes.append(valid_text)

            # Write the container code in the top-left corner of the frame.
            cv2.putText(frame, valid_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            print(f"Video: {output_folder}")
            frame_path = os.path.join(output_folder, f"frame_{frame_number}.jpg")
            cv2.imwrite(frame_path, frame)
    finally:
        # Release the capture even when OCR or image processing raises.
        cap.release()
    return valid_codes

def calculate_success_rate(valid_codes, expected_code):
    """Measure how often the detected codes equal the expected one.

    Args:
        valid_codes: Collection of detected codes.
        expected_code: The code every detection is compared against.

    Returns:
        A ``(success_rate, successful_attempts, total_attempts)`` tuple.
        ``success_rate`` is a percentage; it is ``0`` when ``valid_codes``
        is empty.
    """
    total_attempts = len(valid_codes)

    successful_attempts = 0
    for code in valid_codes:
        if code == expected_code:
            successful_attempts += 1

    if not total_attempts:
        # Nothing detected: avoid dividing by zero.
        return 0, successful_attempts, total_attempts

    success_rate = (successful_attempts / total_attempts) * 100
    return success_rate, successful_attempts, total_attempts

# Videos to process. Each entry is paired by position with the expected code
# in ``valid_strings``.
input_videos = ['./input/container_truck.mp4']

# Expected container code for each video, in the same order as
# ``input_videos``. 'FTAU1347890' was left commented out next to this list
# (presumably the code for another video).
valid_strings = ['MEDU9627951']

# Root directory under which a folder of annotated frames is created per video.
output_base_directory = './output'

# Process each video and report how many detections equal the expected code.
for index, video_path in enumerate(input_videos):
    valid_codes = extract_text_from_video(
        video_path, output_base_directory, str(index)
    )
    success_rate, successful_attempts, total_attempts = calculate_success_rate(
        valid_codes, valid_strings[index]
    )
    print(f"Video: {video_path}")
    print(f"Success Rate: {success_rate}% ({successful_attempts}/{total_attempts} successful attempts)\n")

Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./output\container_truck_0
Video: ./input/container_truck.mp4
Success Rate: 63.63636363636363% (7/11 successful attempts)

