In [1]:
!pip install easyocr




[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import numpy as np
from PIL import Image
import easyocr
import re

# Initialize EasyOCR Reader
reader = easyocr.Reader(['en'])

# Parameters
desired_size = (224, 224)  # Example size

# Define file paths
input_folder = 'output'
train_images_file = 'train_images_a.txt'
val_images_file = 'val_images_a.txt'
test_images_file = 'test_images_a.txt'
train_mos_file = 'train_mos_a.txt'
val_mos_file = 'val_mos_a.txt'
test_mos_file = 'test_mos_a.txt'

def crop_to_mos_region(image_path):
    image = Image.open(image_path)
    width, height = image.size
    crop_box = (0, height * 0.8, width, height)
    cropped_image = image.crop(crop_box)
    return cropped_image

def extract_mos(image_path):
    cropped_image = crop_to_mos_region(image_path)
    cropped_image = cropped_image.convert('L')
    text = reader.readtext(np.array(cropped_image))
    
    mos_pattern = re.compile(r'(MOS|Mos|MoS)[\s:]*([\d.]+)')
    
    for t in text:
        match = mos_pattern.search(t[1])
        if match:
            try:
                mos_value = float(match.group(2).strip())
                return mos_value
            except ValueError:
                return None
    
    return None

def resize_image(image_path, size):
    img = Image.open(image_path)
    img = img.resize(size, Image.Resampling.LANCZOS)
    return img

def process_images(folder):
    # Track processed images and MOS values
    all_images = []
    all_mos = []

    # Get list of all image files
    image_files = [name for name in os.listdir(folder) if name.endswith(('.png', '.jpg', '.jpeg'))]
    
    # Calculate splits
    num_images = len(image_files)
    num_train = int(num_images * 0.6)
    num_val = int(num_images * 0.2)
    
    # Shuffle files to ensure randomness
    np.random.shuffle(image_files)
    
    # Define split indices
    train_files = image_files[:num_train]
    val_files = image_files[num_train:num_train + num_val]
    test_files = image_files[num_train + num_val:]
    
    # Open files to write directly
    with open(train_images_file, 'w') as train_img_f, \
         open(val_images_file, 'w') as val_img_f, \
         open(test_images_file, 'w') as test_img_f, \
         open(train_mos_file, 'w') as train_mos_f, \
         open(val_mos_file, 'w') as val_mos_f, \
         open(test_mos_file, 'w') as test_mos_f:
        
        for image_name in image_files:
            image_path = os.path.join(folder, image_name)

            # Extract MOS value
            mos_value = extract_mos(image_path)
            
            if mos_value is not None:
                # Resize image
                resized_image = resize_image(image_path, desired_size)
                resized_image_path = os.path.join(folder, f"resized_{image_name}")
                resized_image.save(resized_image_path)

                # Store image path and MOS value
                if image_name in train_files:
                    train_img_f.write("%s\n" % resized_image_path)
                    train_mos_f.write("%f\n" % mos_value)
                elif image_name in val_files:
                    val_img_f.write("%s\n" % resized_image_path)
                    val_mos_f.write("%f\n" % mos_value)
                elif image_name in test_files:
                    test_img_f.write("%s\n" % resized_image_path)
                    test_mos_f.write("%f\n" % mos_value)

# Execute the function to process images
print("Processing images...")
process_images(input_folder)
print("Processing complete.")


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Processing images...


In [4]:
import os
import numpy as np
from PIL import Image
import easyocr
from sklearn.model_selection import train_test_split
import re

# Initialize EasyOCR Reader
reader = easyocr.Reader(['en'])

# Path
input_folder = 'output'

# Parameters
desired_size = (224, 224)  # Example size

# File paths for saving results
train_images_file = 'train_images_n.txt'
val_images_file = 'val_images_n.txt'
test_images_file = 'test_images_n.txt'
train_mos_file = 'train_mos_n.txt'
val_mos_file = 'val_mos_n.txt'
test_mos_file = 'test_mos_n.txt'


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


In [5]:
def crop_to_mos_region(image_path):
    image = Image.open(image_path)
    width, height = image.size
    # Define the crop box: (left, upper, right, lower)
    crop_box = (0, height * 0.8, width, height)  # Adjust based on actual position
    cropped_image = image.crop(crop_box)
    return cropped_image

def extract_mos(image_path):
    cropped_image = crop_to_mos_region(image_path)
    cropped_image = cropped_image.convert('L')  # Convert to grayscale
    text = reader.readtext(np.array(cropped_image))
    print(f"OCR output for {image_path}:\n{text}")  # Print OCR output for debugging
    
    # Define a regex pattern to extract MOS value, accounting for different variations
    mos_pattern = re.compile(r'(MOS|Mos|MoS)[\s:]*([\d.]+)')
    
    # Extract MOS value from the text
    for t in text:
        match = mos_pattern.search(t[1])
        if match:
            try:
                mos_value = float(match.group(2).strip())
                return mos_value
            except ValueError:
                print(f"Unable to convert MOS value to float in {image_path}")
                return None
    
    print(f"MOS value not found in {image_path}")
    return None

def resize_image(image_path, size):
    img = Image.open(image_path)
    img = img.resize(size, Image.Resampling.LANCZOS)  # Use Resampling.LANCZOS instead of deprecated LANCZOS
    return img


In [6]:
def process_images(folder):
    # Track processed images and MOS values
    all_images = []
    all_mos = []

    # Open files to write directly
    with open(train_images_file, 'w') as train_img_f, \
         open(val_images_file, 'w') as val_img_f, \
         open(test_images_file, 'w') as test_img_f, \
         open(train_mos_file, 'w') as train_mos_f, \
         open(val_mos_file, 'w') as val_mos_f, \
         open(test_mos_file, 'w') as test_mos_f:
        
        for image_name in os.listdir(folder):
            if image_name.endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.join(folder, image_name)

                # Extract MOS value
                mos_value = extract_mos(image_path)
                
                if mos_value is not None:
                    # Resize image
                    resized_image = resize_image(image_path, desired_size)
                    resized_image_path = os.path.join(folder, f"resized_{image_name}")
                    resized_image.save(resized_image_path)

                    # Store image path and MOS value
                    all_images.append(resized_image_path)
                    all_mos.append(mos_value)

                    # Write to respective files immediately
                    if len(all_images) % 100 == 0:  # Adjust batch size if needed
                        for img_path, mos in zip(all_images, all_mos):
                            if len(all_images) * 0.6 > len(train_images):  # Assuming 60% for training
                                train_img_f.write("%s\n" % img_path)
                                train_mos_f.write("%f\n" % mos)
                            elif len(all_images) * 0.2 > len(val_images):  # Assuming 20% for validation
                                val_img_f.write("%s\n" % img_path)
                                val_mos_f.write("%f\n" % mos)
                            else:
                                test_img_f.write("%s\n" % img_path)
                                test_mos_f.write("%f\n" % mos)
                        all_images = []
                        all_mos = []

        # Ensure remaining items are written
        for img_path, mos in zip(all_images, all_mos):
            if len(all_images) * 0.6 > len(train_images):  # Assuming 60% for training
                train_img_f.write("%s\n" % img_path)
                train_mos_f.write("%f\n" % mos)
            elif len(all_images) * 0.2 > len(val_images):  # Assuming 20% for validation
                val_img_f.write("%s\n" % img_path)
                val_mos_f.write("%f\n" % mos)
            else:
                test_img_f.write("%s\n" % img_path)
                test_mos_f.write("%f\n" % mos)

# Execute the function to process images
print("Processing images...")
process_images(input_folder)
print("Processing complete.")


Processing images...
OCR output for output\264286_00007889.jpg:
[([[6, 16], [158, 16], [158, 44], [6, 44]], 'MOS:52.80', 0.7943620146017881)]
OCR output for output\264287_00007863.jpg:
[([[6, 16], [156, 16], [156, 44], [6, 44]], 'MOS:58.64', 0.9158126494409006)]
OCR output for output\264288_00012917.jpg:
[([[6, 16], [154, 16], [154, 44], [6, 44]], 'MOS:61.91', 0.6203735726667217)]
OCR output for output\264293_00038238.jpg:
[([[6, 15], [155, 15], [155, 41], [6, 41]], 'Mos:66.38', 0.7465810498754896)]
OCR output for output\264294_00027762.jpg:
[([[5, 14], [156, 14], [156, 44], [5, 44]], 'MOS:69.10', 0.8290336548434803)]
OCR output for output\264298_00035269.jpg:
[([[6, 16], [156, 16], [156, 42], [6, 42]], 'MoS:61.28', 0.48050956532864003)]
OCR output for output\264326_00020429.jpg:
[([[6, 16], [158, 16], [158, 44], [6, 44]], 'MOS:38.96', 0.9408063893607485)]
OCR output for output\264328_00004694.jpg:
[([[6, 16], [156, 16], [156, 42], [6, 42]], 'Mos:78.44', 0.8005365157581146)]
OCR output

NameError: name 'train_images' is not defined