# 基于传统ML的图像像素识别

Image Loading

In [1]:
import cv2
import numpy as np
from PIL import Image
import os
import torch
import matplotlib.pyplot as plt
from os.path import join


In [3]:
# Function to binarize a tensor of images using Otsu's method
def binarize_tensor_images(tensor_images):
    """Binarize a batch of grayscale images with Otsu's threshold.

    Args:
        tensor_images: Iterable of PyTorch tensors, one per image. Singleton
            dimensions are squeezed away before thresholding, so a
            ``(1, H, W)`` tensor is processed as an ``(H, W)`` image.
            Non-uint8 data is multiplied by 255 before the uint8 cast
            (assumes such inputs are scaled to [0, 1] -- TODO confirm
            against the dataset).

    Returns:
        A uint8 tensor stacking the thresholded images (pixel values are
        0 or 255). An empty input yields an empty uint8 tensor of shape
        ``(0,)`` instead of raising inside ``torch.stack``.
    """
    binary_images = []
    for image in tensor_images:
        # detach()/cpu() keep the conversion valid for tensors that track
        # gradients or live on a GPU; both are no-ops for plain CPU tensors.
        image_np = image.detach().cpu().numpy().squeeze()

        if image_np.dtype != np.uint8:
            # Clip before casting: astype(np.uint8) wraps out-of-range
            # values (e.g. 257 -> 1) instead of saturating them.
            image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)

        # With THRESH_OTSU the threshold argument (0) is ignored and the
        # threshold is chosen from the image histogram.
        _, binary_image = cv2.threshold(
            image_np, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        binary_images.append(torch.tensor(binary_image, dtype=torch.uint8))

    if not binary_images:
        # A 1-D empty tensor is accepted by torch.cat alongside real batches.
        return torch.empty((0,), dtype=torch.uint8)
    return torch.stack(binary_images)

In [4]:
# Function to load and binarize images in batches
def process_images_in_batches(data_path, batch_size=100):
    """Load every ``*seg_data.pt`` file in a directory and binarize the images.

    Args:
        data_path: Directory scanned (non-recursively) for files whose name
            ends with ``seg_data.pt``; each is loaded with ``torch.load``.
        batch_size: Number of images passed to ``binarize_tensor_images``
            per call.

    Returns:
        Tensor holding the binarized batches concatenated along dimension 0.

    Raises:
        ValueError: If ``batch_size`` is not positive.
        FileNotFoundError: If no matching file exists in ``data_path``.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    img_list = []
    # os.listdir returns entries in arbitrary order; sorting keeps the image
    # order (and therefore the output indices) reproducible across runs.
    for file in sorted(os.listdir(data_path)):
        if not file.endswith('seg_data.pt'):
            continue
        img_path = join(data_path, file)
        data = torch.load(img_path)
        img_list.append(data)
        print(f"Loaded {file}")

    if not img_list:
        raise FileNotFoundError(
            f"No file ending in 'seg_data.pt' found in {data_path}"
        )

    img_list = torch.cat(img_list)
    print("Loaded images shape:", img_list.shape)
    print("Data type of loaded images:", img_list.dtype)

    binarized_list = []

    # Ceiling division: the previous `len // batch_size + 1` produced an
    # extra, empty batch whenever the image count was an exact multiple of
    # batch_size, which then failed inside the binarizer.
    total = len(img_list)
    num_batches = (total + batch_size - 1) // batch_size
    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = min(start_idx + batch_size, total)
        batch = img_list[start_idx:end_idx]
        binarized_batch = binarize_tensor_images(batch)
        # Debug preview of the batch's second image; guarded because the
        # final batch may contain a single image.
        if len(binarized_batch) > 1:
            print(binarized_batch[1])
        binarized_list.append(binarized_batch)
        print(f"Processed batch {i+1}/{num_batches}")

    if not binarized_list:
        # The matching files contained zero images in total.
        return torch.empty((0,), dtype=torch.uint8)

    binarized_images = torch.cat(binarized_list)
    return binarized_images

In [6]:
# Root directories of the preprocessed CVL and IAM datasets. The batch
# loaders in this notebook scan them for files ending in 'seg_data.pt'.
CVL_path ='/root/autodl-tmp/APS360_Project/Datasets/CVL_Processed'
IAM_path = '/root/autodl-tmp/APS360_Project/Datasets/IAM_Processed'

In [7]:
# Load every CVL image and binarize it with Otsu's method, 100 images per
# batch. NOTE(review): the name is spelled "cvlt_" here and in the save cell,
# while later cells use "cvl_" for the tensor reloaded from disk.
cvlt_binarized_images = process_images_in_batches(CVL_path, batch_size=100)


Loaded seg_data.pt
Loaded images shape: torch.Size([1598, 1, 1024, 1024])
Data type of loaded images: torch.float32
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
Processed batch 1/16
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
Processed batch 2/16
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
Processed batch 3/16
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        

In [None]:
# Persist the binarized CVL tensor; the connected-component and drawing
# cells reload it from this same CVL_binary.pt path.
torch.save(cvlt_binarized_images, '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/CVL_binary.pt')
print("Saved binarized CVL images")

In [13]:
# Function to invert and binarize a tensor of images using Otsu's method
def binarize_tensor_images_IAM(tensor_images):
    """Invert and binarize a batch of grayscale images with Otsu's threshold.

    Identical to the plain binarizer except that each image is inverted
    (``255 - pixel``) before thresholding, so dark pixels in the input end
    up as 255 in the output.

    Args:
        tensor_images: Iterable of PyTorch tensors, one per image. Singleton
            dimensions are squeezed away before thresholding. Non-uint8 data
            is multiplied by 255 before the uint8 cast (assumes such inputs
            are scaled to [0, 1] -- TODO confirm against the dataset).

    Returns:
        A uint8 tensor stacking the thresholded images (pixel values are
        0 or 255). An empty input yields an empty uint8 tensor of shape
        ``(0,)`` instead of raising inside ``torch.stack``.
    """
    binary_images = []
    for image in tensor_images:
        # detach()/cpu() keep the conversion valid for tensors that track
        # gradients or live on a GPU; both are no-ops for plain CPU tensors.
        image_np = image.detach().cpu().numpy().squeeze()

        if image_np.dtype != np.uint8:
            # Clip before casting: astype(np.uint8) wraps out-of-range
            # values (e.g. 257 -> 1) instead of saturating them.
            image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)

        # Invert so that the foreground/background polarity is flipped
        # before Otsu picks the threshold.
        image_np = 255 - image_np

        _, binary_image = cv2.threshold(
            image_np, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        binary_images.append(torch.tensor(binary_image, dtype=torch.uint8))

    if not binary_images:
        # A 1-D empty tensor is accepted by torch.cat alongside real batches.
        return torch.empty((0,), dtype=torch.uint8)
    return torch.stack(binary_images)

# Function to process images in batches
def process_images_in_batches_IAM(data_path, batch_size=100):
    """Load every ``*seg_data.pt`` file in a directory and binarize it (IAM).

    Same pipeline as the CVL loader, but each batch goes through
    ``binarize_tensor_images_IAM``, which inverts the images before
    thresholding.

    Args:
        data_path: Directory scanned (non-recursively) for files whose name
            ends with ``seg_data.pt``; each is loaded with ``torch.load``.
        batch_size: Number of images passed to the binarizer per call.

    Returns:
        Tensor holding the binarized batches concatenated along dimension 0.

    Raises:
        ValueError: If ``batch_size`` is not positive.
        FileNotFoundError: If no matching file exists in ``data_path``.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    img_list = []
    # os.listdir returns entries in arbitrary order; sorting keeps the image
    # order (and therefore the output indices) reproducible across runs.
    for file in sorted(os.listdir(data_path)):
        if not file.endswith('seg_data.pt'):
            continue
        img_path = join(data_path, file)
        data = torch.load(img_path)
        img_list.append(data)
        print(f"Loaded {file}")

    if not img_list:
        raise FileNotFoundError(
            f"No file ending in 'seg_data.pt' found in {data_path}"
        )

    img_list = torch.cat(img_list)
    print("Loaded images shape:", img_list.shape)
    print("Data type of loaded images:", img_list.dtype)

    binarized_list = []

    # Ceiling division: the previous `len // batch_size + 1` produced an
    # extra, empty batch whenever the image count was an exact multiple of
    # batch_size, which then failed inside the binarizer.
    total = len(img_list)
    num_batches = (total + batch_size - 1) // batch_size
    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = min(start_idx + batch_size, total)
        batch = img_list[start_idx:end_idx]
        binarized_batch = binarize_tensor_images_IAM(batch)
        # Debug preview of the batch's second image; guarded because the
        # final batch may contain a single image.
        if len(binarized_batch) > 1:
            print(binarized_batch[1])
        binarized_list.append(binarized_batch)
        print(f"Processed batch {i+1}/{num_batches}")

    if not binarized_list:
        # The matching files contained zero images in total.
        return torch.empty((0,), dtype=torch.uint8)

    binarized_images = torch.cat(binarized_list)
    return binarized_images

In [14]:
# Load every IAM image, invert it and binarize it with Otsu's method,
# 100 images per batch.
iam_binarized_images = process_images_in_batches_IAM(IAM_path, batch_size=100)


Loaded seg_data.pt
Loaded images shape: torch.Size([1539, 1, 1024, 1024])
Data type of loaded images: torch.float32
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
Processed batch 1/16
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
Processed batch 2/16
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
Processed batch 3/16
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        

KeyboardInterrupt: 

: 

In [None]:
# Persist the binarized IAM tensor; the connected-component and drawing
# cells reload it from this same IAM_binary.pt path.
torch.save(iam_binarized_images, '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/IAM_binary.pt')
print("Saved binarized IAM images")

Connected Component Detection

In [1]:
def connected_component_labeling_batch(tensor_images):
    """Run 8-connected component labeling on each image of a batch.

    Args:
        tensor_images: Iterable of PyTorch tensors; each one is converted
            with ``.numpy()`` and handed to OpenCV unchanged.

    Returns:
        A list with one ``(num_labels, labels, stats, centroids)`` tuple per
        input image, exactly as produced by
        ``cv2.connectedComponentsWithStats``.
    """
    def _label_one(single_image):
        # Unpack and re-pack so every entry is a plain 4-tuple.
        count, label_map, component_stats, component_centroids = (
            cv2.connectedComponentsWithStats(single_image.numpy(), connectivity=8)
        )
        return (count, label_map, component_stats, component_centroids)

    return [_label_one(single_image) for single_image in tensor_images]

def process_in_batches(tensor_images, batch_size, processing_fn, output_path):
    """Apply ``processing_fn`` batch by batch, saving each batch to its own file.

    The previous implementation saved every batch to the same
    ``output_path`` (so only the last batch survived on disk) and cleared
    its accumulator each iteration (so it always returned an empty list).
    Results are now written to numbered part files so no batch is lost
    while memory use stays bounded to one batch.

    Args:
        tensor_images: Sliceable, sized collection of images.
        batch_size: Maximum number of images per batch; must be positive.
        processing_fn: Callable taking one batch (a slice of
            ``tensor_images``) and returning an iterable of per-image results.
        output_path: Base path for the output. Batch ``k`` (0-based) is
            saved with ``torch.save`` to ``<root>_part<k><ext>``, where
            ``<root>`` and ``<ext>`` come from splitting ``output_path``.

    Returns:
        List of the part-file paths written, in batch order (empty when
        ``tensor_images`` is empty).

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    root, ext = os.path.splitext(output_path)
    total = len(tensor_images)
    num_batches = (total + batch_size - 1) // batch_size  # ceiling division

    part_paths = []
    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = min(start_idx + batch_size, total)
        batch = tensor_images[start_idx:end_idx]
        processed_batch = list(processing_fn(batch))

        # One file per batch: results are flushed to disk without holding
        # earlier batches in memory and without overwriting them.
        part_path = f"{root}_part{i}{ext}"
        torch.save(processed_batch, part_path)
        part_paths.append(part_path)
        del processed_batch  # release the batch before processing the next

        print(f"Processed batch {i + 1}/{num_batches}")

        # Free cached GPU memory if a CUDA device is present.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return part_paths

In [36]:
# Directory that receives the connected-component outputs.
output_dir = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box'

# Load binarized images (the .pt files written by the torch.save cells above)
CVL_binarized_images_path = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/CVL_binary.pt'
IAM_binarized_images_path = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/IAM_binary.pt'

# Each call loads a whole dataset tensor into memory at once.
cvl_binarized_images = torch.load(CVL_binarized_images_path)
iam_binarized_images = torch.load(IAM_binarized_images_path)

# Process in smaller batches
# NOTE: batch_size is a notebook-global name that later cells reassign.
batch_size = 50

In [5]:
# CVL dataset: run connected-component labeling batch by batch, with results
# written to disk by process_in_batches based on cvl_output_path.
cvl_output_path = os.path.join(output_dir, 'CVL_seg_connected_components.pt')
cvl_labels_stats_centroids = process_in_batches(cvl_binarized_images, batch_size, connected_component_labeling_batch, cvl_output_path)

Processed batch 1/32
Processed batch 2/32
Processed batch 3/32
Processed batch 4/32
Processed batch 5/32
Processed batch 6/32
Processed batch 7/32
Processed batch 8/32
Processed batch 9/32
Processed batch 10/32
Processed batch 11/32
Processed batch 12/32
Processed batch 13/32
Processed batch 14/32
Processed batch 15/32
Processed batch 16/32
Processed batch 17/32
Processed batch 18/32
Processed batch 19/32
Processed batch 20/32
Processed batch 21/32
Processed batch 22/32
Processed batch 23/32
Processed batch 24/32
Processed batch 25/32
Processed batch 26/32
Processed batch 27/32
Processed batch 28/32
Processed batch 29/32
Processed batch 30/32
Processed batch 31/32
Processed batch 32/32


In [6]:
# IAM dataset: run connected-component labeling batch by batch, with results
# written to disk by process_in_batches based on iam_output_path.
iam_output_path = os.path.join(output_dir, 'IAM_seg_connected_components.pt')
iam_labels_stats_centroids = process_in_batches(iam_binarized_images, batch_size, connected_component_labeling_batch, iam_output_path)

# The message covers both datasets; the CVL run happens in the previous cell.
print("Connected component detection completed for CVL and IAM datasets.")

Processed batch 1/31
Processed batch 2/31
Processed batch 3/31
Processed batch 4/31
Processed batch 5/31
Processed batch 6/31
Processed batch 7/31
Processed batch 8/31
Processed batch 9/31
Processed batch 10/31
Processed batch 11/31
Processed batch 12/31
Processed batch 13/31
Processed batch 14/31
Processed batch 15/31
Processed batch 16/31
Processed batch 17/31
Processed batch 18/31
Processed batch 19/31
Processed batch 20/31
Processed batch 21/31
Processed batch 22/31
Processed batch 23/31
Processed batch 24/31
Processed batch 25/31
Processed batch 26/31
Processed batch 27/31
Processed batch 28/31
Processed batch 29/31
Processed batch 30/31
Processed batch 31/31
Connected component detection completed for CVL and IAM datasets.


Bounding Box Calculation

In [34]:
# Function to apply connected component labeling and calculate bounding boxes
def calculate_bounding_boxes(tensor_images):
    """Label connected components and derive one bounding box per component.

    Args:
        tensor_images: Iterable of PyTorch tensors holding already-binarized
            images; every non-zero pixel is treated as foreground.

    Returns:
        A list with one ``(labels, bounding_boxes)`` tuple per image, where
        ``labels`` is the label map returned by OpenCV and
        ``bounding_boxes`` is a list of ``(x_min, y_min, x_max, y_max)``
        tuples of Python ints, background component excluded.
    """
    results = []
    for image_tensor in tensor_images:
        image_np = image_tensor.numpy()

        # Build an explicit 0/255 uint8 mask. The previous
        # `np.uint8(image_np * 255)` only worked for 0/255 uint8 input
        # because the product silently wrapped modulo 256 (255 * 255 -> 1).
        binary_image_np = np.where(image_np != 0, 255, 0).astype(np.uint8)

        _, labels, stats, _ = cv2.connectedComponentsWithStats(
            binary_image_np, connectivity=8
        )

        # stats rows are (x, y, width, height, area); row 0 is the
        # background component and is skipped.
        bounding_boxes = [
            (int(x), int(y), int(x + w), int(y + h))
            for x, y, w, h, _area in stats[1:]
        ]

        results.append((labels, bounding_boxes))

    return results

In [37]:
# Process the CVL images in batches
batch_size = 100  # Adjust batch size as needed
# Ceiling division: `len // batch_size + 1` scheduled an extra, empty batch
# whenever the image count was an exact multiple of batch_size.
num_batches = (len(cvl_binarized_images) + batch_size - 1) // batch_size

cvl_results = []
for i in range(num_batches):
    start_idx = i * batch_size
    end_idx = min((i + 1) * batch_size, len(cvl_binarized_images))
    batch_images = cvl_binarized_images[start_idx:end_idx]

    # Each entry appended is a (labels, bounding_boxes) tuple for one image.
    batch_results = calculate_bounding_boxes(batch_images)
    cvl_results.extend(batch_results)

print("Bounding boxes calculated for CVL dataset.")


Bounding boxes calculated for CVL dataset.
Saved CVL bounding boxes part 0
Saved CVL bounding boxes part 1
Saved CVL bounding boxes part 2
Saved CVL bounding boxes part 3
Saved CVL bounding boxes part 4
Saved CVL bounding boxes part 5
Saved CVL bounding boxes part 6
Saved CVL bounding boxes part 7
Saved CVL bounding boxes part 8
Saved CVL bounding boxes part 9


In [75]:
# Process the IAM images in batches
batch_size = 100  # Adjust batch size as needed
# Ceiling division: `len // batch_size + 1` scheduled an extra, empty batch
# whenever the image count was an exact multiple of batch_size.
num_batches = (len(iam_binarized_images) + batch_size - 1) // batch_size

iam_results = []
for i in range(num_batches):
    start_idx = i * batch_size
    end_idx = min((i + 1) * batch_size, len(iam_binarized_images))
    batch_images = iam_binarized_images[start_idx:end_idx]

    # Each entry appended is a (labels, bounding_boxes) tuple for one image.
    batch_results = calculate_bounding_boxes(batch_images)
    iam_results.extend(batch_results)

print("Bounding boxes calculated for IAM dataset.")


Bounding boxes calculated for IAM dataset.


Drawing Bounding Boxes

In [73]:
# Function to load a single binary image
def load_binary_image(file_path):
    """Load a serialized object from ``file_path`` via ``torch.load``.

    Args:
        file_path: Path of the file to load.

    Returns:
        The loaded object, or ``None`` if loading raised any ``Exception``
        (the error is printed rather than propagated).
    """
    loaded = None
    try:
        loaded = torch.load(file_path)
    except Exception as e:
        # Best-effort load: report the failure and fall through to None.
        print(f"Error loading binary image from {file_path}: {e}")
    return loaded

# Function to overlay bounding boxes on an image
def overlay_bounding_boxes(image, bounding_boxes, save_path, box_color='r'):
    """Draw bounding boxes over an image and save the figure to disk.

    Args:
        image: PyTorch tensor holding one image; it is squeezed, moved to
            the CPU and cast to uint8 for display in grayscale.
        bounding_boxes: A two-element ``(labels, boxes)`` pair, the format
            produced per image by ``calculate_bounding_boxes``. Only
            ``boxes`` is used; each box is ``(x_min, y_min, x_max, y_max)``.
        save_path: Destination file passed to ``Figure.savefig``.
        box_color: Matplotlib color for the rectangle edges.
    """
    _labels, boxes = bounding_boxes
    image_np = image.squeeze().cpu().numpy().astype(np.uint8)

    fig, ax = plt.subplots(1)
    try:
        ax.imshow(image_np, cmap='gray')

        for x_min, y_min, x_max, y_max in boxes:
            rect = plt.Rectangle(
                (x_min, y_min),
                x_max - x_min,
                y_max - y_min,
                fill=False,
                edgecolor=box_color,
                linewidth=1,
            )
            ax.add_patch(rect)

        ax.axis('off')

        # Save through the figure object instead of the implicit
        # "current figure" so unrelated open figures cannot interfere.
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if drawing or saving fails;
        # otherwise a long loop over images leaks one figure per error.
        plt.close(fig)


In [59]:
# Paths to your binary image files (adjust paths accordingly)
# These are the same CVL_binary.pt / IAM_binary.pt files loaded earlier.
CVL_file_path = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/CVL_binary.pt'
IAM_file_path = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/IAM_binary.pt'

# Paths to your bounding box folders (adjust paths accordingly)
# NOTE(review): these two names are not referenced in the visible part of
# this file -- confirm whether they are still needed.
bounding_box_folder1 = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/CVL_bounding_boxes'
bounding_box_folder2 = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/binary_image_bounding_box/IAM_bounding_boxes'
    
# Load binary images (each is None if loading failed; see load_binary_image)
cvl_binary_images = load_binary_image(CVL_file_path)
iam_binary_images = load_binary_image(IAM_file_path)


In [57]:
# Sanity check: each entry of cvl_results is a (labels, bounding_boxes)
# pair, so this prints 2.
print(len(cvl_results[1000]))

2


In [None]:
# Draw and save the bounding-box overlay for every CVL image.
# Skipped entirely when the binary images failed to load (None).
if cvl_binary_images is not None:
    cvl_overlay_dir = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/output/CVL'
    # savefig does not create missing directories, so make sure the target
    # folder exists before the first image is written.
    os.makedirs(cvl_overlay_dir, exist_ok=True)
    for i, cvl_image in enumerate(cvl_binary_images):
        filename = f'CVL_{i}.png'
        save_path_cvl = os.path.join(cvl_overlay_dir, filename)
        # cvl_results[i] is the (labels, bounding_boxes) pair for image i.
        overlay_bounding_boxes(cvl_image, cvl_results[i], save_path_cvl)


In [76]:

# Draw and save the bounding-box overlay for every IAM image.
# Skipped entirely when the binary images failed to load (None).
if iam_binary_images is not None:
    iam_overlay_dir = '/root/autodl-tmp/APS360_Project/Machine_Learning_Output/output/IAM'
    # savefig does not create missing directories, so make sure the target
    # folder exists before the first image is written.
    os.makedirs(iam_overlay_dir, exist_ok=True)
    for i, iam_image in enumerate(iam_binary_images):
        filename = f'IAM_{i}.png'
        save_path_iam = os.path.join(iam_overlay_dir, filename)
        # iam_results[i] is the (labels, bounding_boxes) pair for image i.
        overlay_bounding_boxes(iam_image, iam_results[i], save_path_iam)