# In [None]:
import cv2
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import matplotlib.pyplot as plt


def extract_page_number(filename):
    """Return the first run of digits found in *filename* as an integer.

    Args:
        filename: File name (or any string) expected to contain a number.

    Returns:
        The integer value of the first contiguous digit sequence.

    Raises:
        ValueError: If *filename* contains no digits at all.
    """
    digits = re.search(r'\d+', filename)
    if digits is None:
        raise ValueError("Filename does not contain a page number")
    return int(digits.group(0))


def detect_page_boundaries(image, page_number):
    """Estimate the page edges of a scanned image.

    The image is converted to grayscale and probed along its middle column
    (for top/bottom) and its middle row (for left/right). An edge is the
    first pixel, counted from the respective side, whose gray value exceeds
    100. If no pixel on the probed line exceeds 100, ``np.argmax`` yields 0
    and the boundary falls back to the image border.

    Only one horizontal edge is detected per page; the other one is placed at
    a fixed offset from the image border:

    * even ``page_number``: left edge detected, right edge = width - offset
    * odd ``page_number``: right edge detected, left edge = offset

    Args:
        image: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        page_number: Integer page number; its parity selects the fixed side.

    Returns:
        Tuple ``(top_boundary, bottom_boundary, left_boundary, right_boundary)``
        in pixel coordinates.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    height, width = gray.shape

    # Probe lines through the centre of the image.
    middle_column = gray[:, width // 2]
    middle_row = gray[height // 2, :]

    # argmax over a boolean array gives the index of the first True value;
    # scanning the reversed line gives the distance from the far side.
    top_boundary = np.argmax(middle_column > 100)
    bottom_boundary = height - np.argmax(middle_column[::-1] > 100)

    # Roughly 236 pixels.
    # NOTE(review): 118.11 is presumably pixels per centimetre at 300 DPI
    # (i.e. a 2 cm offset) - confirm against the scanner settings.
    boundary_offset = int(2 * 118.11)

    if page_number % 2:
        # Odd page: detect the right edge, fix the left one.
        left_boundary = boundary_offset
        right_boundary = width - np.argmax(middle_row[::-1] > 100)
    else:
        # Even page: detect the left edge, fix the right one.
        left_boundary = np.argmax(middle_row > 100)
        right_boundary = width - boundary_offset

    return top_boundary, bottom_boundary, left_boundary, right_boundary


def process_image(image_path):
    """Load one image and detect its page boundaries.

    This is a best-effort worker: any failure is reported on stdout and
    turned into a ``None`` result so that one bad file does not abort a
    batch run.

    Args:
        image_path: Path of the image file to analyse. The page number is
            taken from the digits in its base name.

    Returns:
        The ``(top, bottom, left, right)`` tuple produced by
        ``detect_page_boundaries``, or ``None`` if the file could not be read,
        its name contains no page number, or detection failed.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports an unreadable/undecodable file by returning
            # None instead of raising; fail here with a clear message rather
            # than with an opaque assertion error from cvtColor later on.
            raise ValueError("file could not be read as an image")
        page_number = extract_page_number(os.path.basename(image_path))
        return detect_page_boundaries(image, page_number)
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None


def find_min_frame_parallel(folder_path):
    """Compute the frame that encloses the detected pages of all scans.

    Every file in *folder_path* whose name ends with ``.tif`` is passed to
    ``process_image`` on a thread pool. Results that are falsy (failed
    images) are ignored. The elapsed wall-clock time is printed.

    Args:
        folder_path: Directory containing the ``.tif`` scans.

    Returns:
        Tuple ``(min_top, max_bottom, min_left, max_right)``: the smallest
        top/left and the largest bottom/right over all successful detections.
        If there is no successful detection, the minima are ``+inf`` and the
        maxima are ``-inf``.
    """
    started_at = time.time()

    tif_paths = [
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.endswith(".tif")
    ]

    with ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
        detected = [frame for frame in pool.map(process_image, tif_paths) if frame]

    # Each frame is (top, bottom, left, right); reduce column by column.
    min_top = min((frame[0] for frame in detected), default=float('inf'))
    max_bottom = max((frame[1] for frame in detected), default=float('-inf'))
    min_left = min((frame[2] for frame in detected), default=float('inf'))
    max_right = max((frame[3] for frame in detected), default=float('-inf'))

    elapsed_time = time.time() - started_at
    print(f"find_min_frame_parallel took {elapsed_time:.2f} seconds")
    return min_top, max_bottom, min_left, max_right


def crop_and_save(image_path, min_frame, output_folder_tiff, output_folder_jpg):
    """Crop one scan to the shared frame and write a TIFF and a JPEG copy.

    Two crops are produced from the same frame:

    * TIFF: the frame grown by ``int(0.5 * 118.11)`` pixels on every side.
    * JPEG: the frame shrunk by ``int(0.5 * 118.11)`` pixels at top and
      bottom, and horizontally by ``int(0.8 * 118.11)`` on one side and
      ``int(0.2 * 118.11)`` on the other. On even pages the larger inset is
      on the left, on odd pages on the right.

    All crop coordinates are clamped to the image bounds.

    NOTE(review): 118.11 is presumably pixels per centimetre at 300 DPI, so
    the factors would be margins in centimetres - confirm.

    Args:
        image_path: Path of the source image; its base name supplies the page
            number and the output file name.
        min_frame: ``(min_top, max_bottom, min_left, max_right)`` frame shared
            by all pages.
        output_folder_tiff: Existing directory for the ``.tif`` output.
        output_folder_jpg: Existing directory for the ``.jpg`` output.

    Returns:
        ``(image, (frame_n_top, frame_n_bottom, frame_n_left, frame_n_right))``
        with the full source image and the frame used for cropping, or
        ``None`` if the source image could not be read.

    Raises:
        ValueError: If the file name contains no page number.
    """
    start_time = time.time()
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an unreadable file instead of raising.
        # Report it and skip the file rather than crashing on image.shape.
        print(f"Error processing {image_path}: file could not be read as an image")
        return None

    height, width = image.shape[:2]
    min_top, max_bottom, min_left, max_right = min_frame

    # Rebuild the frame symmetrically around its (integer) centre.
    half_height = (max_bottom - min_top) // 2
    half_width = (max_right - min_left) // 2
    center_x = (min_left + max_right) // 2
    center_y = (min_top + max_bottom) // 2
    frame_n_top = center_y - half_height
    frame_n_bottom = center_y + half_height
    frame_n_left = center_x - half_width
    frame_n_right = center_x + half_width

    # TIFF crop: frame plus an outer margin on every side.
    cm_to_pixels = int(0.5 * 118.11)
    tiff_top = max(0, frame_n_top - cm_to_pixels)
    tiff_bottom = min(height, frame_n_bottom + cm_to_pixels)
    tiff_left = max(0, frame_n_left - cm_to_pixels)
    tiff_right = min(width, frame_n_right + cm_to_pixels)
    cropped_tiff = image[tiff_top:tiff_bottom, tiff_left:tiff_right]

    # JPEG crop: frame minus an inner margin; the horizontal insets are
    # asymmetric and swap sides with the page parity.
    page_number = extract_page_number(os.path.basename(image_path))
    cm_to_pixels_more = int(0.8 * 118.11)
    cm_to_pixels_less = int(0.2 * 118.11)
    if page_number % 2 == 0:
        left_inset, right_inset = cm_to_pixels_more, cm_to_pixels_less
    else:
        left_inset, right_inset = cm_to_pixels_less, cm_to_pixels_more

    jpg_left = max(0, frame_n_left + left_inset)
    jpg_right = min(width, frame_n_right - right_inset)
    jpg_top = max(0, frame_n_top + cm_to_pixels)
    jpg_bottom = min(height, frame_n_bottom - cm_to_pixels)
    cropped_jpg = image[jpg_top:jpg_bottom, jpg_left:jpg_right]

    filename = os.path.splitext(os.path.basename(image_path))[0]
    tiff_output_path = os.path.join(output_folder_tiff, filename + ".tif")
    jpg_output_path = os.path.join(output_folder_jpg, filename + ".jpg")

    # cv2.imwrite returns False when a file could not be written; do not
    # claim success in that case.
    tiff_written = cv2.imwrite(tiff_output_path, cropped_tiff)
    jpg_written = cv2.imwrite(jpg_output_path, cropped_jpg)
    elapsed_time = time.time() - start_time
    if tiff_written and jpg_written:
        print(f"Saved {filename}.tif and {filename}.jpg in {elapsed_time:.2f} seconds")
    else:
        if not tiff_written:
            print(f"Error writing {tiff_output_path}")
        if not jpg_written:
            print(f"Error writing {jpg_output_path}")

    return image, (frame_n_top, frame_n_bottom, frame_n_left, frame_n_right)


def process_folder(folder_path, output_folder_tiff, output_folder_jpg):
    """Crop every ``.tif`` scan in a folder to a common frame.

    The common frame is computed first with ``find_min_frame_parallel``;
    both output directories are created if missing; then ``crop_and_save``
    is run for each ``.tif`` file on a thread pool. The total elapsed time is
    printed.

    Args:
        folder_path: Directory containing the source ``.tif`` files.
        output_folder_tiff: Directory for the cropped TIFF files.
        output_folder_jpg: Directory for the cropped JPEG files.

    Returns:
        List of the truthy results of ``crop_and_save``, in submission order.
    """
    started_at = time.time()
    min_frame = find_min_frame_parallel(folder_path)

    for out_dir in (output_folder_tiff, output_folder_jpg):
        os.makedirs(out_dir, exist_ok=True)

    with ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
        pending = [
            pool.submit(
                crop_and_save,
                os.path.join(folder_path, name),
                min_frame,
                output_folder_tiff,
                output_folder_jpg,
            )
            for name in os.listdir(folder_path)
            if name.endswith(".tif")
        ]
        # Collect in submission order, dropping falsy results.
        images_with_frames = list(filter(None, (job.result() for job in pending)))

    total_elapsed_time = time.time() - started_at
    print(f"process_folder took {total_elapsed_time:.2f} seconds")

    return images_with_frames


def plot_images_with_frames(images_with_frames):
    """Display each image in its own figure with the crop frame outlined.

    Args:
        images_with_frames: Iterable of ``(image, frame)`` pairs, where
            ``frame`` is ``(top, bottom, left, right)`` in pixel coordinates
            and ``image`` is converted from BGR to RGB for display.
    """
    for bgr_image, (top, bottom, left, right) in images_with_frames:
        plt.figure(figsize=(10, 10))
        plt.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

        # Red, unfilled rectangle anchored at the frame's top-left corner.
        outline = plt.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            linewidth=2,
            edgecolor='r',
            facecolor='none',
        )
        plt.gca().add_patch(outline)

        plt.title("Image with Crop Frame")
        plt.show()


# Script entry point. Guarded so that importing this module does not start
# processing; when run as a script or notebook cell the behaviour is the same
# as before and the names below remain module-level variables.
if __name__ == "__main__":
    # Source folder with the scanned .tif pages (machine-specific path).
    folder_path = '/Users/mac/Yandex.Disk-adnemanov@stud.kpfu.ru.localized/Загрузки/15.05.2024/208743'
    # Output folders, relative to the current working directory.
    output_folder_tiff = 'result/tiff'
    output_folder_jpg = 'result/jpg'
    images_with_frames = process_folder(folder_path, output_folder_tiff, output_folder_jpg)
    plot_images_with_frames(images_with_frames)
