In [2]:
import os
import random
import cv2
import numpy as np
from pdf2image import convert_from_path,pdfinfo_from_path
from tqdm import tqdm
import tempfile

In [3]:

# 配置
OUTPUT_DIR = 'dataset'
TRAIN_PAIRS = 5000
VAL_PAIRS = 500
PATCH_SIZE = 50
TARGET_SIZE = 16
STRIDE = 10
TV_TOP_RATIO = 0.1


In [4]:

# 确保输出目录存在
os.makedirs(os.path.join(OUTPUT_DIR, 'train', 'blurred'), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, 'train', 'target'), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, 'val', 'blurred'), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, 'val', 'target'), exist_ok=True)


In [5]:

def compute_tv(patch):
    gray = cv2.cvtColor(patch, cv2.COLOR_RGB2GRAY)
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    return np.sum(np.abs(sobelx) + np.abs(sobely))


In [6]:

def defocus_kernel(radius):
    if radius == 0:
        return np.array([[1.0]])
    d = int(2*radius + 1)
    kernel = np.zeros((d, d))
    center = radius
    y, x = np.ogrid[:d, :d]
    mask = (x - center)**2 + (y - center)**2 <= radius**2
    kernel[mask] = 1.0
    kernel /= kernel.sum()
    return kernel


In [7]:

def motion_kernel(size):
    kernel = np.zeros((size, size))
    cx, cy = size//2, size//2
    x, y = cx, cy
    path = [(x, y)]
    for _ in range(size-1):
        dx, dy = random.choice([(1,0), (-1,0), (0,1), (0,-1), (1,1), (1,-1), (-1,1), (-1,-1)])
        x = max(0, min(size-1, x+dx))
        y = max(0, min(size-1, y+dy))
        path.append((x, y))
    for (x, y) in set(path):
        kernel[y, x] = 1.0
    kernel /= kernel.sum()
    return kernel


In [8]:

def process_and_save(clean_patch, file_id, subset):
    """
    clean_patch: 50x50 RGB 浮点 [0,1]
    file_id: 6位数字字符串
    subset: 'train' 或 'val'
    """
    # 1. 几何变换（旋转）
    angle = np.random.normal(0, 4)
    M = cv2.getRotationMatrix2D((PATCH_SIZE/2, PATCH_SIZE/2), angle, 1)
    transformed = cv2.warpAffine(clean_patch, M, (PATCH_SIZE, PATCH_SIZE), borderMode=cv2.BORDER_REPLICATE)
    
    # 2. 提取目标 (中心16x16)
    start = (PATCH_SIZE - TARGET_SIZE) // 2
    target = transformed[start:start+TARGET_SIZE, start:start+TARGET_SIZE].copy()
    
    # 3. 生成模糊核
    if random.random() < 0.5:
        r = random.uniform(0, 4)
        kernel = defocus_kernel(r)
    else:
        s = random.randint(5, 21)
        kernel = motion_kernel(s)
    
    blurred = cv2.filter2D(transformed, -1, kernel)
    
    # 4. 加噪声
    sigma = random.uniform(0, 7/255)
    noise = np.random.normal(0, sigma, blurred.shape)
    blurred = np.clip(blurred + noise, 0, 1)
    
    # 5. 量化模拟 (实际保存时转为uint8)
    blurred_uint8 = (blurred * 255).astype(np.uint8)
    target_uint8 = (target * 255).astype(np.uint8)
    
    # 6. 保存为PNG
    blurred_path = os.path.join(OUTPUT_DIR, subset, 'blurred', f'{file_id}.png')
    target_path = os.path.join(OUTPUT_DIR, subset, 'target', f'{file_id}.png')
    cv2.imwrite(blurred_path, cv2.cvtColor(blurred_uint8, cv2.COLOR_RGB2BGR))  # OpenCV 使用BGR
    cv2.imwrite(target_path, cv2.cvtColor(target_uint8, cv2.COLOR_RGB2BGR))
    
   
    return {'file_id': file_id, 'blur_type': 'defocus' if random.random()<0.5 else 'motion', 'sigma': sigma}


In [9]:
import glob

candidate_patches = []
original_folder = r"D:\CNN_for_Direct_Text_Deblurring\Code\dataset\original"
pdf_files = glob.glob(os.path.join(original_folder, "*.pdf"))
print(len(pdf_files))

42


In [10]:
with tempfile.TemporaryDirectory() as temp_dir:
    for pdf in tqdm(pdf_files):
        dpi = random.randint(210, 250)
        # 获取 PDF 总页数
        info = pdfinfo_from_path(pdf, userpw=None, poppler_path=None)  
        total_pages = info["Pages"]
        pdf_name = os.path.basename(pdf)
        print(f"Processing {pdf_name}...")
        # 逐页处理
        for page_num in range(1, total_pages + 1):
    
                paths = convert_from_path(
                    pdf, 
                    dpi=dpi, 
                    first_page=page_num, 
                    last_page=page_num,
                    paths_only=True,
                    poppler_path=None  # 如果有需要，指定 poppler 的 bin 路径
                )
                img_path = paths[0]      
                # 将 PIL Image 转换为 numpy 数组
                img_np = np.array(img_path)
                img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
            # 下采样2倍
                h, w = img_np.shape[:2]
                img_np = cv2.resize(img_np, (w//2, h//2), interpolation=cv2.INTER_CUBIC)
                # 滑动窗口计算TV
                tv_list = []
                for y in range(0, img_np.shape[0]-PATCH_SIZE, STRIDE):
                    for x in range(0, img_np.shape[1]-PATCH_SIZE, STRIDE):
                        patch = img_np[y:y+PATCH_SIZE, x:x+PATCH_SIZE]
                        tv = compute_tv(patch)
                        tv_list.append((tv, y, x))
                # 取前10%
                tv_list.sort(reverse=True)
                top = tv_list[:max(1, int(len(tv_list)*TV_TOP_RATIO))]
                for _, y, x in top:
                    patch = img_np[y:y+PATCH_SIZE, x:x+PATCH_SIZE].astype(np.float32) / 255.0
                    candidate_patches.append(patch)
            
print(f"Collected {len(candidate_patches)} candidate patches.")


  0%|          | 0/42 [00:00<?, ?it/s]

Processing 06299.pdf...


  2%|▏         | 1/42 [00:09<06:19,  9.26s/it]

Processing 0910.4610v1.pdf...


  5%|▍         | 2/42 [00:10<03:11,  4.80s/it]

Processing 1194984915674857495.pdf...


  7%|▋         | 3/42 [00:14<02:48,  4.31s/it]

Processing 1712.01916v1.pdf...


 10%|▉         | 4/42 [00:17<02:17,  3.62s/it]

Processing 1903.00763v1.pdf...


 12%|█▏        | 5/42 [00:21<02:23,  3.87s/it]

Processing 2101.07518v1.pdf...


 14%|█▍        | 6/42 [00:26<02:38,  4.41s/it]

Processing 2102.02808v1.pdf...


 17%|█▋        | 7/42 [00:32<02:49,  4.85s/it]

Processing 2111.11745v1.pdf...


 19%|█▉        | 8/42 [00:38<02:51,  5.03s/it]

Processing 22871-F.pdf...


 21%|██▏       | 9/42 [00:42<02:37,  4.78s/it]

Processing 2302.05309v3.pdf...


 24%|██▍       | 10/42 [00:46<02:28,  4.64s/it]

Processing 2305.12966v4.pdf...


 26%|██▌       | 11/42 [00:53<02:41,  5.22s/it]

Processing 2401.05055v2.pdf...


 29%|██▊       | 12/42 [01:17<05:31, 11.04s/it]

Processing 2404.15330v1.pdf...


 31%|███       | 13/42 [01:20<04:11,  8.67s/it]

Processing 2504.09648v1.pdf...


 33%|███▎      | 14/42 [01:38<05:18, 11.39s/it]

Processing Aljadaany_Douglas-Rachford_Networks_Learning_Both_the_Image_Prior_and_Data_Fidelity_CVPR_2019_paper.pdf...


 36%|███▌      | 15/42 [01:45<04:35, 10.20s/it]

Processing be138350-af45-4980-b0c1-c75c0af545fe.pdf...


 38%|███▊      | 16/42 [01:49<03:32,  8.17s/it]

Processing deblur_fergus.pdf...


 40%|████      | 17/42 [01:54<03:04,  7.37s/it]

Processing deblur_siggraph08.pdf...


 43%|████▎     | 18/42 [02:02<02:58,  7.43s/it]

Processing deconvLevinEtalCVPR09.pdf...


 45%|████▌     | 19/42 [02:08<02:38,  6.87s/it]

Processing feart-10-873986.pdf...


 48%|████▊     | 20/42 [02:15<02:34,  7.02s/it]

Processing Gong_From_Motion_Blur_CVPR_2017_paper.pdf...


 50%|█████     | 21/42 [02:24<02:38,  7.55s/it]

Processing JSTSP_dranka_coelho.pdf...


 52%|█████▏    | 22/42 [02:32<02:34,  7.70s/it]

Processing Kim_Real-World_Efficient_Blind_Motion_Deblurring_via_Blur_Pixel_Discretization_CVPR_2024_paper.pdf...


 55%|█████▍    | 23/42 [02:41<02:34,  8.13s/it]

Processing Kupyn_DeblurGAN-v2_Deblurring_Orders-of-Magnitude_Faster_and_Better_ICCV_2019_paper.pdf...


 57%|█████▋    | 24/42 [02:49<02:24,  8.00s/it]

Processing Kupyn_DeblurGAN_Blind_Motion_CVPR_2018_paper.pdf...


 60%|█████▉    | 25/42 [02:58<02:24,  8.53s/it]

Processing Li_All-in-One_Image_Restoration_for_Unknown_Corruption_CVPR_2022_paper.pdf...


 62%|██████▏   | 26/42 [03:08<02:19,  8.74s/it]

Processing Lv_Fourier_Priors-Guided_Diffusion_for_Zero-Shot_Joint_Low-Light_Enhancement_and_Deblurring_CVPR_2024_paper.pdf...


 64%|██████▍   | 27/42 [03:18<02:19,  9.32s/it]

Processing paper006.pdf...


 67%|██████▋   | 28/42 [03:25<02:00,  8.60s/it]

Processing popular_summary.pdf...


 69%|██████▉   | 29/42 [03:29<01:31,  7.03s/it]

Processing Purohit_Bringing_Alive_Blurred_Moments_CVPR_2019_paper.pdf...


 71%|███████▏  | 30/42 [03:39<01:36,  8.05s/it]

Processing Suin_Spatially-Attentive_Patch-Hierarchical_Network_for_Adaptive_Motion_Deblurring_CVPR_2020_paper.pdf...


 74%|███████▍  | 31/42 [03:50<01:36,  8.80s/it]

Processing Sun_Learning_a_Convolutional_2015_CVPR_paper.pdf...


 76%|███████▌  | 32/42 [03:59<01:30,  9.10s/it]

Processing Tu_MAXIM_Multi-Axis_MLP_for_Image_Processing_CVPR_2022_paper.pdf...


 79%|███████▊  | 33/42 [04:10<01:24,  9.43s/it]

Processing Wang_Uformer_A_General_U-Shaped_Transformer_for_Image_Restoration_CVPR_2022_paper.pdf...


 81%|████████  | 34/42 [04:22<01:21, 10.21s/it]

Processing Wu_DAVID_Dual-Attentional_Video_Deblurring_WACV_2020_paper.pdf...


 83%|████████▎ | 35/42 [04:31<01:10, 10.12s/it]

Processing Xia_DiffIR_Efficient_Diffusion_Model_for_Image_Restoration_ICCV_2023_paper.pdf...


 86%|████████▌ | 36/42 [04:44<01:05, 10.87s/it]

Processing xu2018.pdf...


 88%|████████▊ | 37/42 [04:56<00:56, 11.31s/it]

Processing yang2010.pdf...


 90%|█████████ | 38/42 [05:17<00:56, 14.21s/it]

Processing Yang_Gyro-based_Neural_Single_Image_Deblurring_CVPR_2025_paper.pdf...


 93%|█████████▎| 39/42 [05:28<00:39, 13.19s/it]

Processing Zamir_Multi-Stage_Progressive_Image_Restoration_CVPR_2021_paper.pdf...


 95%|█████████▌| 40/42 [05:42<00:26, 13.26s/it]

Processing Zhang_Dynamic_Scene_Deblurring_CVPR_2018_paper.pdf...


 98%|█████████▊| 41/42 [05:55<00:13, 13.41s/it]

Processing Zhang_Pixel_Screening_Based_Intermediate_Correction_for_Blind_Deblurring_CVPR_2022_paper.pdf...


100%|██████████| 42/42 [06:08<00:00,  8.78s/it]

Collected 521093 candidate patches.





In [11]:

# 随机打乱并分配训练/验证
random.shuffle(candidate_patches)
train_patches = candidate_patches[:TRAIN_PAIRS]
val_patches = candidate_patches[TRAIN_PAIRS:TRAIN_PAIRS+VAL_PAIRS]

# 处理训练集
metadata_train = []
for idx, patch in enumerate(tqdm(train_patches, desc='Train')):
    file_id = f"{idx+1:06d}"
    meta = process_and_save(patch, file_id, 'train')
    metadata_train.append(meta)

# 处理验证集
metadata_val = []
for idx, patch in enumerate(tqdm(val_patches, desc='Val')):
    file_id = f"{idx+1:06d}"
    meta = process_and_save(patch, file_id, 'val')
    metadata_val.append(meta)


  kernel /= kernel.sum()
  blurred_uint8 = (blurred * 255).astype(np.uint8)
Train: 100%|██████████| 5000/5000 [00:19<00:00, 256.55it/s]
Val: 100%|██████████| 500/500 [00:02<00:00, 245.61it/s]
