In [1]:
from PIL import Image
import os
from collections import Counter
import time

def get_image_info(directory):
    image_info = []
    width_distribution = Counter()
    height_distribution = Counter()
    resolution_distribution = Counter()

    max_resolution = 0
    min_resolution = float('inf')
    max_res_file = ""
    min_res_file = ""

    max_width = 0
    min_width = float('inf')
    max_width_file = ""
    min_width_file = ""

    max_height = 0
    min_height = float('inf')
    max_height_file = ""
    min_height_file = ""

    for filename in os.listdir(directory):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            path = os.path.join(directory, filename)
            with Image.open(path) as img:
                width, height = img.size
                resolution = img.width * img.height

                # 更新宽度统计
                if width > max_width:
                    max_width = width
                    max_width_file = filename
                if width < min_width:
                    min_width = width
                    min_width_file = filename

                # 更新高度统计
                if height > max_height:
                    max_height = height
                    max_height_file = filename
                if height < min_height:
                    min_height = height
                    min_height_file = filename

                # 更新分辨率统计
                if resolution > max_resolution:
                    max_resolution = resolution
                    max_res_file = filename
                
                if resolution < min_resolution:
                    min_resolution = resolution
                    min_res_file = filename

                # 分布统计
                width_distribution[width // 100 * 100] += 1
                height_distribution[height // 100 * 100] += 1
                resolution_distribution[resolution // 50000 * 50000] += 1

                # 收集图片信息
                info = {
                    'filename': filename,
                    'width': width,
                    'height': height,
                    'resolution': resolution
                }
                image_info.append(info)

    # 打印统计数据
    print("Width Distribution (by 100 pixels):")
    for w, count in sorted(width_distribution.items()):
        print(f"{w} to {w+99} pixels: {count} images")
    print(f"Max Width: {max_width} pixels (File: {max_width_file}), Min Width: {min_width} pixels (File: {min_width_file})")

    print("Height Distribution (by 100 pixels):")
    for h, count in sorted(height_distribution.items()):
        print(f"{h} to {h+99} pixels: {count} images")
    print(f"Max Height: {max_height} pixels (File: {max_height_file}), Min Height: {min_height} pixels (File: {min_height_file})")

    print("Resolution Distribution (by 1 MP):")
    for r, count in sorted(resolution_distribution.items()):
        print(f"{r} to {r+49999} pixels: {count} images")
    print(f"Max Resolution: {max_resolution} pixels (File: {max_res_file}), Min Resolution: {min_resolution} pixels (File: {min_res_file})")
    
    return image_info




# 使用示例
directory = '../data/source/png'
image_info = get_image_info(directory)
image_info

Width Distribution (by 100 pixels):
100 to 199 pixels: 1 images
200 to 299 pixels: 14 images
300 to 399 pixels: 60 images
400 to 499 pixels: 205 images
500 to 599 pixels: 341 images
600 to 699 pixels: 451 images
700 to 799 pixels: 594 images
800 to 899 pixels: 700 images
900 to 999 pixels: 789 images
1000 to 1099 pixels: 791 images
1100 to 1199 pixels: 862 images
1200 to 1299 pixels: 893 images
1300 to 1399 pixels: 785 images
1400 to 1499 pixels: 803 images
1500 to 1599 pixels: 762 images
1600 to 1699 pixels: 744 images
1700 to 1799 pixels: 705 images
1800 to 1899 pixels: 656 images
1900 to 1999 pixels: 653 images
2000 to 2099 pixels: 590 images
2100 to 2199 pixels: 537 images
2200 to 2299 pixels: 455 images
2300 to 2399 pixels: 425 images
2400 to 2499 pixels: 399 images
2500 to 2599 pixels: 322 images
2600 to 2699 pixels: 260 images
2700 to 2799 pixels: 258 images
2800 to 2899 pixels: 198 images
2900 to 2999 pixels: 177 images
3000 to 3099 pixels: 133 images
3100 to 3199 pixels: 107 i

[{'filename': '0092327.png',
  'width': 2041,
  'height': 622,
  'resolution': 1269502},
 {'filename': '0115199.png',
  'width': 1815,
  'height': 349,
  'resolution': 633435},
 {'filename': '0080811.png',
  'width': 1905,
  'height': 241,
  'resolution': 459105},
 {'filename': '0057954.png',
  'width': 746,
  'height': 336,
  'resolution': 250656},
 {'filename': '0055494.png',
  'width': 468,
  'height': 259,
  'resolution': 121212},
 {'filename': '0035063.png',
  'width': 2155,
  'height': 238,
  'resolution': 512890},
 {'filename': '0110460.png',
  'width': 2439,
  'height': 274,
  'resolution': 668286},
 {'filename': '0057353.png',
  'width': 2460,
  'height': 1165,
  'resolution': 2865900},
 {'filename': '0081487.png',
  'width': 1179,
  'height': 484,
  'resolution': 570636},
 {'filename': '0094407.png',
  'width': 582,
  'height': 244,
  'resolution': 142008},
 {'filename': '0003634.png',
  'width': 809,
  'height': 282,
  'resolution': 228138},
 {'filename': '0083072.png',
  'w

: 

In [3]:
from PIL import Image, ImageOps
import os

def pad_images(directory, image_info, save_directory):
    idx = 0
    for info in image_info:
        path = os.path.join(directory, info['filename'])
        idx += 1
        if idx % 10 == 0:
            print(f"Padding images: {idx}/{len(image_info)} finished")
        with Image.open(path) as img:
            padding_width = int(img.width * 0.05)
            padding_height = int(img.height * 0.05)
            padded_img = ImageOps.expand(img, border=(padding_width, padding_height), fill='white')
            # 保存填充后的图片
            save_path = os.path.join(save_directory, f"padded_{info['filename']}")
            padded_img.save(save_path)
            
            
def resize_images(directory, target_height=256, save_directory='../data/resized'):
    file_list = [f for f in os.listdir(directory) if f.startswith('padded_')]
    idx = 0
    for filename in file_list:
        idx += 1
        if idx % 10 == 0:
            print(f"Resizing images: {idx}/{len(file_list)} finished")
        path = os.path.join(directory, filename)
        with Image.open(path) as img:
            aspect_ratio = img.width / img.height
            new_width = int(target_height * aspect_ratio)
            resized_img = img.resize((new_width, target_height), Image.ANTIALIAS)
            save_path = os.path.join(save_directory, f"{filename[len('padded_'):]}")
            resized_img.save(save_path)


In [4]:
def main():
    start_time = time.time()
    source_directory = '../data/source/png'
    padded_directory = '../data/padding'
    resized_directory = '../data/resized'
    
    # 确保保存目录存在
    os.makedirs(padded_directory, exist_ok=True)
    os.makedirs(resized_directory, exist_ok=True)
    
    # 获取图片信息
    image_info = get_image_info(source_directory)
    print(f"Image information collection completed. Time elapsed: {time.time() - start_time} seconds")
    
    # 应用padding并保存
    pad_images(source_directory, image_info, padded_directory)
    
    print(f"Padding completed. Time elapsed: {time.time() - start_time} seconds")
    
    # 从保存的padding图片读取，调整大小并保存
    resize_images(padded_directory, 256, resized_directory)
    
    print(f"Resizing completed. Total time elapsed: {time.time() - start_time} seconds")

    print("All processes are completed.")

main()


Width Distribution (by 100 pixels):
100 to 199 pixels: 1 images
200 to 299 pixels: 14 images
300 to 399 pixels: 60 images
400 to 499 pixels: 205 images
500 to 599 pixels: 341 images
600 to 699 pixels: 451 images
700 to 799 pixels: 594 images
800 to 899 pixels: 700 images
900 to 999 pixels: 789 images
1000 to 1099 pixels: 791 images
1100 to 1199 pixels: 862 images
1200 to 1299 pixels: 893 images
1300 to 1399 pixels: 785 images
1400 to 1499 pixels: 803 images
1500 to 1599 pixels: 762 images
1600 to 1699 pixels: 744 images
1700 to 1799 pixels: 705 images
1800 to 1899 pixels: 656 images
1900 to 1999 pixels: 653 images
2000 to 2099 pixels: 590 images
2100 to 2199 pixels: 537 images
2200 to 2299 pixels: 455 images
2300 to 2399 pixels: 425 images
2400 to 2499 pixels: 399 images
2500 to 2599 pixels: 322 images
2600 to 2699 pixels: 260 images
2700 to 2799 pixels: 258 images
2800 to 2899 pixels: 198 images
2900 to 2999 pixels: 177 images
3000 to 3099 pixels: 133 images
3100 to 3199 pixels: 107 i

In [9]:
directory = '../data/resized'
image_info = get_image_info(directory)
image_info

Width Distribution (by 100 pixels):
100 to 199 pixels: 33 images
200 to 299 pixels: 158 images
300 to 399 pixels: 456 images
400 to 499 pixels: 786 images
500 to 599 pixels: 1183 images
600 to 699 pixels: 1203 images
700 to 799 pixels: 1265 images
800 to 899 pixels: 1235 images
900 to 999 pixels: 1227 images
1000 to 1099 pixels: 1089 images
1100 to 1199 pixels: 932 images
1200 to 1299 pixels: 836 images
1300 to 1399 pixels: 766 images
1400 to 1499 pixels: 670 images
1500 to 1599 pixels: 513 images
1600 to 1699 pixels: 441 images
1700 to 1799 pixels: 378 images
1800 to 1899 pixels: 323 images
1900 to 1999 pixels: 270 images
2000 to 2099 pixels: 234 images
2100 to 2199 pixels: 184 images
2200 to 2299 pixels: 125 images
2300 to 2399 pixels: 143 images
2400 to 2499 pixels: 97 images
2500 to 2599 pixels: 85 images
2600 to 2699 pixels: 61 images
2700 to 2799 pixels: 66 images
2800 to 2899 pixels: 59 images
2900 to 2999 pixels: 42 images
3000 to 3099 pixels: 31 images
3100 to 3199 pixels: 25 

[{'filename': '0092327.png',
  'width': 840,
  'height': 256,
  'resolution': 215040},
 {'filename': '0115199.png',
  'width': 1333,
  'height': 256,
  'resolution': 341248},
 {'filename': '0080811.png',
  'width': 2023,
  'height': 256,
  'resolution': 517888},
 {'filename': '0057954.png',
  'width': 570,
  'height': 256,
  'resolution': 145920},
 {'filename': '0055494.png',
  'width': 464,
  'height': 256,
  'resolution': 118784},
 {'filename': '0035063.png',
  'width': 2332,
  'height': 256,
  'resolution': 596992},
 {'filename': '0110460.png',
  'width': 2287,
  'height': 256,
  'resolution': 585472},
 {'filename': '0057353.png',
  'width': 540,
  'height': 256,
  'resolution': 138240},
 {'filename': '0081487.png',
  'width': 623,
  'height': 256,
  'resolution': 159488},
 {'filename': '0094407.png',
  'width': 611,
  'height': 256,
  'resolution': 156416},
 {'filename': '0003634.png',
  'width': 734,
  'height': 256,
  'resolution': 187904},
 {'filename': '0083072.png',
  'width':

: 