In [27]:
import os
import tensorflow as tf
from PIL import Image
import shutil
import random
import numpy as np


# Absolute path of an image dataset root on the machine this notebook ran on.
# NOTE(review): none of the functions or calls in the visible cells read this
# variable - they all take explicit path arguments; confirm whether it is still needed.
root_path = '/root/images/extend_train'


In [28]:
def down_image(root_dir, pixelValues):
    '''
    Resize every image below ``root_dir`` to a square and overwrite it in place.

    The expected layout is ``root_dir/<class folder>/<image file>``. Because
    each file is overwritten with its resized version, work on a copy of the
    dataset if the originals must be kept.

    root_dir:    dataset root whose immediate sub-directories hold the images.
    pixelValues: edge length in pixels; every image becomes
                 ``pixelValues x pixelValues`` (aspect ratio is not preserved).
    '''
    target_size = (pixelValues, pixelValues)

    for subdir in os.listdir(root_dir):
        subdir_path = os.path.join(root_dir, subdir)
        # Entries that are not directories (stray files in the root) cannot be
        # listed; skip them instead of failing with NotADirectoryError.
        if not os.path.isdir(subdir_path):
            continue
        print(subdir_path)

        for filename in os.listdir(subdir_path):
            filepath = os.path.join(subdir_path, filename)
            if not os.path.isfile(filepath):
                continue

            # resize() returns a new, fully loaded image, so the source handle
            # can be released before the same path is written again.
            with Image.open(filepath) as img:
                resized = img.resize(target_size)
            resized.save(filepath)
    print("decode Over")

def add_gaussian_noise(image, noise_level):
    '''
    Return a copy of ``image`` with zero-mean Gaussian noise added per channel.

    image:       PIL image. Non-RGB images are converted to RGB first, because
                 the noise is generated per RGB channel.
    noise_level: standard deviation of the noise as a fraction of the 8-bit
                 range (the actual sigma is ``noise_level * 255``).

    Returns a new RGB PIL image; the input image is not modified.
    '''
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")

    # Do the arithmetic in floating point. Casting the noise to uint8 before
    # adding makes negative noise wrap around and lets the uint8 sum overflow
    # modulo 256, so a later clip can no longer repair the result.
    pixels = np.asarray(rgb_image, dtype=np.float64)
    std = noise_level * 255
    noise = np.random.normal(0.0, std, pixels.shape)

    noisy_pixels = np.clip(np.rint(pixels + noise), 0, 255).astype(np.uint8)
    return Image.fromarray(noisy_pixels)

def flipHorizontallyVertically(root_dir, noise_level=0.1):
    '''
    Augment every class folder under ``root_dir`` with flipped and noisy copies.

    For each regular file ``<name>`` inside each immediate sub-directory, four
    new files are written next to it: ``hflip_<name>``, ``vflip_<name>``,
    ``noise_<name>`` and ``mirror_<name>``. The originals are kept. Running the
    function twice also augments the files produced by the first run.

    root_dir:    dataset root laid out as ``root_dir/<class folder>/<image>``.
    noise_level: forwarded to ``add_gaussian_noise`` for the ``noise_`` copy.
    '''
    for subdir in os.listdir(root_dir):
        subdir_path = os.path.join(root_dir, subdir)
        print(subdir_path)
        if not os.path.isdir(subdir_path):
            continue

        # The directory listing is taken before any copy is written, so files
        # created below are not picked up again during this run.
        for filename in os.listdir(subdir_path):
            filepath = os.path.join(subdir_path, filename)
            if not os.path.isfile(filepath):
                continue

            # Close the source file as soon as the variants are built; leaving
            # it open leaks one file handle per image in the dataset.
            with Image.open(filepath) as img:
                variants = {
                    'hflip_': img.transpose(Image.FLIP_LEFT_RIGHT),
                    'vflip_': img.transpose(Image.FLIP_TOP_BOTTOM),
                    'noise_': add_gaussian_noise(img, noise_level),
                    # NOTE(review): this is the same transform as 'hflip_', so
                    # the two files are identical - confirm which transform
                    # "mirror" was meant to be.
                    'mirror_': img.transpose(Image.FLIP_LEFT_RIGHT),
                }

            for prefix, variant in variants.items():
                variant.save(os.path.join(subdir_path, prefix + filename))


def SingleClassflipHorizontallyVertically(input_dir):
    '''
    Augment a single class folder with horizontally and vertically flipped copies.

    For each regular file ``<name>`` directly inside ``input_dir`` two new files
    are written next to it: ``flipped_<name>`` (left-right flip) and
    ``flipped_up_down_<name>`` (top-bottom flip). The originals are kept.

    input_dir: folder that directly contains the image files.
    '''
    # The listing is taken once up front, so the copies written below are not
    # processed again during this run.
    for filename in os.listdir(input_dir):
        filepath = os.path.join(input_dir, filename)
        if not os.path.isfile(filepath):
            continue

        # Release the source file handle once both flips exist; transpose()
        # returns independent images that can be saved afterwards.
        with Image.open(filepath) as img:
            flipped_image = img.transpose(Image.FLIP_LEFT_RIGHT)
            flipped_image_up_down = img.transpose(Image.FLIP_TOP_BOTTOM)

        flipped_image.save(os.path.join(input_dir, 'flipped_' + filename))
        flipped_image_up_down.save(os.path.join(input_dir, 'flipped_up_down_' + filename))
                

def split_dir_into_trainAndtest(root_dir, new_dir, test_ratio=0.2, seed=None):
    '''
    Split a class-per-folder dataset into ``new_dir/train`` and ``new_dir/test``.

    Files are MOVED, not copied: after the call each class folder in
    ``root_dir`` no longer contains its image files. Only regular files are
    moved; sub-directories inside a class folder are left where they are.

    root_dir:   original dataset, laid out as ``root_dir/<class>/<file>``.
    new_dir:    destination root; ``train/<class>`` and ``test/<class>`` are
                created below it.
    test_ratio: fraction of each class that goes to the test set; the count is
                ``int(n * test_ratio)``, i.e. rounded down.
    seed:       optional seed for a reproducible split. With ``None`` the
                module-level ``random`` generator is used.

    Raises ValueError if ``test_ratio`` is not within [0, 1].
    '''
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1, got %r" % (test_ratio,))

    # A private generator keeps a seeded split independent of other users of
    # the global random state.
    rng = random if seed is None else random.Random(seed)

    # Create the destination folder structure.
    os.makedirs(new_dir, exist_ok=True)
    train_dir = os.path.join(new_dir, 'train')
    test_dir = os.path.join(new_dir, 'test')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Sorted iteration makes the result independent of the file system's
    # listing order, which is what makes a seeded split repeatable.
    for class_dir in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_dir)
        if not os.path.isdir(class_path):
            continue
        # If new_dir lives directly inside root_dir it is not a class folder.
        if os.path.samefile(class_path, new_dir):
            continue

        train_class_dir = os.path.join(train_dir, class_dir)
        test_class_dir = os.path.join(test_dir, class_dir)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        image_files = sorted(
            f for f in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, f))
        )
        num_test_samples = int(len(image_files) * test_ratio)
        test_samples = set(rng.sample(image_files, num_test_samples))

        # Every listed file goes to exactly one of the two destinations.
        for image_file in image_files:
            target_dir = test_class_dir if image_file in test_samples else train_class_dir
            shutil.move(
                os.path.join(class_path, image_file),
                os.path.join(target_dir, image_file),
            )
    print("split_dir_into_trainAndtest Over")
    
    
def makeDecodeDatasetpreToNum(original_dataset_dir, new_dataset_dir, num):
    '''
    Build a smaller dataset by copying ``num`` random files from every class.

    For each class folder ``original_dataset_dir/<class>`` a folder
    ``new_dataset_dir/<class>`` is created and ``num`` randomly chosen regular
    files are copied into it. The original dataset is left untouched.

    original_dataset_dir: dataset root laid out as ``<root>/<class>/<file>``.
    new_dataset_dir:      destination root; created if it does not exist.
    num:                  number of files to copy from each class.

    Raises ValueError if a class contains fewer than ``num`` files.
    '''
    # Only directories are classes; stray files in the dataset root are ignored.
    class_names = [
        entry for entry in os.listdir(original_dataset_dir)
        if os.path.isdir(os.path.join(original_dataset_dir, entry))
    ]

    os.makedirs(new_dataset_dir, exist_ok=True)

    for class_name in class_names:
        class_dir = os.path.join(original_dataset_dir, class_name)
        selected_images_dir = os.path.join(new_dataset_dir, class_name)

        # Sample from regular files only: copyfile cannot copy a directory.
        image_files = [
            entry for entry in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, entry))
        ]
        if num > len(image_files):
            raise ValueError(
                "class %r has only %d files, cannot select %d"
                % (class_name, len(image_files), num)
            )

        selected_images = random.sample(image_files, num)

        os.makedirs(selected_images_dir, exist_ok=True)
        for image in selected_images:
            shutil.copyfile(
                os.path.join(class_dir, image),
                os.path.join(selected_images_dir, image),
            )

    print("makeDecodeDatasetprtToNum Over")

def balance_file_count(root):
    '''
    Delete files so that every sub-folder of ``root`` holds the same number.

    The target is the file count of the smallest sub-folder; in every other
    sub-folder the files beyond that count (in directory-listing order) are
    permanently deleted. Only regular files are counted and removed;
    directories nested inside a sub-folder are ignored.

    WARNING: if any sub-folder contains no files, the target is 0 and every
    file in every other sub-folder is deleted.

    root: folder whose immediate sub-directories are to be balanced. If it has
          no sub-directories nothing is deleted.
    '''
    # List each sub-folder exactly once so the count and the deletion work on
    # the same snapshot.
    files_by_folder = {}
    for entry in os.listdir(root):
        folder_path = os.path.join(root, entry)
        if not os.path.isdir(folder_path):
            continue
        files_by_folder[folder_path] = [
            name for name in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, name))
        ]

    # min() of an empty sequence raises, so only balance when there is
    # something to balance.
    if files_by_folder:
        min_file_count = min(len(names) for names in files_by_folder.values())

        for folder_path, names in files_by_folder.items():
            for name in names[min_file_count:]:
                os.remove(os.path.join(folder_path, name))
    print("balance_file_count Over")

    
def convert_image_format(root_dir, target_format=".JPG"):
    '''
    Re-save every .jpg/.jpeg/.png file under ``root_dir`` as JPEG, in place.

    The tree is walked recursively. Each matching file (extension compared
    case-insensitively) is written as JPEG data to the same path with its
    extension replaced by ``target_format``, and the source file is then
    deleted. Files whose name already ends in ``target_format`` are skipped.

    root_dir:      root of the directory tree to convert.
    target_format: extension (including the dot) given to the converted files.
                   The image data is always encoded as JPEG, whatever the
                   extension says.
    '''
    source_extensions = (".jpg", ".jpeg", ".png")
    # Modes the JPEG writer accepts directly; anything else (e.g. RGBA or
    # palette PNGs) has to be converted to RGB first, which drops transparency.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    for subdir, dirs, files in os.walk(root_dir):
        for file in files:
            if not file.lower().endswith(source_extensions):
                continue

            file_path = os.path.join(subdir, file)
            new_file_path = os.path.splitext(file_path)[0] + target_format
            # Already carries the target name: saving and then removing the
            # "old" path would delete the only copy of the image.
            if new_file_path == file_path:
                continue

            # Build an independent in-memory image so the source handle is
            # closed before anything is written or removed.
            with Image.open(file_path) as image:
                if image.mode in jpeg_modes:
                    converted = image.copy()
                else:
                    converted = image.convert("RGB")
            converted.save(new_file_path, "JPEG")

            # On a case-insensitive file system "x.jpg" and "x.JPG" are the
            # same file; removing the source would remove the result as well.
            if not os.path.samefile(file_path, new_file_path):
                os.remove(file_path)


In [33]:
# Driver cell for preparing the wheat_leaf_512 dataset. Every step below changes
# files on disk in place (convert removes the source files, split moves files,
# balance deletes files, flip writes extra files), so the order matters.
# NOTE(review): the commented-out calls are presumably steps that were already
# executed in earlier runs of this cell and then disabled so they would not run
# again - confirm before re-enabling any of them.
# convert_image_format('/root/images/wheat_leaf_512')
# split_dir_into_trainAndtest('/root/images/wheat_leaf_512', '/root/images/splited512', test_ratio=0.3)
# balance_file_count('/root/images/splited512/test')
# flipHorizontallyVertically('/root/images/splited512/train')
# Only active step: trim every class folder of the train split down to the size
# of the smallest one.
balance_file_count('/root/images/splited512/train')


balance_file_count Over
