In [3]:
from PIL import Image
import os
import numpy as np


# Folder holding the source images, and the sub-folder inside it that receives
# the resized copies written by the preprocessing step.
input_folder = "C:\\Users\\Faraz\\Desktop\\Data Science\\horse2zebra\\trainA"
output_folder = input_folder + "\\processed_images"

2. Image Preprocessing



Resize each image to a uniform size of 224x224 pixels using libraries like TensorFlow or Pillow.

Normalize the pixel values, scaling them between 0 and 1, to prepare the images for machine learning.


In [4]:
# Create the destination folder before any image is written; exist_ok=True
# keeps this cell safe to re-run when the folder is already there.
os.makedirs(output_folder, exist_ok=True)

def preprocess_images_pillow(input_folder, output_folder, size=(224, 224)):
    """Resize every JPEG/PNG in ``input_folder`` and save it to ``output_folder``.

    Each image is converted to RGB, resized to ``size``, scaled to the
    [0, 1] range and then written back as an 8-bit image under its original
    file name. The scaled array itself is not returned or persisted; the file
    on disk holds ordinary 0-255 pixel values.

    Args:
        input_folder: Directory that is scanned (non-recursively) for images.
        output_folder: Directory that receives the processed images. It is
            created if it does not exist.
        size: Target ``(width, height)`` in pixels passed to ``Image.resize``.
            Defaults to ``(224, 224)``.

    Returns:
        None. Progress is reported with one ``Processed: <name>`` line per file.
    """
    # Do not rely on the caller having created the destination beforehand.
    os.makedirs(output_folder, exist_ok=True)

    # sorted() makes the processing (and log) order independent of the
    # order in which the file system happens to list the directory.
    for filename in sorted(os.listdir(input_folder)):
        # Case-insensitive match so that "IMG.JPG" or "photo.jpeg" are not
        # silently skipped.
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        img_path = os.path.join(input_folder, filename)
        with Image.open(img_path) as img:
            # Normalise the colour mode first: palette images would otherwise
            # yield palette indices instead of colours, and 4-channel (CMYK)
            # JPEGs would produce an array that cannot be saved back as JPEG.
            resized = img.convert("RGB").resize(size)
            img_array = np.asarray(resized) / 255.0

        # Round before casting: a bare astype(np.uint8) truncates, so any
        # value that comes back from the float round trip as x - epsilon
        # would be stored as x - 1.
        restored = np.clip(np.rint(img_array * 255.0), 0, 255).astype(np.uint8)
        Image.fromarray(restored).save(os.path.join(output_folder, filename))
        print(f"Processed: {filename}")

# Run the preprocessing over the folders defined earlier in the notebook.
preprocess_images_pillow(input_folder, output_folder)

Processed: n02381460_1001.jpg
Processed: n02381460_1002.jpg
Processed: n02381460_1003.jpg
Processed: n02381460_1006.jpg
Processed: n02381460_1008.jpg
Processed: n02381460_1009.jpg
Processed: n02381460_1011.jpg
Processed: n02381460_1014.jpg
Processed: n02381460_1019.jpg
Processed: n02381460_102.jpg
Processed: n02381460_1023.jpg
Processed: n02381460_1025.jpg
Processed: n02381460_1027.jpg
Processed: n02381460_1028.jpg
Processed: n02381460_1034.jpg
Processed: n02381460_1035.jpg
Processed: n02381460_1037.jpg
Processed: n02381460_1038.jpg
Processed: n02381460_1044.jpg
Processed: n02381460_1045.jpg
Processed: n02381460_1048.jpg
Processed: n02381460_105.jpg
Processed: n02381460_1051.jpg
Processed: n02381460_1052.jpg
Processed: n02381460_1053.jpg
Processed: n02381460_1058.jpg
Processed: n02381460_1063.jpg
Processed: n02381460_1068.jpg
Processed: n02381460_1074.jpg
Processed: n02381460_1075.jpg
Processed: n02381460_108.jpg
Processed: n02381460_1082.jpg
Processed: n02381460_1083.jpg
Processed: n0

3. Image Augmentation



Apply the following transformations to the collected images:

Rotate (up to 45 degrees)

Flip (horizontally and vertically)

Zoom in or out

Random cropping

Adjust brightness

You can use the Keras `ImageDataGenerator` class or the TensorFlow `tf.image` module for this task.

Generate five augmented versions of each image.


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from tensorflow.keras.preprocessing import image


# NOTE: this cell rebinds the `input_folder` / `output_folder` names used by
# the preprocessing cell. From here on they point at the augmentation stage.

# Destination for the augmented copies.
output_folder = "C:\\Users\\Faraz\\Desktop\\Data Science\\horse2zebra\\trainA\\augmented_images"
# Source: the resized images written by the preprocessing step.
input_folder = "C:\\Users\\Faraz\\Desktop\\Data Science\\horse2zebra\\trainA\\processed_images"

# Create the folder that will receive the augmented images; exist_ok=True
# keeps this cell safe to re-run.
os.makedirs(output_folder, exist_ok=True)


# Shared augmentation pipeline used by augment_images().
datagen = ImageDataGenerator(
    # Geometric transforms.
    rotation_range=45,
    zoom_range=[0.8, 1.2],
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    # Photometric transform.
    brightness_range=[0.5, 1.5],
    # How pixels exposed by a shift/rotation/zoom are filled in.
    fill_mode='nearest',
)

# Function to augment images
def augment_images(input_folder, output_folder, num_augmented=5):
    """Write ``num_augmented`` randomly augmented copies of every image.

    Each JPEG/PNG found directly inside ``input_folder`` is loaded, wrapped in
    a batch of one and pushed through the module-level ``datagen`` pipeline.
    Keras saves every generated batch to ``output_folder`` as a side effect.

    Args:
        input_folder: Directory that is scanned (non-recursively) for images.
        output_folder: Directory that receives the augmented JPEG files. It is
            created if it does not exist.
        num_augmented: Number of augmented variants to generate per source
            image. Values of zero or less generate nothing.

    Returns:
        None. Progress is reported with one ``Augmented: <name>`` line per file.
    """
    # Do not rely on the caller having created the destination beforehand.
    os.makedirs(output_folder, exist_ok=True)

    # sorted() keeps the processing order independent of the file system.
    for filename in sorted(os.listdir(input_folder)):
        # Case-insensitive match that also accepts the ".jpeg" spelling.
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        img_path = os.path.join(input_folder, filename)
        img = image.load_img(img_path)
        x = image.img_to_array(img)
        # flow() expects a batch axis, so turn the single image into a batch of one.
        x = x.reshape((1,) + x.shape)

        # Put the source name into the prefix. With a constant prefix every
        # output differs only by the random suffix Keras appends, so files
        # from different images can collide and overwrite each other, and no
        # output can be traced back to the image it came from.
        stem = os.path.splitext(filename)[0]
        flow = datagen.flow(
            x,
            batch_size=1,
            save_to_dir=output_folder,
            save_prefix=f"aug_{stem}",
            save_format='jpeg',
        )

        # The generator is endless; zip() with a range pulls exactly
        # `num_augmented` batches from it (and none at all when the range is
        # empty), each pull saving one file.
        for _ in zip(range(num_augmented), flow):
            pass
        print(f"Augmented: {filename}")


# Run the augmentation with the default number of variants per image,
# reading from and writing to the folders assigned earlier in this cell.
augment_images(input_folder, output_folder)



Augmented: n02381460_1001.jpg
Augmented: n02381460_1002.jpg
Augmented: n02381460_1003.jpg
Augmented: n02381460_1006.jpg
Augmented: n02381460_1008.jpg
Augmented: n02381460_1009.jpg
Augmented: n02381460_1011.jpg
Augmented: n02381460_1014.jpg
Augmented: n02381460_1019.jpg
Augmented: n02381460_102.jpg
Augmented: n02381460_1023.jpg
Augmented: n02381460_1025.jpg
Augmented: n02381460_1027.jpg
Augmented: n02381460_1028.jpg
Augmented: n02381460_1034.jpg
Augmented: n02381460_1035.jpg
Augmented: n02381460_1037.jpg
Augmented: n02381460_1038.jpg
Augmented: n02381460_1044.jpg
Augmented: n02381460_1045.jpg
Augmented: n02381460_1048.jpg
Augmented: n02381460_105.jpg
Augmented: n02381460_1051.jpg
Augmented: n02381460_1052.jpg
Augmented: n02381460_1053.jpg
Augmented: n02381460_1058.jpg
Augmented: n02381460_1063.jpg
Augmented: n02381460_1068.jpg
Augmented: n02381460_1074.jpg
Augmented: n02381460_1075.jpg
Augmented: n02381460_108.jpg
Augmented: n02381460_1082.jpg
Augmented: n02381460_1083.jpg
Augmented: n

4. File Organization



Save the augmented images in a separate folder from the original images.

Ensure each image, both original and augmented, has a unique name.

Organize the images into two categories, one for original photos and one for the augmented versions.


In [6]:
import os
import shutil


# Root of the dataset on disk; every other folder in this cell is derived from it.
base_folder = "C:\\Users\\Faraz\\Desktop\\Data Science\\horse2zebra"

# Final destinations for the two image categories.
original_folder = os.path.join(base_folder, "original_images")
augmented_folder = os.path.join(base_folder, "augmented_images")

# Folders produced by the earlier preprocessing and augmentation steps.
# Each path component is passed separately so that os.path.join supplies the
# separator instead of a hard-coded "\\" embedded in one component.
processed_images_folder = os.path.join(base_folder, "trainA", "processed_images")
augmented_images_folder = os.path.join(base_folder, "trainA", "augmented_images")


# Make sure both category folders exist; exist_ok=True keeps the cell re-runnable.
for destination in (original_folder, augmented_folder):
    os.makedirs(destination, exist_ok=True)


# Extensions treated as images (compared case-insensitively). ".jpeg" must be
# listed: the augmentation cell saves with save_format='jpeg', so its files
# end in ".jpeg" and a ".jpg"/".png"-only check never matches them.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _move_images(source_folder, destination_folder, prefix):
    """Move each image file in ``source_folder`` to ``destination_folder``, prepending ``prefix`` to its name."""
    os.makedirs(destination_folder, exist_ok=True)
    # os.listdir returns a complete list up front, so moving files out of the
    # folder while looping over it is safe.
    for filename in os.listdir(source_folder):
        src_path = os.path.join(source_folder, filename)
        if not filename.lower().endswith(_IMAGE_EXTENSIONS):
            continue
        # Skip directories that merely happen to be named like an image.
        if not os.path.isfile(src_path):
            continue
        dst_path = os.path.join(destination_folder, f"{prefix}{filename}")
        shutil.move(src_path, dst_path)


def organize_original_images(source_folder=None, destination_folder=None):
    """Move the processed images into the "original" category folder.

    Every moved file is renamed to ``original_<name>``. Files are moved, not
    copied, so they no longer exist in the source folder afterwards.

    Args:
        source_folder: Folder to take images from. Defaults to the
            module-level ``processed_images_folder``.
        destination_folder: Folder to move images into. Defaults to the
            module-level ``original_folder``.

    Returns:
        None. Prints a confirmation line when done.
    """
    if source_folder is None:
        source_folder = processed_images_folder
    if destination_folder is None:
        destination_folder = original_folder
    _move_images(source_folder, destination_folder, "original_")
    print("Original images organized.")


def organize_augmented_images(source_folder=None, destination_folder=None):
    """Move the augmented images into the "augmented" category folder.

    Every moved file is renamed to ``augmented_<name>``. Files are moved, not
    copied, so they no longer exist in the source folder afterwards.

    Args:
        source_folder: Folder to take images from. Defaults to the
            module-level ``augmented_images_folder``.
        destination_folder: Folder to move images into. Defaults to the
            module-level ``augmented_folder``.

    Returns:
        None. Prints a confirmation line when done.
    """
    if source_folder is None:
        source_folder = augmented_images_folder
    if destination_folder is None:
        destination_folder = augmented_folder
    _move_images(source_folder, destination_folder, "augmented_")
    print("Augmented images organized.")

# Sort the files into the two category folders. Both helpers use
# shutil.move, so matching files are removed from their source folders.
organize_original_images()
organize_augmented_images()


Original images organized.
Augmented images organized.
