# Dataset Preparation and Green-Blind Simulation for CNN Training

## Simulate Green-Blind Color Vision (Deuteranomaly)

In [25]:
pip install colorspacious


Note: you may need to restart the kernel to use updated packages.


In [23]:
import os
import numpy as np
from matplotlib.image import imread, imsave
from colorspacious import cspace_convert

input_folder = 'raw_data/data'
output_folder = 'raw_data/Simulated_GreenBlind50'

# Make sure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Green-blind simulation configuration (colorspacious "sRGB1+CVD" colour space)
cvd_space = {
    "name": "sRGB1+CVD",
    "cvd_type": "deuteranomaly", #green
    "severity": 50
}


def _as_unit_float(img):
    """Return ``img`` as a float array in the [0, 1] range with a channel axis.

    Integer images are divided by the maximum value of their dtype, so the
    decision does not depend on the pixel content (a nearly black uint8 image
    is still scaled correctly). Float images are returned unscaled -- assumes
    they are already in [0, 1], which is how ``imread`` returns PNG files.
    Greyscale (2-D) images are expanded to three identical channels.
    """
    img = np.asarray(img)
    if np.issubdtype(img.dtype, np.integer):
        img = img / np.iinfo(img.dtype).max
    if img.ndim == 2:
        img = np.stack([img, img, img], axis=-1)
    return img


# Process each image found directly inside input_folder (subfolders are not visited)
processed = 0
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith((".png", ".jpg", ".jpeg")):
        continue

    img_path = os.path.join(input_folder, filename)
    img = _as_unit_float(imread(img_path))

    # The colour conversion works on three channels only, so an alpha channel
    # (RGBA PNGs) is set aside and re-attached unchanged after the simulation.
    rgb, alpha = img[..., :3], img[..., 3:]

    simulated_img = cspace_convert(rgb, cvd_space, "sRGB1")
    simulated_img = np.clip(simulated_img, 0, 1)
    if alpha.shape[-1]:
        simulated_img = np.concatenate([simulated_img, alpha], axis=-1)

    output_path = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_greenblind.png")
    imsave(output_path, simulated_img)
    processed += 1

if processed == 0:
    # Typically happens when the images were already moved into per-class
    # subfolders by a later cell and this cell is run again.
    print(f"Warning: no images found directly inside '{input_folder}'; nothing was simulated.")
else:
    print("Completed green color blindness simulation!")

✅ Completed green color blindness simulation!


## Organizing Image Dataset into Subfolders per Class

### Original (normal-vision) data

In [1]:

import os
import shutil

source_folder = 'raw_data/data'

# Move every file whose name starts with a digit into a subfolder named after
# that digit, e.g. 'raw_data/data/3_x.png' -> 'raw_data/data/3/3_x.png'.
for filename in os.listdir(source_folder):
    src_path = os.path.join(source_folder, filename)

    # Only regular files are moved. The class subfolders created by a previous
    # run ('0'..'9') also start with a digit; trying to move one of them into
    # itself makes shutil.move raise, so skipping them keeps the cell re-runnable.
    if not os.path.isfile(src_path):
        continue

    if filename[0].isdigit():
        first_digit = filename[0]
        target_folder = os.path.join(source_folder, first_digit)

        os.makedirs(target_folder, exist_ok=True)

        dst_path = os.path.join(target_folder, filename)

        shutil.move(src_path, dst_path)

### Simulated_GreenBlind data

In [2]:
import os
import shutil

source_folder = 'raw_data/Simulated_GreenBlind50'

# Move every simulated image whose name starts with a digit into a subfolder
# named after that digit, e.g. '3_x_greenblind.png' -> '3/3_x_greenblind.png'.
for filename in os.listdir(source_folder):
    src_path = os.path.join(source_folder, filename)

    # Only regular files are moved. The class subfolders created by a previous
    # run ('0'..'9') also start with a digit; trying to move one of them into
    # itself makes shutil.move raise, so skipping them keeps the cell re-runnable.
    if not os.path.isfile(src_path):
        continue

    if filename[0].isdigit():
        first_digit = filename[0]
        target_folder = os.path.join(source_folder, first_digit)

        os.makedirs(target_folder, exist_ok=True)

        dst_path = os.path.join(target_folder, filename)

        shutil.move(src_path, dst_path)

## Merge Simulated and Original Image Data into One Dataset

In [None]:
import os
import shutil

source1 = 'raw_data/data'
source2 ='raw_data/Simulated_GreenBlind50'
destination = 'raw_data/merged_data'

os.makedirs(destination, exist_ok=True)

# For each class label '0'..'9', copy the files of both source datasets into
# one merged class folder. Files from source2 get a "sim_" prefix; files from
# source1 keep their name.
for digit in range(10):
    label = str(digit)
    merged_class_dir = os.path.join(destination, label)
    os.makedirs(merged_class_dir, exist_ok=True)

    for origin, prefix in ((source1, ""), (source2, "sim_")):
        class_dir = os.path.join(origin, label)
        for entry in os.listdir(class_dir):
            shutil.copy(
                os.path.join(class_dir, entry),
                os.path.join(merged_class_dir, prefix + entry),
            )

print("✅ Done merging the datasets!")

## Image Class Count Summary

### Original (normal-vision) data

In [2]:
import os

data_path ='raw_data/data'

# Print, for every subfolder of data_path (in sorted order), how many of its
# entries have an image extension (.png / .jpg / .jpeg, case-insensitive).
for class_name in sorted(os.listdir(data_path)):
    class_dir = os.path.join(data_path, class_name)
    if not os.path.isdir(class_dir):
        continue
    image_total = sum(
        1
        for entry in os.listdir(class_dir)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    print(f"Class '{class_name}': {image_total} images")


Class '0': 128 images
Class '1': 140 images
Class '2': 144 images
Class '3': 152 images
Class '4': 132 images
Class '5': 156 images
Class '6': 132 images
Class '7': 140 images
Class '8': 148 images
Class '9': 128 images


### merged data

In [9]:
data_path ='raw_data/merged_data'

# Print, for every subfolder of the merged dataset (in sorted order), how many
# of its entries have an image extension (.png / .jpg / .jpeg, case-insensitive).
for class_name in sorted(os.listdir(data_path)):
    class_dir = os.path.join(data_path, class_name)
    if not os.path.isdir(class_dir):
        continue
    image_total = sum(
        1
        for entry in os.listdir(class_dir)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    print(f"Class '{class_name}': {image_total} images")

Class '0': 256 images
Class '1': 280 images
Class '2': 288 images
Class '3': 304 images
Class '4': 264 images
Class '5': 312 images
Class '6': 264 images
Class '7': 280 images
Class '8': 296 images
Class '9': 256 images
