In [1]:
import os
from tqdm import tqdm
from PIL import Image, ImageChops
import pandas as pd

In [2]:
def is_greyscale(im):
    """
    Check if image is monochrome (1 channel or 3 identical channels)

    Parameters
    ----------
    im : image object exposing ``mode`` and ``split()`` (a PIL image)

    Returns
    -------
    bool
        True for mode "L", or for mode "RGB" when the first band is
        pixel-for-pixel equal to both the second and the third band;
        False otherwise.

    Raises
    ------
    ValueError
        If ``im.mode`` is neither "L" nor "RGB".
    """
    if im.mode not in ("L", "RGB"):
        # Name the offending mode so the failing file is easier to diagnose.
        raise ValueError("Unsupported image mode: {!r}".format(im.mode))

    if im.mode == "RGB":
        first, second, third = im.split()
        for other in (second, third):
            # getextrema()[1] is the maximum of the band difference; any
            # non-zero maximum means at least one pixel differs.
            if ImageChops.difference(first, other).getextrema()[1] != 0:
                return False
    return True

In [4]:
# Root folder of the dataset; every other path below is derived from it.
DATA_PATH = '/app/data/imagenet_data/'

# One folder per split.
TRAIN_PATH, VALI_PATH, TEST_PATH = (
    os.path.join(DATA_PATH, split_name) for split_name in ('train', 'vali', 'test')
)

# 'color' subfolder of each split.
TRAIN_COLOR_PATH, VALI_COLOR_PATH, TEST_COLOR_PATH = (
    os.path.join(split_dir, 'color') for split_dir in (TRAIN_PATH, VALI_PATH, TEST_PATH)
)

# 'grayscale' subfolder of each split.
TRAIN_GRAYSCALE_PATH, VALI_GRAYSCALE_PATH, TEST_GRAYSCALE_PATH = (
    os.path.join(split_dir, 'grayscale') for split_dir in (TRAIN_PATH, VALI_PATH, TEST_PATH)
)

In [5]:
def find_grayscale_in_dir(folder_path: str, folder_gray_path: str, func = is_greyscale) -> list:
    """
    Collect the paths of images in ``folder_path`` accepted by ``func``.

    Every entry returned by ``os.listdir(folder_path)`` is opened as an image
    and passed to ``func``. For each image where ``func`` returns a truthy
    value, TWO paths are appended to the result, in this order:

    1. the path of the file inside ``folder_path``;
    2. the path of the same file name inside ``folder_gray_path``.

    The length of the result is therefore twice the number of matches.

    Parameters
    ----------
    folder_path : directory whose files are opened and tested.
    folder_gray_path : directory used to build the second path of each pair;
        it is not read, only joined with the file name.
    func : predicate called with the opened image (defaults to ``is_greyscale``).

    Returns
    -------
    list of str
        Flat list of path pairs as described above.

    Raises
    ------
    Whatever ``Image.open`` or ``func`` raise (e.g. ``ValueError`` from the
    default ``is_greyscale`` on an unsupported image mode).
    """
    images = []
    for img_name in tqdm(os.listdir(folder_path)):
        full_img_path = os.path.join(folder_path, img_name)
        # Use the image as a context manager so its file handle is released
        # as soon as the check is done instead of lingering until collection.
        with Image.open(full_img_path) as img:
            is_match = func(img)
        if is_match:
            images.append(full_img_path)
            images.append(os.path.join(folder_gray_path, img_name))
    return images

In [6]:
# Scan the test split; the result holds a color path and a grayscale path per match.
gray_img_test = find_grayscale_in_dir(
    folder_path=TEST_COLOR_PATH,
    folder_gray_path=TEST_GRAYSCALE_PATH,
)

100%|██████████| 8041/8041 [00:15<00:00, 502.61it/s]


In [7]:
# Scan the validation split; the result holds a color path and a grayscale path per match.
gray_img_vali = find_grayscale_in_dir(
    folder_path=VALI_COLOR_PATH,
    folder_gray_path=VALI_GRAYSCALE_PATH,
)

100%|██████████| 16081/16081 [00:30<00:00, 523.80it/s]


In [8]:
# Scan the training split; the result holds a color path and a grayscale path per match.
gray_img_train = find_grayscale_in_dir(
    folder_path=TRAIN_COLOR_PATH,
    folder_gray_path=TRAIN_GRAYSCALE_PATH,
)

100%|██████████| 56282/56282 [01:49<00:00, 515.50it/s]


In [9]:
# Number of collected paths per split (train, vali, test) followed by the total.
# These are path entries, not images: each matched image contributes two paths.
(
    len(gray_img_train),
    len(gray_img_vali),
    len(gray_img_test),
    sum(map(len, (gray_img_train, gray_img_vali, gray_img_test))),
)

(2638, 790, 356, 3784)

In [10]:
# Single flat list of every collected path, in train -> vali -> test order.
all_gray = [*gray_img_train, *gray_img_vali, *gray_img_test]