In [1]:
import numpy as np
import pandas as pd
from color_utils import RGB, RYB
from numpy.typing import NDArray
from pathlib import Path
from PIL import Image
from tqdm.notebook import tqdm


out_name = "database_w_colors.csv"
color_RGB_col, color_RYB_col = "color_RGB", "color_RYB"

# First run only: create the working copy of the database with two empty
# color columns. Later runs reuse the existing file so work can be resumed.
if not Path(out_name).exists():
    database = pd.read_csv("database.csv")
    database[color_RGB_col], database[color_RYB_col] = "", ""
    database.to_csv(out_name, index=False)

# Empty CSV cells are read back as NaN, which marks rows still to be processed.
database = pd.read_csv(out_name)

# Resume at the first row whose RGB color is still missing. When every row is
# already filled there is no such row; fall back to len(database) so the
# resume point stays an integer instead of None (which would make both
# progress.update() and the loop comparison raise TypeError).
missing_rgb = database[color_RGB_col].isna()
start = int(missing_rgb.idxmax()) if missing_rgb.any() else len(database)
data_root_path = Path.cwd().parent

progress = tqdm(total=21000)
progress.update(start)
i = start


def assign_colors(
    df: pd.DataFrame,
    row_idx: int,
    image: NDArray[np.uint8],
) -> None:
    """Store the dominant RGB and RYB colors of ``image`` in row ``row_idx``.

    The RGB value is computed and written first, then the RYB value, so an
    exception raised while computing the RYB color leaves the RGB cell
    already updated. ``df`` is modified in place; nothing is returned.

    NOTE(review): the annotation says ``image`` is a uint8 array, but the
    loop in this notebook passes the object returned by ``Image.open``.
    Confirm which input ``get_dominant_color`` actually expects.
    """
    targets = (
        (color_RGB_col, RGB),
        (color_RYB_col, RYB),
    )
    for column, color_space in targets:
        df.at[row_idx, column] = color_space.get_dominant_color(image)


# Process at most 21000 rows, but never index past the end of the table
# (database.at[...] raises KeyError for a row label that does not exist).
row_limit = min(21000, len(database))
# Row that was last resized after an IndexError. Each row gets exactly one
# resize-and-retry; without this guard an image that keeps failing would be
# retried forever.
resized_row = None

while i < row_limit:
    path = data_root_path / Path(database.at[i, "path"])

    with Image.open(path, formats=("JPEG",)) as image:
        try:
            assign_colors(database, i, image)
        except Exception as ex:
            can_retry = isinstance(ex, IndexError) and resized_row != i

            if not can_retry:
                # One line per failure, with the offending image, so appended
                # entries stay readable and traceable.
                with open("error.log", "a") as log:
                    log.write(f"{path}: {ex!r}\n")

                raise

            with open("bad_images.txt", "a") as bad_images:
                bad_images.write(str(path) + "\n")

            # Overwrite the file with a 224x224 copy and retry the same row
            # (i is intentionally not advanced).
            image.resize((224, 224)).save(path, "JPEG")
            resized_row = i
            continue

    i += 1
    progress.update()

    # Checkpoint every 100 completed rows so an interrupted run can resume.
    if i % 100 == 0:
        database.to_csv(out_name, index=False)

database.to_csv(out_name, index=False)

  0%|          | 0/21000 [00:00<?, ?it/s]