In [1]:
import importlib

import numpy as np
import pandas as pd
import numpy
import lab_graph

importlib.reload(lab_graph)

#Fix colormath depreciated numpy method
def patch_asscalar(a):
    return a.item()
setattr(numpy, "asscalar", patch_asscalar)

df = pd.read_csv("data/all-colors-unique.csv")

#Create a 2D array of Lab values
lab_list = list(df["L*a*b Value"])
lab_list = [x.split(", ") for x in lab_list]
lab_points = [[float(x) for x in row] for row in lab_list]

#Use greedy approach
result = lab_graph.greedy_mdp(lab_points, 16)

#Create new dataframe with just the resultant indices
result_df = df.loc[result]

#lab_graph.generate_lab_3d_all_colors(result_df, "Euclidean", "N/a")

In [2]:
from scipy.spatial.distance import pdist, squareform
import colormath.color_objects as co
import colormath.color_diff as cdiff

euclidean_distances = squareform(pdist(lab_points))
#Retrieves the indices furthest points
i, j = np.unravel_index(np.argmax(euclidean_distances), euclidean_distances.shape)

print("___Euclidean Distance___")
print(df.iloc[i])
print(df.iloc[j])
# Use lab points

def lab_point_distance(color1, color2, delta_e="CIE2000"):
    """
    Compute the difference between two colors using a Delta E formula.

    :param color1: Lab color number 1
    :param color2: Lab color number 2
    :param delta_e: The comparison formula, CIE2000 (symmetric) is default
    :return: Returns the distance between two colors using the CIE2000 or CMC delta E formula
    """
    lab_1 = co.LabColor(color1[0], color1[1], color1[2])
    lab_2 = co.LabColor(color2[0], color2[1], color2[2])

    if delta_e == "CIE2000":
        return cdiff.delta_e_cie2000(lab_1, lab_2, Kl=2, Kc=1, Kh=1)
    if delta_e == "CMC":
        return cdiff.delta_e_cmc(lab_1, lab_2, pl=2, pc=1)

distances_delta_e = squareform(pdist(lab_points, metric=lab_point_distance))
i,j = np.unravel_index(np.argmax(distances_delta_e), distances_delta_e.shape)

print("\n___Delta_E_Distance___")
print(df.iloc[i])
print(df.iloc[j])


___Euclidean Distance___
Color Name                      Poppy
PMS Color Code                  2347C
L*a*b Value       49.45, 75.12, 67.21
Hex                            E10600
Name: 36, dtype: object
Color Name                         Teal
PMS Color Code                    3262C
L*a*b Value       66.44, -59.33, -12.05
Hex                              00BFB2
Name: 46, dtype: object

___Delta_E_Distance___
Color Name                      Berry
PMS Color Code                  233CP
L*a*b Value       45.38, 71.84, -7.30
Hex                            C6057B
Name: 6, dtype: object
Color Name               Heather Kelly
PMS Color Code                   340 C
L*a*b Value       51.80, -65.36, 14.55
Hex                             00965E
Name: 72, dtype: object


1. List of LAB points
2. Generate a distance matrix using Euclidean/Delta E distance
3. Select a pair of points (starting point for greedy algorithm)
4. Run greedy algorithm on distance matrix
5. Create a resultant dataframe
6. Calculate a score based off the result

In [3]:
def greedy_search(points, distance_matrix, number_of_teams, start_a, start_b):
    """
    Returns the list of indices corresponding to the colors in the dataframe found
    through the greedy search
    :param points: Set of lab color points the greedy search is being performed in
    :param distance_matrix: Matrix of distances between points
    :param number_of_teams: Total number of teams optimizing for
    :param start_a: Index of the color in the dataframe, distance matrix, and point set
    :param start_b: index of the color in the dataframe, distance matrix, and point set
    :return:
    """
    n = len(points)
    k = number_of_teams #Total number of teams
    selected = set()
    selected.add(start_a)
    selected.add(start_b)

    while len(selected) < k:
        max_min_dist = -1
        best_point = -1

        #For i in the set of all colors, index of a specific color
        for i in range(n):
            # If the index has not been chosen
            if i not in selected:
                #Find the point with the greatest min distance from those selected, provides an even distribution
                #This does not find the furthest point from the current point, it's better than that
                min_dist = min(distance_matrix[i][j] for j in selected)
                if min_dist > max_min_dist:
                    max_min_dist = min_dist
                    best_point = i

        selected.add(best_point)

    return list(selected)

def score(distance_matrix, chosen):
    """
    Scores the resultant set of points based off the average distance between all chosen points
    :param distance_matrix: Matrix representing the distances between colors
    :param chosen: Chosen points (from the set of colors)
    :return: The average distance between all the provided (chosen) colors
    """
    subset_distances = distance_matrix[np.ix_(chosen, chosen)]
    upper_triangle = np.triu(subset_distances, k=1)
    avg_distance = upper_triangle.sum() / (upper_triangle != 0).sum()

    return avg_distance

point_a, point_b = np.unravel_index(np.argmax(distances_delta_e), distances_delta_e.shape)
selected = greedy_search(lab_points, distances_delta_e, 16, point_a, point_b)

delta_e_score = score(distances_delta_e, selected)
euclidean_score = score(euclidean_distances, result)

print("Delta E: " + str(delta_e_score))
print("Euclidean: " + str(euclidean_score))

Delta E: 38.72694104952147
Euclidean: 78.72508932265154


Delta E: 38.77662607204613
Euclidean: 78.72508932265154

In [4]:
#Retrieve the hundred furthest points
#check if distances is referenced anywhere in here
def n_furthest_points(number_of_points, distance_matrix):
    """
    Computes the n furthest points
    :param number_of_points: The nth point you want computed
    :param distance_matrix: The distance matrix representing the distances between all points in the set
    :return: The most distant points in the matrix, from 0 to n
    """
    flat_indices = np.argpartition(distance_matrix.flatten(), -number_of_points)[-number_of_points:]
    # Convert to 2D indices
    i_indices, j_indices = np.unravel_index(flat_indices, distance_matrix.shape)
    # Get corresponding distances
    top_distances = distance_matrix[i_indices, j_indices]
    # Sort by distance (optional)
    sort_idx = np.argsort(top_distances)[::-1]
    i_indices = i_indices[sort_idx]
    j_indices = j_indices[sort_idx]
    top_distances = top_distances[sort_idx]

    return i_indices, j_indices, top_distances

#i_points, j_points, top_dist = n_furthest_points(100, euclidean_distances)

In [5]:
top_euclidean_sets = []
scores = []

df = pd.read_csv("data/all-colors-unique.csv")
euclidean_distances = squareform(pdist(lab_points))

lab_list = list(df["L*a*b Value"])
lab_list = [x.split(", ") for x in lab_list]
lab_points = [[float(x) for x in row] for row in lab_list]

#103 Choose 2 is 5253, which we can compute, however every item is represented twice, so it's actually 10,506
i_points, j_points, top_dist = n_furthest_points(10506, euclidean_distances)

total_teams = 16

for run in range(0, 10506, 2):
    search_result = greedy_search(lab_points, euclidean_distances, total_teams, i_points[run], j_points[run])

    search_score = score(euclidean_distances, search_result)

    top_euclidean_sets += [search_result]

    scores += [search_score]


# Greedy Search Results

In [6]:

def write_colors_to_json(full_color_dataframe, color_indexes, output_filename):
    subset = full_color_dataframe.iloc[color_indexes]

    subset_df = pd.DataFrame(subset)

    colors_json = subset_df.to_json(orient="records")

    # print(score(euclidean_distances, list(highest_random_tuples[0]))) 76.28587811716082
    with open(output_filename, "w") as color_file:
        color_file.write(colors_json)

In [7]:
# scores.sort()
#
# average_greedy = sum(scores)/len(scores)
# print("Average " + str(average_greedy))
# print("Lowest: " + str(scores[0]))
# print("Highest: " + str(scores [-1]))
#
# scores_set = set(scores)
# len(scores_set)

sorted_pairs = sorted(zip(scores, top_euclidean_sets), reverse=True)  # reverse=True for descending order
# Unzip the sorted pairs
sorted_scores, sorted_sets = zip(*sorted_pairs)

# Convert back to lists (zip creates tuples)
sorted_scores = list(sorted_scores)
sorted_sets = list(sorted_sets)

#write_colors_to_json(df, sorted_sets[0], "part_2/assets/highest_greedy_colors.json")
# write_colors_to_json(df, sorted_sets[-1], "part_2/assets/lowest_greedy_colors.json")

sorted_sets[0]

# Before implementing the skipping every other
# Lowest: 72.54711616260738
# Highest: 81.92102523616252
# The result is the same, yay!


[100, 4, 6, 70, 8, 36, 46, 47, 16, 49, 19, 52, 53, 20, 60, 93]

In [20]:
# how do the above scores compare to randomly selecting points
import random
random_tuples = set()

while len(random_tuples) < 100000:
    random_tuples.add(tuple(random.sample(range(0, 102), 16)))

# .868 + 2.680 + .022


In [21]:
random_scores = []
lowest_random_tuples = []
highest_random_tuples = []
for random_tuple in random_tuples:
    random_score = score(euclidean_distances, list(random_tuple))
    if random_score < 32:
        lowest_random_tuples += [list(random_tuple)]
    if random_score > 75:
        highest_random_tuples += [list(random_tuple)]
    random_scores += [random_score]

print("Highest Random: ", highest_random_tuples[0])
print("Lowest Random: ", lowest_random_tuples[0])


Highest Random:  [24, 94, 75, 10, 36, 72, 53, 81, 1, 12, 20, 77, 19, 47, 101, 64]
Lowest Random:  [77, 48, 1, 59, 57, 16, 65, 97, 81, 54, 18, 43, 55, 14, 27, 15]


# Random Search Results

In [10]:
random_scores.sort()

mean_random_score = sum(random_scores)/len(random_scores)
print("Average: " + str(mean_random_score))
print("Lowest: " + str(random_scores[0]))
print("Highest: " + str(random_scores[-1]))

Average: 53.413049376538986
Lowest: 30.162168953354183
Highest: 79.26322185207914


In [22]:
import plotly.express as px

fig = px.histogram(random_scores, x=random_scores, nbins=100, title="Distance Score Histogram")

fig.show()

In [11]:
import json
subset = df.iloc[[99, 58, 0, 54, 59, 43, 96, 3, 57, 97, 8, 35, 67, 21, 14, 17]]

lowest_random_df = pd.DataFrame(subset)

lowest_json = lowest_random_df.to_json(orient="records")

print(score(euclidean_distances, list(lowest_random_tuples[0])))
# with open("part_2/assets/lowest_random_sample_colors.json", "w") as file:
#     file.write(lowest_json)
# lab_graph.generate_lab_3d_all_colors(lowest_random_df, "Bad Random")


31.650132471149163


In [12]:
h_subset = df.iloc[[65, 24, 68, 70, 53, 72, 0, 100, 94, 71, 50, 36, 78, 10, 37, 13]]

highest_random_df = pd.DataFrame(h_subset)

highest_json = highest_random_df.to_json(orient="records")

# print(score(euclidean_distances, list(highest_random_tuples[0]))) 76.28587811716082
with open("part_2/assets/highest_random_sample_colors.json", "w") as file:
    file.write(highest_json)

Everything from here down doesn't really work because LAB to RGB conversion isn't perfect.
The general flow of the cells is

- Attempting to plot the LAB points in 3D space with RGB values
- Realizing this doesn't work and trying to figure out a way to make it work

In [13]:
# Let's come up with a better scoring metric
import colormath.color_conversions as color_convert
from colormath.color_objects import sRGBColor
from colormath.color_objects import LabColor
from scipy.spatial import distance

all_colors = []
exclusion_array = []
excluded_lab_distances = []
y_step = 2

#TODO: Obviously the lab differences will be constant, because it is uniform, need to go RGB and then BACK to LAB (somehow)
for x in range(-127, 128, 8):
    for z in range(-127, 128, 8):
        for y in range(0, 101, y_step):
            color_name = " "
            pms_color_code = " "
            lab_value = str(y) + ", " + str(x) + ", " + str(z)

            lab_color = co.LabColor(y, x, z)
            rgb_color = color_convert.convert_color(lab_color, sRGBColor)

            lab_color_above = co.LabColor(y + y_step, x, z)
            rgb_color_above = color_convert.convert_color(lab_color_above, sRGBColor)

            color_tuple = sRGBColor.get_value_tuple(rgb_color)
            color_tuple_above = sRGBColor.get_value_tuple(rgb_color_above)

            color_distance = distance.euclidean(color_tuple, color_tuple_above)

            if color_tuple[0] > 1 or color_tuple[1] > 1 or color_tuple[2] > 1:
                d = cdiff.delta_e_cie2000(lab_color, lab_color_above, Kl=2, Kc=1, Kh=1)
                excluded_lab_distances += [d]
                d = cdiff.delta_e_cie2000(lab_color, co.LabColor(y - y_step, x, z),Kl=2, Kc=1, Kh=1 )
                break

            #what is the average distance between removed colors?

            hex_color = sRGBColor.get_rgb_hex(rgb_color)
            all_colors += [[color_name, pms_color_code, lab_value, hex_color[1:7].upper(), rgb_color, lab_color]]

In [14]:
importlib.reload(lab_graph)

full_space = pd.DataFrame(all_colors, columns=["Color Name", "PMS Color Code", "L*a*b Value", "Hex", "RGB", "LAB"])

excluded_lab_distances.sort()
excluded_lab_distances
#with observer of 2, f2, 33966 colors
#with observer of 2, d65, 33938 colors


[0.5675125154060172,
 0.5675125154060172,
 0.5675125154060172,
 0.5675125154060172,
 0.5675125154060172,
 0.5675125154060172,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5773812839118924,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501567523,
 0.5876027501

In [15]:
#lab_graph.generate_lab_3d_all_colors(full_space, "test", "n/a")

In [16]:
full_space.head()

Unnamed: 0,Color Name,PMS Color Code,L*a*b Value,Hex,RGB,LAB
0,,,"0, -127, -127",003BC1,sRGBColor (rgb_r:0.0000 rgb_g:0.2308 rgb_b:0.7...,LabColor (lab_l:0.0000 lab_a:-127.0000 lab_b:-...
1,,,"2, -127, -127",003DC7,sRGBColor (rgb_r:0.0000 rgb_g:0.2389 rgb_b:0.7...,LabColor (lab_l:2.0000 lab_a:-127.0000 lab_b:-...
2,,,"4, -127, -127",003FCD,sRGBColor (rgb_r:0.0000 rgb_g:0.2468 rgb_b:0.8...,LabColor (lab_l:4.0000 lab_a:-127.0000 lab_b:-...
3,,,"6, -127, -127",0041D2,sRGBColor (rgb_r:0.0000 rgb_g:0.2546 rgb_b:0.8...,LabColor (lab_l:6.0000 lab_a:-127.0000 lab_b:-...
4,,,"8, -127, -127",0043D8,sRGBColor (rgb_r:0.0000 rgb_g:0.2621 rgb_b:0.8...,LabColor (lab_l:8.0000 lab_a:-127.0000 lab_b:-...


In [17]:
#004DEE vs 0055FA vs 005D10

#distances_delta_e = squareform(pdist(lab_points, metric=lab_point_distance))

#Convert rgb colors to a list of vectors

from scipy.spatial import distance

color_list = []
for color in full_space["RGB"]:
    r = color.rgb_r
    g = color.rgb_g
    b = color.rgb_b

    color_list += [[r,g,b]]

rgb_distances = squareform(pdist(color_list))

removal_array = []
#TODO I need to define these in variables or I'm going to keep messing this up
for y in range(0,101, 4):
    for x in range(-127, 127, 24):
        for z in range(-127, 127, 24):
            # Get the current item
            # Get the item above (if possible)
            lab_value = str(y) + ", " + str(x) + ", " + str(z)
            current = full_space.loc[full_space["L*a*b Value"] == lab_value]
            current_rgb = [current.iloc[0].RGB.rgb_r, current.iloc[0].RGB.rgb_g, current.iloc[0].RGB.rgb_b]

            if y + 4 < 100:
                lab_value_above = str(y + 4) + ", " + str(x) + ", " + str(z)
                above = full_space.loc[full_space["L*a*b Value"] == lab_value_above]
                above_rgb = [above.iloc[0].RGB.rgb_r, above.iloc[0].RGB.rgb_g, above.iloc[0].RGB.rgb_b]
            else:
                continue

            color_distance = distance.euclidean(current_rgb, above_rgb)

            if color_distance >= 1:
                print("The distance is greater than 1")
                removal_array += [[y, x, z]]

len(removal_array)
#rgb_distances




IndexError: single positional indexer is out-of-bounds

In [None]:
below = full_space.loc[full_space["L*a*b Value"] == "16, -127, -127"]
current = full_space.loc[full_space["L*a*b Value"] == "20, -127, -127"]
above = full_space.loc[full_space["L*a*b Value"] == "24, -127, -127"]

b_rgb = [below.iloc[0].RGB.rgb_r, below.iloc[0].RGB.rgb_g, below.iloc[0].RGB.rgb_b]
c_rgb = [current.iloc[0].RGB.rgb_r, current.iloc[0].RGB.rgb_g, current.iloc[0].RGB.rgb_b]
a_rgb = [above.iloc[0].RGB.rgb_r, above.iloc[0].RGB.rgb_g, above.iloc[0].RGB.rgb_b]

c_lab = current.iloc[0].LAB
a_lab = above.iloc[0].LAB

b_rgb_to_lab = color_convert.convert_color(below.iloc[0].RGB, LabColor)
c_rgb_to_lab = color_convert.convert_color(current.iloc[0].RGB, LabColor)
a_rgb_to_lab = color_convert.convert_color(above.iloc[0].RGB, LabColor)

below.iloc[0].RGB

In [None]:

bottom = co.LabColor(16, -127, -127)

top = co.LabColor(24, -127, -127)

bottom_rgb = color_convert.convert_color(bottom, sRGBColor)
top_rgb = color_convert.convert_color(top, sRGBColor)

bottom_backwards = color_convert.convert_color(bottom_rgb, LabColor)

sRGBColor.get_rgb_hex(bottom_rgb)
# Use vectors to compute the RGB difference, pure hex difference is no good

# For optimal space -> Plot the volume, and then collapse it onto the current set of points?
# Also try and create a super low poly version of the full space and use that as the "target" difference

In [None]:
# in_space = co.LabColor(0, -127, 121)
in_space = co.LabColor(0, -127, -127)

in_space_rgb = color_convert.convert_color(in_space, sRGBColor)
in_space_backwards = color_convert.convert_color(in_space_rgb, LabColor)
in_space_backwards_forwards = color_convert.convert_color(in_space_backwards, sRGBColor)

print(in_space)
print(in_space_rgb)
print(in_space_backwards)
print(in_space_backwards_forwards)
color_diff = cdiff.delta_e_cie2000(in_space, in_space_backwards, Kl=2, Kc=1, Kh=1)

print(color_diff)

print(type(in_space_rgb.rgb_r))

'''
Acceptable
1.2156301576013084
0.6820364807078592
0.8342017249970183
'''