## Color Palettes from Images - dj
last updated: 04/04/2022 <br>
author: danielle.heymann <br>
<br>
I made this notebook to derive a color palette from an image: the image's pixels are clustered with k-means, and the cluster centers (its dominant colors) form the palette. <br>
reference: https://pyimagesearch.com/2014/05/26/opencv-python-k-means-color-clustering/

In [None]:
# Libraries
import pandas as pd
import os
from pathlib2 import Path
import sys
import numpy as np
import math
import re
import glob
import seaborn as sns
from PIL import Image
import matplotlib.pyplot as plt
import ntpath
import webcolors

from sklearn.cluster import KMeans
import cv2

In [None]:
# Establish data path
# NOTE(review): "datapath" looks like a placeholder -- point it at the folder
# that holds the images before running the notebook.
data_dir = str(Path("datapath").resolve())

# neat views
# Show every row/column when a DataFrame is displayed.
pd.set_option("display.max_rows", None, "display.max_columns", None)
# `IPython.display` is the public home of display/HTML; importing them from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Inject CSS so elements with the `.container` class use the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# get_color_palette functions with kmeans

def centroid_histogram(clt):
    """Return the fraction of samples assigned to each cluster.

    Parameters
    ----------
    clt : fitted clustering estimator
        Must expose ``labels_`` (one integer cluster index per sample). When it
        also exposes ``cluster_centers_``, the number of centers fixes the
        length of the result so that entry ``k`` always describes center ``k``.

    Returns
    -------
    numpy.ndarray
        Float array whose entry ``k`` is the share of samples labelled ``k``.
        It sums to one, or is all zeros / empty when nothing was counted.
    """
    labels = np.asarray(clt.labels_)
    centers = getattr(clt, "cluster_centers_", None)

    # Size the histogram from the number of centers rather than from the labels
    # that happen to occur: a cluster with no assigned samples must still get a
    # (zero) bin, otherwise later counts shift onto the wrong centroid.
    if centers is not None:
        n_clusters = len(centers)
    elif labels.size:
        n_clusters = max(len(np.unique(labels)), int(labels.max()) + 1)
    else:
        n_clusters = 0

    if n_clusters == 0:
        return np.zeros(0, dtype="float")

    # one unit-wide bin per cluster index: [0, 1), [1, 2), ..., [n-1, n]
    bin_edges = np.arange(0, n_clusters + 1)
    (hist, _) = np.histogram(labels, bins=bin_edges)

    # normalize the histogram, such that it sums to one
    hist = hist.astype("float")
    total = hist.sum()
    if total > 0:  # avoid 0/0 -> NaN when no sample fell into any bin
        hist /= total
    return hist

def plot_colors(hist, centroids, image_path):
    """Print an image's file name and draw its palette as a row of swatches.

    Each centroid is converted to a hex color string, drawn as one
    equal-height bar filled with that color, and labelled with the hex code.
    The list of hex codes is printed afterwards.

    Parameters
    ----------
    hist : sequence of float
        Relative frequency of each cluster. Accepted for interface
        compatibility; the swatches are drawn with equal size, so it is unused.
    centroids : iterable of array-like
        One (R, G, B) triple per cluster; values may be floats.
    image_path : str
        Path of the source image; only its base name is printed.

    Returns
    -------
    None
    """
    # Round to the nearest 8-bit value (a plain uint8 cast would truncate the
    # float cluster means) and clip so the cast can never wrap around.
    colors = [
        webcolors.rgb_to_hex(
            np.clip(np.rint(color), 0, 255).astype("uint8").tolist()
        )
        for color in centroids
    ]

    # print the filename of the image
    print(" \n" + ntpath.basename(image_path))

    # Use integer bar positions instead of the hex strings themselves: with
    # string (categorical) x values, two clusters sharing a hex code would
    # collapse into a single bar while the labels are still placed by index.
    positions = list(range(len(colors)))
    heights = [1] * len(colors)

    fig, ax = plt.subplots(figsize=(20, 1))
    ax.set_xticks([])
    ax.set_yticks([])
    for side in ("right", "top", "bottom", "left"):
        ax.spines[side].set_visible(False)

    ax.bar(positions, height=heights, color=colors)
    # write each hex code on top of its swatch
    for position, height, label in zip(positions, heights, colors):
        ax.text(position - .3, height, label)

    plt.show()
    # release the figure so calling this in a loop does not accumulate figures
    plt.close(fig)

    print(colors)
    print(" \n \n \n \n \n")
    

In [None]:
# loop through image dir and get list of images that we will make individual color palettes for
types = ('*.jpg', '*.png') # the tuple of file types
N_COLORS = 8      # number of k-means clusters, i.e. colors per palette
RANDOM_SEED = 42  # fixes the k-means initialisation so palettes are reproducible

images = []
for t in types:
    images = images + glob.glob(data_dir + "/**/" + t, recursive = True)
images.sort()  # glob order is arbitrary; sort for a stable processing order

for image_path in images:
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (instead of raising) when a file cannot be read
        print("skipping unreadable image: " + image_path)
        continue
    # OpenCV loads pixels as BGR; matplotlib and the hex conversion expect RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # queue the image itself; it is rendered by the plt.show() in plot_colors
    plt.figure()
    plt.axis("off")
    plt.imshow(image)

    # reshape so that we've got a list of pixels instead of array
    pixels = image.reshape((-1, 3))

    # cluster the pixel intensities
    clt = KMeans(n_clusters = N_COLORS, random_state = RANDOM_SEED)
    clt.fit(pixels)

    # show our color barplot
    hist = centroid_histogram(clt)
    plot_colors(hist, clt.cluster_centers_, image_path)
