# Street Fashion Colors



### Importing libraries and defining functions


In [None]:
#############################################################################################################
##### 1. Import all necessary libraries.                                                                #####
#############################################################################################################
## If importing cv2 (or another library) raises an error, install the missing package by uncommenting the corresponding install line below.
#!conda install -c conda-forge opencv
#!echo -e "y\n" | conda install -c anaconda scikit-learn
#!echo -e "y\n" | conda install -c anaconda scipy
#!echo -e "y\n" | conda install -c conda-forge scikit-image
#!echo -e "y\n" | conda install matplotlib
#!echo -e "y\n" | conda install pandas
!echo -e "y\n" | conda install -c conda-forge opencv


import json, matplotlib, os, colorsys, scipy, scipy.spatial.distance, numpy as np, pandas as pd, \
    matplotlib.pyplot as plt, skimage, sklearn, re, cv2
from ast import literal_eval ## converting string renditions of arrays back to lists
from math import sqrt
from PIL import Image
from collections import Counter
from skimage import color
from skimage import data
from sklearn.cluster import KMeans


#############################################################################################################
#####    Definition of a function that returns the last digits of an int                               ######
#############################################################################################################
def get_last_digits(num, digits=2):
    """Return the trailing ``digits`` decimal digits of ``num``, i.e. ``num`` modulo ``10**digits``."""
    modulus = 10 ** digits
    return num % modulus

#############################################################################################################
#####   Definition of a function that maps labels to colors specified in config                        ######
#############################################################################################################
def apply_color_map(image_array, labels):
    """Paint a label image with the color configured for each label.

    Args:
        image_array: 2-D array of label ids (only its first two dimensions
            are used to size the output).
        labels: iterable of mappings with a "color" entry; the position of a
            mapping in the iterable is the label id it colors.

    Returns:
        uint8 array of shape (rows, cols, 3); pixels whose id matches no label
        stay zero.
    """
    height, width = image_array.shape[0], image_array.shape[1]
    painted = np.zeros((height, width, 3), dtype=np.uint8)

    label_id = 0
    for label in labels:
        ## every pixel carrying this id receives the label's color
        mask = image_array == label_id
        painted[mask] = label["color"]
        label_id += 1

    return painted


In [None]:
### import image and convert to pixels array

def loadimage(imagepath):
  """Read an image from disk and flatten it into an (N, 3) float32 RGB array.

  The array is stored in the module-level global ``pixels`` and is also
  returned, so callers do not have to depend on the global.

  Args:
    imagepath: path of the image file; converted with ``str()`` before reading.

  Returns:
    numpy.ndarray: float32 array with one row per pixel and the columns R, G, B.

  Raises:
    FileNotFoundError: if OpenCV cannot read the file.
  """
  global pixels
  imagepath = str(imagepath)
  img = cv2.imread(imagepath)
  if img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None
    raise FileNotFoundError("Could not read image file: " + imagepath)
  # OpenCV loads images as BGR; convert so the colors are correct as RGB
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  pixels = np.float32(img).reshape(-1, 3)
  return pixels

## logic to remove black pixels from the pixels array
  #o=pixels
  #N=0
  #for i in range(o.shape[0]):
  #  if (o[i,0] == 0) and (o[i,1]==0) and (o[i,2]==0): ## the pixel is black
  #    N+=0
  #  else:
  #    N+=1 ## the pixel is not black
  #result=np.zeros(shape=(N,3))
  #c=0
  #for i in range(o.shape[0]):
  #    if (o[i,0]==0) and (o[i,1]==0) and (o[i,2]==0):
  #        continue
  #result[c,:]=o[i,:]
  #c+=1
  #pixels = np.float32(result)

#loadimage('6kpeds/0abebbdfdc8148b3af66faf5455dcdf9_11.png')
#print(pixels) # for debugging


#### Run k-means to identify largest color clusters

In [None]:
#############################################################################################################
#####  APPLYING K-MEANS                                                                                ######
#############################################################################################################
def apply_kmeans(pixels, n_clusters=5):
    """Extract a color palette from pixels with weighted k-means in LAB space.

    The pixels are quantized into 16 bins per RGB channel. Every occupied bin
    is represented by its upper-edge color (not the mean color of the bin),
    converted to LAB, and k-means is run on those bin colors using the number
    of pixels per bin as sample weight.

    Side effects:
        Updates the module-level globals ``labarr_r``, ``rgbarr_r``,
        ``countsarr``, ``kmlabels_``, ``kmcluster_centers_`` and
        ``palettecounts``. Appends the palette (as uint8 RGB) to the
        module-level list ``palette_arr`` and the counts to
        ``palettecounts_arr``. When the segment is skipped only the first
        three globals are updated.

    Args:
        pixels: array-like of shape (N, 3) with RGB values (expected 0-255).
        n_clusters: number of palette colors (k); defaults to 5.

    Returns:
        tuple: ``(cluster_centers, palettecounts)`` where ``cluster_centers``
        is the (n_clusters, 3) array of LAB centers and ``palettecounts`` is a
        list with the number of color bins assigned to each cluster.
        ``(None, None)`` if there are fewer distinct color bins than clusters.
    """
    global labarr_r
    global kmlabels_
    global rgbarr_r
    global countsarr
    global kmcluster_centers_
    global palettecounts

    ## step 1: assign every pixel to one of 16 bins per channel and build the histogram
    pixelbins = (np.float32(pixels).reshape(-1, 3) // 16).astype(np.int64)  # 0-255 -> 0-15
    # pack the three bin indices into one integer per pixel (two decimal digits per channel)
    bincodes = pixelbins[:, 0] * 10000 + pixelbins[:, 1] * 100 + pixelbins[:, 2]
    bincodes, first_seen, bincounts = np.unique(bincodes, return_index=True, return_counts=True)
    # np.unique sorts by value; restore first-appearance order so the input to
    # k-means does not depend on the numeric value of the packed code
    order = np.argsort(first_seen)
    bincodes = bincodes[order]
    bincounts = bincounts[order]

    ## step 2: unpack each bin into its upper-edge RGB color (0-1 range) and convert to LAB
    rgb_bins = np.stack([bincodes // 10000 % 100, bincodes // 100 % 100, bincodes % 100], axis=1)
    rgb_edges = (rgb_bins + 1) / 16  # bin 0-15 -> 1/16 .. 1
    if len(rgb_edges):
        labarr_r = np.float32(skimage.color.rgb2lab(rgb_edges)).reshape(-1, 3)
    else:
        labarr_r = np.zeros((0, 3), dtype=np.float32)
    rgbarr_r = np.float32(rgb_edges).reshape(-1, 3)
    countsarr = np.float32(bincounts)

    ## step 3: weighted k-means on the LAB bin colors; the weights are the
    ## pixel counts per bin because each row stands for many pixels
    if len(countsarr) < n_clusters:
        print("Skipping segment because the number of colors < k")
        return None, None
    km = KMeans(
        n_clusters=n_clusters, init='k-means++', n_init=10, max_iter=300, tol=1e-04, random_state=0
    )
    km.fit(labarr_r, sample_weight=countsarr)

    kmlabels_ = km.labels_
    kmcluster_centers_ = km.cluster_centers_
    # number of color bins (not pixels) that ended up in each cluster
    palettecounts = np.bincount(kmlabels_, minlength=n_clusters).tolist()
    print("Palette counts:", palettecounts)

    # LAB has negative a/b components, so the centers are converted to 0-255 RGB
    # before the uint8 cast; counts are kept as int64 because they can exceed 255
    palette_arr.append(np.uint8(skimage.color.lab2rgb(kmcluster_centers_) * 255))
    palettecounts_arr.append(np.asarray(palettecounts, dtype=np.int64))
    return kmcluster_centers_, palettecounts
    


### Make a 3D plot

In [None]:
def make_3d_plot(labarr_r,kmlabels_,rgbarr_r, countsarr, kmcluster_centers_):
    """Draw a 3D scatter plot of the color bins and the k-means centers.

    Each row of ``labarr_r`` is drawn as one point positioned by its three
    columns (labelled L, a, b), colored with the matching row of ``rgbarr_r``
    and sized proportionally to ``countsarr``. The rows of
    ``kmcluster_centers_`` are drawn as red stars. ``kmlabels_`` is accepted
    but not used for the drawing.

    Note: this sets the global matplotlib rcParams ``figure.figsize`` and
    ``figure.autolayout``, which also affects figures created afterwards.
    """
    ## step 4: scatterplot of the bins and bin sizes together with the cluster centers
    plt.rcParams.update({
        "figure.figsize": [21.00, 10.50],
        "figure.autolayout": True,
    })
    axes3d = plt.figure().add_subplot(111, projection='3d')

    # one marker per color that appears in the image (use c=kmlabels_ for cluster colors)
    axes3d.scatter(
        labarr_r[:, 0], labarr_r[:, 1], labarr_r[:, 2],
        c=rgbarr_r, s=countsarr * 6, alpha=0.5,
    )
    # the cluster centers on top
    axes3d.scatter(
        kmcluster_centers_[:, 0], kmcluster_centers_[:, 1], kmcluster_centers_[:, 2],
        marker='*', c='red', s=100, alpha=1,
    )

    axes3d.set_xlabel('L')
    axes3d.set_ylabel('a')
    axes3d.set_zlabel('b')
    plt.grid()
    plt.show()

In [None]:

#############################################################################################################
#####       Building our segment and palette plots                                                     ######
#############################################################################################################
def plot_seg_palette(kmcluster_centers_,img,palettecounts):
    """Show a segment image next to its palette and record the palette.

    The LAB cluster centers are converted to 0-255 RGB. The RGB palette is
    appended to the module-level list ``palette_arr`` and the counts to
    ``palettecounts_arr``; then the image (titled 'Human') and the palette
    strip (titled 'Palette') are shown side by side.

    Args:
        kmcluster_centers_: array of shape (k, 3) with LAB cluster centers.
        img: image accepted by ``Axes.imshow``.
        palettecounts: sequence with one count per cluster.
    """
    rgb_cluster_centers = []
    for lab in kmcluster_centers_:
        rgb = (skimage.color.lab2rgb(lab) * 255).reshape(1, 3)
        rgb_cluster_centers.append([rgb[0, 0], rgb[0, 1], rgb[0, 2]])

    palette = np.uint8([rgb_cluster_centers])  # shape (1, k, 3): a one-row image
    palette_arr.append(np.uint8(rgb_cluster_centers))
    # counts can exceed 255, so they must not be squeezed into uint8
    palettecounts_arr.append(np.asarray(palettecounts, dtype=np.int64))

    ### plotting original filtered segment and dominant colors
    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(3, 3))
    ax0.imshow(img)
    ax0.set_title('Human')
    ax0.axis('off')
    ax1.imshow(palette)
    ax1.set_title('Palette')
    ax1.axis('off')
    # pyplot.show() takes no figure argument; it shows all open figures
    plt.show()



In [None]:
def plot_img_palette(palette_arr):
    """Show the collected palettes as a single image, titled 'Image h-palette'.

    Args:
        palette_arr: sequence of palettes, passed directly to ``Axes.imshow``
            (presumably equally sized uint8 RGB arrays, one per segment;
            verify against the caller). The figure is 1 inch wide and
            ``len(palette_arr)`` inches tall.
    """
    fig, ax0 = plt.subplots(1, 1)
    ax0.imshow(palette_arr)
    ax0.set_title('Image h-palette')
    ax0.axis('off')
    fig.set_figwidth(1)
    fig.set_figheight(len(palette_arr))
    # pyplot.show() takes no figure argument; it shows all open figures
    plt.show()



In [None]:
## Module-level accumulators shared by the functions of this notebook.

## palettes of every h-segment in the image
palette_arr = list()

## count of each palette color
palettecounts_arr = list()

## one record per processed image
mega_palette_arr = list()

In [None]:
import os
import pandas as pd

# Initialize humandf as an empty DataFrame with the single 'File Path' column.
# This is only a placeholder: humandf is reassigned further down in this cell
# with the result of listdata().
humandf = pd.DataFrame(columns=['File Path'])

def listdata(folder_path):
    """
    Create a DataFrame with file paths of all PNG files in the specified folder.

    The '.png' extension is matched case-insensitively. File names are sorted
    because os.listdir() returns entries in arbitrary order; sorting keeps the
    row order (and therefore any index-based selection) reproducible.

    Args:
    folder_path (str): Path to the folder containing PNG files.

    Returns:
    pandas.DataFrame: DataFrame with 'File Path' column containing PNG file paths
    (folder_path joined with the file name), empty if the folder has no PNG files.

    Raises:
    OSError: propagated from os.listdir() if the folder cannot be listed.
    """
    png_files = [
        os.path.join(folder_path, file)
        for file in sorted(os.listdir(folder_path))
        if file.lower().endswith('.png')
    ]

    return pd.DataFrame({'File Path': png_files})

# Folder that contains the PNG files
folder_path = '6k_peds'

# Collect the PNG paths into a DataFrame and write them to CSV without the index column
humandf = listdata(folder_path)
humandf.to_csv(path_or_buf='output.csv', index=False)

# Tell the user that the CSV file has been written
print('CSV file created successfully.')


In [None]:
# Example usage: list the PNG files of the folder and show the third entry.
folder_path = "6k_peds"
humandf = listdata(folder_path)

if humandf is None or humandf.empty:
    # nothing to show
    print("DataFrame is None or empty.")
elif len(humandf) < 3:
    print("DataFrame does not have at least 3 rows.")
else:
    # iloc is 0-based, so the third row is at position 2
    row_3 = humandf.iloc[2]
    print(row_3)

In [None]:
def process_segments(max_segments=40):
    """Compute a k-means palette for the segment images listed in ``humandf``.

    For each row of the module-level DataFrame ``humandf`` the image at
    'File Path' is loaded with ``loadimage`` and clustered with
    ``apply_kmeans``. Segments that ``apply_kmeans`` skips (it returns
    ``(None, None)``) are left out of the result.

    Side effects: rebinds the module-level lists ``mega_palette_arr``,
    ``palette_arr`` and ``palettecounts_arr`` to fresh lists, so repeated
    calls do not accumulate results from earlier runs.

    Args:
        max_segments: maximum number of rows of ``humandf`` to process.
            Defaults to 40, the debugging limit; pass None to process every
            row (the original note warns that the full run made the notebook
            hang).

    Returns:
        list: ``mega_palette_arr``, holding one
        ``[imagepath, cluster_centers, palettecounts]`` entry per processed
        segment.
    """
    global mega_palette_arr
    global palette_arr
    global palettecounts_arr
    mega_palette_arr = []
    palette_arr = []
    palettecounts_arr = []

    nsegments = len(humandf)
    if max_segments is not None:
        # never index past the end of humandf
        nsegments = min(nsegments, max_segments)

    for segmentindex in range(nsegments):
        imagepath = humandf.iloc[segmentindex]['File Path']
        print("Image Path:", imagepath)  # progress output

        # loadimage stores the flattened image in the module-level `pixels`
        loadimage(imagepath)
        cluster_centers, palettecounts = apply_kmeans(pixels)
        if cluster_centers is None:
            # too few colors for k-means; there is no palette to record
            continue

        mega_palette_arr.append([imagepath, cluster_centers, palettecounts])

    return mega_palette_arr
# Run the palette extraction over the listed segment images
result = process_segments()

# 'result' now holds the list returned by process_segments(); the function also keeps that list in the module-level mega_palette_arr, which is printed below


print(mega_palette_arr)

In [None]:
# Tabulate the collected palette records and save them as CSV (header row, no index column)
mega_palette_df = pd.DataFrame(data=mega_palette_arr)
mega_palette_df.to_csv(path_or_buf='testresults.csv', header=True, index=False)
