In [1]:
import cv2
import numpy as np
import scipy
from scipy import spatial
import imageio
import pickle
import random
import os
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from collections import Counter
import pandas as pd
from skimage import data
from skimage.util import img_as_ubyte
from skimage.filters.rank import entropy
from skimage.morphology import disk

In [3]:
def get_brightness(image):
    """Return the average brightness of a BGR image as a percentage.

    The image is converted to HSV and the value (V) channel, which is
    synonymous with brightness, is averaged over all pixels. The average is
    divided by 255 and multiplied by 100.

    Parameters
    ----------
    image : array
        Image in BGR channel order (the input expected by
        ``cv2.COLOR_BGR2HSV``).
        NOTE(review): dividing by 255 assumes an 8-bit image -- confirm.

    Returns
    -------
    NumPy floating-point scalar
        Mean of the V channel rescaled to a 0-100 percentage.
    """
    # Index 2 of the split HSV image is the value channel.
    value_channel = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))[2]
    height, width = value_channel.shape[:2]
    value_total = np.sum(value_channel, dtype=np.float32)
    return (value_total * 100.0) / (height * width * 255.0)

def get_saturation(image):
    """Return the average saturation of a BGR image as a percentage.

    The image is converted to HSV and the saturation (S) channel is averaged
    over all pixels. The average is divided by 255 and multiplied by 100.

    Parameters
    ----------
    image : array
        Image in BGR channel order (the input expected by
        ``cv2.COLOR_BGR2HSV``).
        NOTE(review): dividing by 255 assumes an 8-bit image -- confirm.

    Returns
    -------
    NumPy floating-point scalar
        Mean of the S channel rescaled to a 0-100 percentage.
    """
    # Index 1 of the split HSV image is the saturation channel.
    saturation_channel = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))[1]
    height, width = saturation_channel.shape[:2]
    saturation_total = np.sum(saturation_channel, dtype=np.float32)
    return (saturation_total * 100.0) / (height * width * 255.0)

def get_entropy(image):
    """Return the mean local entropy of a BGR image.

    The image is converted to grayscale, skimage's rank ``entropy`` filter is
    applied with a ``disk(5)`` footprint, and the resulting per-pixel entropy
    map is averaged over all pixels.

    TODO: the original author flagged this function as "Not working, ask
    David why". The reason is not recorded here -- verify the output before
    relying on it.

    Parameters
    ----------
    image : array
        Image in BGR channel order (the input expected by
        ``cv2.COLOR_BGR2GRAY``).

    Returns
    -------
    NumPy floating-point scalar
        Sum of the entropy map divided by its number of pixels.
    """
    local_entropy = entropy(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), disk(5))
    rows, cols = local_entropy.shape[:2]
    return np.sum(local_entropy, dtype=np.float32) / (rows * cols)

def get_sharpness(image):
    """Return a sharpness score: the variance of the image's Laplacian.

    Parameters
    ----------
    image : array
        Image in BGR channel order (the input expected by
        ``cv2.COLOR_BGR2GRAY``).

    Returns
    -------
    NumPy floating-point scalar
        Variance of the Laplacian of the grayscale image. The Laplacian is
        computed with 64-bit float output (``cv2.CV_64F``).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    return laplacian.var()

def get_contrast(image):
    """Return the RMS contrast of a BGR image.

    The contrast is the standard deviation of the grayscale pixel
    intensities.

    Parameters
    ----------
    image : array
        Image in BGR channel order (the input expected by
        ``cv2.COLOR_BGR2GRAY``).

    Returns
    -------
    NumPy floating-point scalar
        Standard deviation of the grayscale image.
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).std()

def get_colorfulness(image):
    """Return a colorfulness score for a BGR image.

    Two per-pixel difference channels are built from the float-converted
    colour planes:

        rg = |R - G|
        yb = |0.5 * (R + G) - B|

    The score combines their spread and their average level:

        sqrt(std(rg)^2 + std(yb)^2) + 0.3 * sqrt(mean(rg)^2 + mean(yb)^2)

    Parameters
    ----------
    image : array
        Three-channel image; the channels are unpacked in B, G, R order.

    Returns
    -------
    NumPy floating-point scalar
        The colorfulness score. It is 0 when all three channels are equal
        at every pixel.
    """
    blue, green, red = cv2.split(image.astype(np.float64))

    red_green = np.abs(red - green)
    yellow_blue = np.abs(0.5 * (red + green) - blue)

    # Spread and average level of the two difference channels.
    std_root = np.sqrt(np.std(red_green) ** 2 + np.std(yellow_blue) ** 2)
    mean_root = np.sqrt(np.mean(red_green) ** 2 + np.mean(yellow_blue) ** 2)

    return std_root + 0.3 * mean_root

In [4]:
def get_dominant_color(image_path, k=4, image_processing_size=None, random_state=None):
    """
    Return the dominant color of an image file as an HSV centroid.

    Link to original code https://adamspannbauer.github.io/2018/03/02/app-icon-dominant-colors/

    The image is read from ``image_path``, optionally resized, converted from
    BGR to HSV, and its pixels are clustered with k-means. The centroid of
    the cluster that holds the most pixels is returned.

    Processing time is sped up by working with a smaller image; this
    resizing can be requested with ``image_processing_size``.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with ``cv2.imread``.
    k : int, optional
        Number of k-means clusters (default 4).
    image_processing_size : tuple, optional
        Target size passed to ``cv2.resize``. When None (default) the image
        is clustered at its original size.
    random_state : int or None, optional
        Forwarded to ``KMeans`` so the clustering can be made reproducible.
        The default None keeps the previous, unseeded behaviour.

    Returns
    -------
    list
        The three components of the winning centroid, in the HSV channel
        order produced by ``cv2.COLOR_BGR2HSV``.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot read ``image_path`` (it returns None).

    Example
    -------
    >>> get_dominant_color("icon.png", k=4, image_processing_size=(25, 25))
    [56.2423442, 34.0834233, 70.1234123]
    """
    image = cv2.imread(image_path)
    if image is None:
        # Fail here with a clear message instead of an obscure OpenCV error
        # raised later by resize/cvtColor.
        raise ValueError("could not read image: %r" % (image_path,))

    # resize image if new dims provided
    if image_processing_size is not None:
        image = cv2.resize(image, image_processing_size,
                           interpolation=cv2.INTER_AREA)

    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # reshape the image to be a flat list of 3-component pixels
    hsv_pixels = hsv_image.reshape((-1, 3))

    # cluster and assign labels to the pixels
    clt = KMeans(n_clusters=k, random_state=random_state)
    labels = clt.fit_predict(hsv_pixels)

    # the most frequent label identifies the largest cluster
    dominant_label = Counter(labels).most_common(1)[0][0]

    return list(clt.cluster_centers_[dominant_label])

In [5]:
def get_edges(image, threshold_lower=100, threshold_upper=200):
    """Return the fraction of pixels that Canny edge detection marks as edges.

    Steps: convert the image to grayscale, reduce noise with a 3x3 Gaussian
    blur, run Canny edge detection, then pass the edge map to ``edge_ratio``.

    Parameters
    ----------
    image : array
        Image in BGR channel order (the input expected by
        ``cv2.COLOR_BGR2GRAY``).
    threshold_lower : int, optional
        Canny ``threshold1`` (default 100).
    threshold_upper : int, optional
        Canny ``threshold2`` (default 200). The thresholds can be tuned to
        include more or fewer edges depending on the image.

    Returns
    -------
    float
        The value returned by ``edge_ratio`` for the Canny edge map.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Noise reduction before edge detection.
    smoothed = cv2.GaussianBlur(gray, (3, 3), 0)

    edge_map = cv2.Canny(
        image=smoothed,
        threshold1=threshold_lower,
        threshold2=threshold_upper,
    )
    return edge_ratio(edge_map)

def edge_ratio(edge_array):
    """Return the fraction of pixels in an edge map whose value is 255.

    Parameters
    ----------
    edge_array : array-like
        Edge map in which edge pixels have the value 255 and every other
        value counts as empty (``get_edges`` passes the output of
        ``cv2.Canny``).

    Returns
    -------
    float
        ``edge pixels / total pixels``, or 0.0 for an empty input (the
        previous implementation raised ``ZeroDivisionError`` in that case).
    """
    pixels = np.asarray(edge_array)
    if pixels.size == 0:
        return 0.0
    # Vectorized count instead of a per-pixel Python loop.
    return np.count_nonzero(pixels == 255) / pixels.size

In [6]:
def batch_extractor(images_path, pickled_db_path="dominant_color.pck"):
    """
    Extract a feature vector for every readable image in a directory.

    Directory entries are processed in sorted order. Each readable image
    produces one entry in the result, keyed by the lower-cased file name with
    a trailing ``.png`` removed. The complete result is also pickled to
    ``pickled_db_path``.

    Each feature vector is a list of ten values:

        [c0, c1, c2, brightness, saturation, entropy, sharpness,
         contrast, colorfulness, edge]

    ``c0, c1, c2`` are the components returned by ``get_dominant_color``
    with the first and last swapped, each rounded to the nearest integer.
    NOTE(review): ``get_dominant_color`` clusters in HSV, so this swap stores
    the components in value, saturation, hue order -- confirm that is
    intended (it looks like a BGR -> RGB swap).

    Parameters
    ----------
    images_path : str
        Directory containing the images.
    pickled_db_path : str, optional
        Destination file for the pickled result dictionary.

    Returns
    -------
    dict
        Maps image name to its feature vector. Entries that ``cv2.imread``
        cannot read (sub-directories, non-image files) are skipped with a
        printed notice instead of aborting the whole batch.
    """
    png_suffix = '.png'
    result = {}

    for file_name in sorted(os.listdir(images_path)):
        path = os.path.join(images_path, file_name)
        print('Extracting features from image %s' % path)

        image = cv2.imread(path)
        if image is None:
            # cv2.imread returns None when the entry cannot be read as an image.
            print('Skipping %s: could not be read as an image' % path)
            continue

        # Use the directory entry itself as the key; this does not depend on
        # the platform's path separator. Strip only a trailing '.png'.
        name = file_name.lower()
        if name.endswith(png_suffix):
            name = name[:-len(png_suffix)]

        # Dominant color: swap first and last components, then round all of
        # them (previously the last component was left unrounded).
        dominant = get_dominant_color(path)
        dominant[0], dominant[-1] = dominant[-1], dominant[0]
        dominant = [round(component) for component in dominant]

        result[name] = [
            dominant[0], dominant[1], dominant[2],
            get_brightness(image),
            get_saturation(image),
            get_entropy(image),
            get_sharpness(image),
            get_contrast(image),
            get_colorfulness(image),
            get_edges(image),
        ]

    # saving all our feature vectors in pickled file
    with open(pickled_db_path, 'wb') as fp:
        pickle.dump(result, fp)

    return result