# f0_image_feature_module 

This workbook collects all feature extraction functions in one place. Details on each feature can be found in its own workbook; the names of those workbooks all start with `f`.

In [9]:
import numpy as np
import os
import math
from os.path import isfile, join
from os import listdir
from sklearn.cluster import KMeans
from collections import Counter
import cv2
import image_preprocessing as ip


In [10]:
# Folder holding the sample pictures used throughout this workbook.
path_to_library = "../00_data/00_test_pic"

# Keep regular files only; every entry is the folder path and the file name
# joined by "/".
file_list = [
    "/".join((path_to_library, entry))
    for entry in listdir(path_to_library)
    if isfile(join(path_to_library, entry))
]

file_list

['../00_data/00_test_pic/.DS_Store',
 '../00_data/00_test_pic/frog_2.jpg',
 '../00_data/00_test_pic/test.jpg',
 '../00_data/00_test_pic/red.jpg']

In [11]:
# Load every readable image from the sample folder. The folder comes from
# path_to_library so the location is defined in a single place.
img_ready = ip.img_ready(path_to_library=path_to_library)
img_ready

File ../00_data/00_test_pic/.DS_Store is not readable.


[array([[[ 39,  97,  43],
         [ 39,  97,  43],
         [ 39,  97,  43],
         ...,
         [ 22,  62,  23],
         [ 24,  60,  24],
         [ 25,  60,  25]],
 
        [[ 39,  97,  43],
         [ 39,  97,  43],
         [ 39,  97,  43],
         ...,
         [ 22,  62,  23],
         [ 24,  60,  24],
         [ 25,  60,  25]],
 
        [[ 39,  97,  43],
         [ 39,  97,  43],
         [ 39,  97,  43],
         ...,
         [ 22,  62,  23],
         [ 24,  60,  24],
         [ 26,  60,  25]],
 
        ...,
 
        [[ 15, 112,  37],
         [ 15, 112,  37],
         [ 15, 113,  37],
         ...,
         [ 27, 127,  53],
         [ 29, 129,  55],
         [ 30, 131,  57]],
 
        [[ 15, 111,  37],
         [ 15, 111,  37],
         [ 14, 111,  37],
         ...,
         [ 26, 127,  53],
         [ 28, 129,  56],
         [ 30, 131,  58]],
 
        [[ 15, 110,  36],
         [ 15, 111,  36],
         [ 14, 111,  36],
         ...,
         [ 26, 127,  53],
  

### HSV

In [12]:
def img_hsv(img_ready):
    """Compute the mean hue, saturation and value of every image.

    Parameters
    ----------
    img_ready : iterable of numpy.ndarray
        3-channel images; each one is converted with
        ``cv2.cvtColor(img, cv2.COLOR_BGR2HSV)``, so BGR channel order is
        expected.

    Returns
    -------
    list of list of float
        One ``[mean_h, mean_s, mean_v]`` triple per image, in input order,
        each mean rounded to 4 decimals.
    """
    hsv_means = []

    for img in img_ready:
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # Average each channel over all pixels in one vectorised step with a
        # float64 accumulator. Adding the uint8 channel values one by one
        # with the built-in sum() can wrap around at 255 under NumPy >= 2
        # scalar promotion, and is very slow on full-size images.
        # NOTE(review): hue is an angle, so an arithmetic mean is only a
        # rough summary when an image's hues straddle the wrap-around point.
        channel_means = hsv.reshape(-1, 3).mean(axis=0, dtype=np.float64)
        hsv_means.append([round(float(mean), 4) for mean in channel_means])

    return hsv_means

In [13]:
# Reuse the images already loaded into img_ready (same folder) instead of
# reading them from disk a second time, like the other feature cells do.
list_hsv = img_hsv(img_ready)


File ../00_data/00_test_pic/.DS_Store is not readable.


### Colorfulness

In [14]:
def img_colorfulness(img_ready):
    """Score how colorful each image is (the higher, the more colorful).

    Each image is split into its B, G and R planes, from which two
    opponent-colour magnitudes are built: ``|R - G|`` and
    ``|0.5 * (R + G) - B|``. The score is the root-sum-square of their
    standard deviations plus 0.3 times the root-sum-square of their means.

    Parameters
    ----------
    img_ready : iterable of numpy.ndarray
        Images whose channels ``cv2.split`` yields in B, G, R order.

    Returns
    -------
    list of list
        One single-element list ``[colorfulness]`` per image, in input order.
    """
    scores = []

    for picture in img_ready:
        blue, green, red = cv2.split(picture.astype("float"))

        red_green = np.absolute(red - green)
        yellow_blue = np.absolute(0.5 * (red + green) - blue)

        spread = np.sqrt((np.std(red_green) ** 2) + (np.std(yellow_blue) ** 2))
        level = np.sqrt((np.mean(red_green) ** 2) + (np.mean(yellow_blue) ** 2))

        scores.append([spread + (0.3 * level)])

    return scores


In [15]:
# Colorfulness score for every image held in img_ready.
list_colorfulness = img_colorfulness(img_ready)

### Contrast

In [16]:
def img_contrast(img_ready):
    """Measure the contrast of each image (the higher, the more contrast).

    Contrast is taken as the standard deviation of the pixel intensities
    after the image has been converted to grayscale.

    Parameters
    ----------
    img_ready : iterable of numpy.ndarray
        Images converted with ``cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)``, so
        BGR channel order is expected.

    Returns
    -------
    list of list
        One single-element list ``[contrast]`` per image, in input order.
    """
    return [
        [cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY).std()]
        for picture in img_ready
    ]



In [17]:
# Contrast value for every image held in img_ready.
list_contrast = img_contrast(img_ready)

### Dominant Color

In [18]:
def img_dominant_color(img_ready, k=4, random_state=0):
    """Find the dominant colour of each image with k-means clustering.

    The pixels of every image are clustered into ``k`` groups; the centre of
    the group holding the most pixels is reported as the dominant colour.

    Parameters
    ----------
    img_ready : iterable of numpy.ndarray
        3-channel images; each one is converted with
        ``cv2.cvtColor(img, cv2.COLOR_BGR2RGB)``, so BGR channel order is
        expected.
    k : int, default 4
        Number of clusters passed to ``KMeans``.
    random_state : int or None, default 0
        Seed passed to ``KMeans`` so repeated runs give the same colours.
        Pass ``None`` for unseeded clustering.

    Returns
    -------
    list of list of float
        One ``[r, g, b]`` cluster centre per image, in input order.
    """
    dominant_colors = []

    for img in img_ready:
        # Convert to RGB so the returned centre reads as r, g, b.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        pixels = rgb.reshape(-1, 3)

        # n_init is pinned because its default changed between scikit-learn
        # releases; random_state makes the clustering reproducible.
        clt = KMeans(n_clusters=k, n_init=10, random_state=random_state)
        labels = clt.fit_predict(pixels)

        # The label assigned to the most pixels identifies the dominant cluster.
        most_common_label = Counter(labels).most_common(1)[0][0]
        dominant = clt.cluster_centers_[most_common_label]
        dominant_colors.append([float(channel) for channel in dominant])

    return dominant_colors

In [19]:
# Dominant colour for every image held in img_ready, with the default k.
list_dominant_color = img_dominant_color(img_ready)

## Write a function to summarize all results into 1 dataframe

Goal: a dataframe with the following columns:

file_name | h | s | v | dominant_color_R | dominant_color_G | dominant_color_B | contrast | colorfulness | ...

In [20]:
import pandas as pd

In [21]:
# Toy stand-ins for two feature lists: rows of `a` hold three values, rows of `b` one.
a = [[1,2,4], [2,2,5], [4,2,9]]
b = [[1], [3], [7]]

In [22]:
# Adding two lists concatenates them, giving one combined row.
a0 = a[0] + b[0]
a0

[1, 2, 4, 1]

In [23]:
# NOTE(review): this pairs a[1] with b[0]; pairing it with b[1] would match a
# row-by-row combination of the two lists — confirm which was intended.
a1 = a[1] + b[0]
a1

[2, 2, 5, 1]

In [24]:
# Two combined rows written out by hand, used to try the DataFrame conversion.
z = [[1, 2, 4, 1], [2, 2, 5, 1]]


In [25]:
pd.DataFrame(z)

Unnamed: 0,0,1,2,3
0,1,2,4,1
1,2,2,5,1


In [26]:
# use the logic above to combine all feature results

In [27]:
# ip.img_read returns two values; valid_path is used as the path column.
# NOTE(review): presumably valid_path is in the same order as img_ready — verify.
img_list, valid_path = ip.img_read(file_list)


File ../00_data/00_test_pic/.DS_Store is not readable.


In [29]:
valid_path

[['../00_data/00_test_pic/frog_2.jpg'],
 ['../00_data/00_test_pic/test.jpg'],
 ['../00_data/00_test_pic/red.jpg']]

In [30]:
list_hsv

[[49.7562, 154.8772, 135.9965],
 [53.5365, 12.9889, 178.8828],
 [171.2669, 209.4558, 111.5541]]

In [31]:
list_colorfulness

[[50.704544146258954], [6.923195255127685], [60.78603025652514]]

In [32]:
list_contrast

[[37.9766408829949], [87.36233917303912], [12.412051788676655]]

In [33]:
list_dominant_color

[[69.34662484165005, 133.29987331845336, 43.02002774929542],
 [250.01762699018497, 250.01766489769238, 250.0460576194406],
 [110.41638863613389, 17.27090284096618, 26.261854210898612]]

In [34]:
# One flat row per image: the path first, then every feature in a fixed order.
result_value = [
    valid_path[i] + list_hsv[i] + list_colorfulness[i] + list_contrast[i] + list_dominant_color[i]
    for i in range(len(valid_path))
]

In [35]:
# Column names, listed in the same order the values are concatenated into each row.
features = ["file_path", "H", "S", "V", "colorfulness", "contrast", "R", "G", "B"]

In [36]:
# Pair every row with the column names so pandas can label the columns.
df_result = [dict(zip(features, row)) for row in result_value]

In [37]:
pd.DataFrame(df_result)

Unnamed: 0,file_path,H,S,V,colorfulness,contrast,R,G,B
0,../00_data/00_test_pic/frog_2.jpg,49.7562,154.8772,135.9965,50.704544,37.976641,69.346625,133.299873,43.020028
1,../00_data/00_test_pic/test.jpg,53.5365,12.9889,178.8828,6.923195,87.362339,250.017627,250.017665,250.046058
2,../00_data/00_test_pic/red.jpg,171.2669,209.4558,111.5541,60.78603,12.412052,110.416389,17.270903,26.261854


In [38]:
def img_get_feature(file_list, img_ready, k=4):
    """Collect every image feature into one flat row per image.

    Parameters
    ----------
    file_list : list of str
        Candidate file paths; passed to ``ip.img_read`` to obtain the paths
        used as the first entry of each row.
    img_ready : list of numpy.ndarray
        Images handed to every feature function.
        NOTE(review): presumably loaded from the same files, in the same
        order, as the paths ``ip.img_read`` returns — verify against caller.
    k : int, default 4
        Number of clusters forwarded to ``img_dominant_color``.

    Returns
    -------
    tuple
        ``(features, feature_list)``: the column names, and one row per
        image laid out as path, H, S, V, colorfulness, contrast and the
        dominant colour's r, g, b.

    Raises
    ------
    ValueError
        If the number of paths differs from the number of rows produced by
        any feature function, because the rows could not be aligned.
    """
    # Only the paths are needed here; the images returned alongside are unused.
    _, valid_path = ip.img_read(file_list)
    list_hsv = img_hsv(img_ready=img_ready)
    list_colorfulness = img_colorfulness(img_ready=img_ready)
    list_contrast = img_contrast(img_ready=img_ready)
    list_dominant_color = img_dominant_color(img_ready=img_ready, k=k)

    features = ["file_path", "H", "S", "V", "colorfulness", "contrast", "dom_R", "dom_G", "dom_B"]

    columns = (valid_path, list_hsv, list_colorfulness, list_contrast, list_dominant_color)
    if len({len(column) for column in columns}) != 1:
        raise ValueError(
            "file_list and img_ready do not describe the same number of images"
        )

    feature_list = [
        path + hsv + colorfulness + contrast + dominant
        for path, hsv, colorfulness, contrast, dominant in zip(*columns)
    ]

    # Return the rows built in this call, not the notebook-level result_value.
    return features, feature_list

In [39]:
# End-to-end run on the sample folder: collect the file paths, load the
# images, then build the feature rows.
# NOTE(review): img_get_feature calls ip.img_read on file_list again, so the
# folder is processed both here (ip.img_ready) and inside the function.
file_list = ip.get_file_path("../00_data/00_test_pic/")
ready_img = ip.img_ready("../00_data/00_test_pic/")
img_get_feature(file_list, ready_img, k=4)

File ../00_data/00_test_pic//.DS_Store is not readable.
File ../00_data/00_test_pic//.DS_Store is not readable.


(['file_path',
  'H',
  'S',
  'V',
  'colorfulness',
  'contrast',
  'dom_R',
  'dom_G',
  'dom_B'],
 [['../00_data/00_test_pic/frog_2.jpg',
   49.7562,
   154.8772,
   135.9965,
   50.704544146258954,
   37.9766408829949,
   69.34662484165005,
   133.29987331845336,
   43.02002774929542],
  ['../00_data/00_test_pic/test.jpg',
   53.5365,
   12.9889,
   178.8828,
   6.923195255127685,
   87.36233917303912,
   250.01762699018497,
   250.01766489769238,
   250.0460576194406],
  ['../00_data/00_test_pic/red.jpg',
   171.2669,
   209.4558,
   111.5541,
   60.78603025652514,
   12.412051788676655,
   110.41638863613389,
   17.27090284096618,
   26.261854210898612]])