In [11]:
import glob
import os

import cv2
import numpy as np
from skimage.feature import local_binary_pattern

In [12]:
def get_brightness(img):
    """Return the mean luma of a BGR image.

    The image is converted to YUV and the average of the Y plane is returned.

    Args:
        img: image array in OpenCV BGR channel order.

    Returns:
        Mean value of the Y channel.
    """
    luma, _chroma_u, _chroma_v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2YUV))
    return luma.mean()

def get_sharpness(img_gray):
    """Return the mean ratio |Laplacian| / local-mean over a grayscale image.

    For every pixel whose 3x3 box-blurred value is non-zero, the absolute
    Laplacian response is divided by that blurred value. The ratios are summed
    and divided by the total pixel count (pixels with a zero blurred value
    contribute nothing to the sum but are still counted in the denominator).

    The computation is vectorized with NumPy; the previous per-pixel Python
    loop produced the same quantity but took time proportional to a Python
    call per pixel. Results may differ from the loop in the last floating-point
    bits because the summation order is different.

    Args:
        img_gray: 2-D single-channel image array.

    Returns:
        The sharpness score as a float.
    """
    n_pixels = float(img_gray.shape[0] * img_gray.shape[1])

    img_blur = cv2.blur(img_gray, (3, 3))
    # CV_16S (== 3, the literal used previously) keeps negative Laplacian
    # responses instead of saturating them to the input's unsigned range.
    img_lap = cv2.Laplacian(img_gray, cv2.CV_16S)

    blur = np.asarray(img_blur, dtype=np.float64)
    lap = np.asarray(img_lap, dtype=np.float64)

    # Select valid pixels before dividing so no division by zero is evaluated.
    valid = blur != 0
    total = np.abs(lap[valid] / blur[valid]).sum()
    return total / n_pixels

def get_colorfulness(img):
    """Return a colorfulness score computed from opponent color channels.

    With rg = R - G and yb = 0.5 * (R + G) - B, the score is
    sqrt(var(rg) + var(yb)) + 0.3 * sqrt(mean(rg)**2 + mean(yb)**2).

    The channels are converted to float before any arithmetic. On integer
    images (e.g. uint8) the subtraction ``r - g`` and the sum ``r + g`` would
    otherwise wrap around modulo the integer range and corrupt the score.

    Args:
        img: 3-channel image array in BGR channel order.

    Returns:
        The colorfulness score as a float.

    Raises:
        ValueError: if ``img`` is not a 3-D array with exactly 3 channels.
    """
    pixels = np.asarray(img)
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        raise ValueError("get_colorfulness expects a 3-channel BGR image")

    pixels = pixels.astype(np.float64)
    b, g, r = pixels[..., 0], pixels[..., 1], pixels[..., 2]

    rg = r - g
    yb = 0.5 * (r + g) - b
    sigma = np.sqrt(rg.var() + yb.var())
    mu = np.sqrt(rg.mean() ** 2 + yb.mean() ** 2)
    return sigma + 0.3 * mu

def get_hue(img):
    """Return the mean of the H channel of a BGR image converted to HSV.

    The image is converted once (the previous version converted it twice and
    discarded the first result), cast to float32 and split into channels.

    NOTE(review): this is a plain arithmetic mean; it does not treat hue as a
    circular quantity — confirm that is acceptable for downstream use.

    Args:
        img: image array in OpenCV BGR channel order.

    Returns:
        Mean value of the hue channel.
    """
    imghsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype("float32")
    (h, s, v) = cv2.split(imghsv)
    return h.mean()

def get_saturation(img):
    """Return the mean of the S channel of a BGR image converted to HSV.

    The image is converted once (the previous version converted it twice and
    discarded the first result), cast to float32 and split into channels.

    Args:
        img: image array in OpenCV BGR channel order.

    Returns:
        Mean value of the saturation channel.
    """
    imghsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype("float32")
    (h, s, v) = cv2.split(imghsv)
    return s.mean()

def get_entropy(img):
    """Return the entropy (in bits) of the grayscale intensity histogram.

    The BGR image is converted to grayscale, a 256-bin histogram over [0, 256)
    is normalized to sum to one, and -sum(p * log2(p + 1e-5)) is returned.
    The small constant keeps the logarithm finite for empty bins; it also
    shifts the result slightly away from the exact entropy.

    Args:
        img: image array in OpenCV BGR channel order.

    Returns:
        The histogram entropy.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    counts = cv2.calcHist([gray], [0], None, [256], [0, 256])
    probs = counts.ravel() / float(counts.sum())
    weighted_logs = probs * np.log2(probs + 0.00001)
    return -1 * weighted_logs.sum()

def get_contrast(img):
    """Return the per-pixel average of summed squared channel deviations.

    For each of the three channels the squared deviation from that channel's
    mean is computed; the three maps are added, summed over all pixels and
    divided by the pixel count (height * width).

    Args:
        img: 3-channel image array in BGR channel order.

    Returns:
        The contrast score as a float.
    """
    blue, green, red = cv2.split(img)
    sq_dev = [(plane - plane.mean()) ** 2 for plane in (blue, green, red)]
    pixel_count = float(img.shape[0] * img.shape[1])
    return (sq_dev[0] + sq_dev[1] + sq_dev[2]).sum() / pixel_count

In [13]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [22]:
class ImageDataset(Dataset):
    """Dataset yielding handcrafted features for every artwork image on disk.

    Each item is a dict with:
        'id'  - the artwork id parsed from the file name (``<id>.jpg``),
        'att' - 6 attractiveness features in the order brightness, sharpness,
                saturation, colorfulness, entropy, contrast,
        'lbp' - 59-bin normalized histogram of local binary pattern codes.
    """

    def __init__(self, image_pattern="/mnt/workspace/Ugallery/images/*.jpg"):
        """Collect the image files matching ``image_pattern``.

        Args:
            image_pattern: glob pattern selecting the image files; every
                matched file name (without extension) must be an integer id.

        Raises:
            ValueError: if a matched file name is not an integer.
        """
        # glob returns files in arbitrary order; sorting makes the item order
        # (and therefore the saved feature matrices) reproducible across runs.
        self.image_files = sorted(glob.glob(image_pattern))
        self.artwork_ids = [
            int(os.path.splitext(os.path.basename(path))[0])
            for path in self.image_files
        ]

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and compute its feature vectors.

        Raises:
            IOError: if the image file cannot be read or decoded.
        """
        # load image
        file = self.image_files[idx]
        img = cv2.imread(file, cv2.IMREAD_COLOR)
        if img is None:
            # imread signals failure by returning None instead of raising
            raise IOError("could not read image file: %s" % file)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # attractiveness
        att_vector = np.array([
            get_brightness(img),
            get_sharpness(img_gray),
            get_saturation(img),
            get_colorfulness(img),
            get_entropy(img),
            get_contrast(img),
        ])

        # LBP: histogram of the pattern codes, normalized by the pixel count.
        # The codes are integer-valued, so bincount replaces the manual
        # unique/zip fill loop; minlength keeps empty bins at zero.
        codes = local_binary_pattern(img_gray, 8, 1, method='nri_uniform')
        lbp_vector = np.bincount(
            codes.astype(np.int64).ravel(), minlength=59
        ).astype(np.float64)
        lbp_vector /= (codes.shape[0] * codes.shape[1])

        # return item
        return {'id': self.artwork_ids[idx],
                'att': att_vector,
                'lbp': lbp_vector}

In [67]:
# Index the artwork images on disk; pixels are only read when an item is requested.
image_dataset = ImageDataset()

In [68]:
# Feature extraction runs inside the loader: batches of 256 items, 8 worker processes.
dataloader = DataLoader(image_dataset, batch_size=256, num_workers=8)

In [69]:
# Pre-allocate one row per image: ids, 6 attractiveness features, 59 LBP bins.
# The buffers are uninitialized (np.empty) and are filled by the extraction loop.
# Features are stored as float16, i.e. with reduced precision.
N = len(image_dataset)
output_ids = np.empty(N, dtype=int)
output_att_features, output_lbp_features = (
    np.empty((N, n_columns), dtype=np.float16) for n_columns in (6, 59)
)

In [70]:
from time import time

In [71]:
# Run the dataset through the loader and copy every batch into the
# pre-allocated output buffers, logging progress after each batch.
offset = 0
start_time = time()
for i_batch, batch in enumerate(dataloader):
    assert offset < N

    batch_ids = batch['id'].numpy()
    batch_att_features = batch['att'].numpy()
    batch_lbp_features = batch['lbp'].numpy()
    batch_size = len(batch_ids)  # the last batch may be smaller than the others

    output_ids[offset:offset+batch_size] = batch_ids
    output_att_features[offset:offset+batch_size] = batch_att_features
    output_lbp_features[offset:offset+batch_size] = batch_lbp_features

    elapsed_time = time() - start_time
    offset += batch_size
    print('i_batch = %d, offset = %d, batch_size = %d, elapsed_time = %.2f' % (i_batch, offset, batch_size, elapsed_time))

# The buffers were created with np.empty, so any row not written above would
# hold uninitialized memory; fail loudly instead of saving garbage.
assert offset == N, 'filled %d of %d rows' % (offset, N)

i_batch = 0, offset = 256, batch_size = 256, elapsed_time = 530.39
i_batch = 1, offset = 512, batch_size = 256, elapsed_time = 533.04
i_batch = 2, offset = 768, batch_size = 256, elapsed_time = 540.82
i_batch = 3, offset = 1024, batch_size = 256, elapsed_time = 542.79
i_batch = 4, offset = 1280, batch_size = 256, elapsed_time = 542.79
i_batch = 5, offset = 1536, batch_size = 256, elapsed_time = 542.80
i_batch = 6, offset = 1792, batch_size = 256, elapsed_time = 542.80
i_batch = 7, offset = 2048, batch_size = 256, elapsed_time = 542.80
i_batch = 8, offset = 2304, batch_size = 256, elapsed_time = 1041.70
i_batch = 9, offset = 2560, batch_size = 256, elapsed_time = 1050.69
i_batch = 10, offset = 2816, batch_size = 256, elapsed_time = 1052.92
i_batch = 11, offset = 3072, batch_size = 256, elapsed_time = 1059.67
i_batch = 12, offset = 3328, batch_size = 256, elapsed_time = 1060.99
i_batch = 13, offset = 3584, batch_size = 256, elapsed_time = 1087.09
i_batch = 14, offset = 3840, batch_size =

In [72]:
import os
dir_path = "/mnt/workspace/Ugallery/Handcrafted/"
os.makedirs(dir_path, exist_ok=True)
# np.save writes the actual NPY format that the ".npy" extension promises.
# ndarray.dump (used previously) writes a pickle, which np.load only reads
# when allow_pickle=True. Files written here load with a plain np.load(path).
# NOTE(review): readers that call pickle.load directly on these files must
# switch to np.load — confirm there are none.
np.save(os.path.join(dir_path, "attractiveness.npy"), output_att_features)
np.save(os.path.join(dir_path, "lbp.npy"), output_lbp_features)
np.save(os.path.join(dir_path, "ids.npy"), output_ids)