In [1]:
import cv2
import numpy as np
import pandas as pd
import os

from sklearn.cluster import KMeans

from imutils import face_utils
import dlib

In [2]:
n_colors = 4

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

In [3]:
data_root = './images/dataset_'
seasons = ['spring', 'summer', 'fall', 'winter']

In [4]:
def bgr2cmyk(bgr):
    res_cmyk = []
    bgr = bgr / 255.0
    
    for i in range(len(bgr)):
        b, g, r = bgr[i]
        k = 1 - max(r, g, b)
        c = (1 - r - k) / (1 - k) * 255
        m = (1 - g - k) / (1 - k) * 255
        y = (1 - b - k) / (1 - k) * 255

        tmp = np.array([c, m, y, k])
        res_cmyk.append(tmp)
        
    return np.array(res_cmyk)

In [5]:
lips = None
left_cheek = None
right_cheek = None
right_eye = None
left_eye = None
nose = None

def extract_face_part(img, face_part_points):

    (x, y, w, h) = cv2.boundingRect(face_part_points)
    crop = img[y:y+h, x:x+w]
    
    # https://www.researchgate.net/publication/262371199_Explicit_image_detection_using_YCbCr_space_color_model_as_skin_detection
    # filter skin only (YCbCr)
    crop = cv2.cvtColor(crop, cv2.COLOR_BGR2YCrCb)
    mask = cv2.inRange(crop, np.array([0, 133, 77]), np.array([255, 173, 127]))
    crop = cv2.bitwise_and(crop, crop, mask=mask)
    crop = cv2.cvtColor(crop, cv2.COLOR_YCrCb2BGR)

    crop = crop[~np.all(crop == [0, 135, 0], axis=-1)]
    crop = crop.reshape(((1, crop.shape[0], 3)))
    
    return crop

def detect_face_part(img):
    face_parts = [[] for _ in range(len(face_utils.FACIAL_LANDMARKS_IDXS))]

    faces = detector(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 1)
    
    if len(faces) == 0:
        return 0

    rect = faces[0]

    shape = predictor(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), rect)
    shape = face_utils.shape_to_np(shape)

    for idx, (_, (i, j)) in enumerate(face_utils.FACIAL_LANDMARKS_IDXS.items()):
        if idx not in [1, 3]:
            face_parts[idx] = shape[i:j]

    lips = extract_face_part(img, np.concatenate((shape[48:60], shape[60:68])))
    left_cheek = extract_face_part(img, np.concatenate((shape[29:33], shape[4:9])))
    right_cheek = extract_face_part(img, np.concatenate((shape[29:33], shape[10:15])))
    right_eye = extract_face_part(img, shape[36:42])
    left_eye = extract_face_part(img, shape[42:48])
    nose = extract_face_part(img, shape[27:36])

    return len(faces), lips, left_cheek, right_cheek, right_eye, left_eye, nose

def create_palette(img, image_path='image.jpg'):
        
    img = cv2.imread(image_path)
    
    if img is None:
        os.remove(image_path)
        return None
        
    yes_faces, lips, left_cheek, right_cheek, right_eye, left_eye, nose = detect_face_part(img)
    
    if not yes_faces:
        os.remove(image_path)
        return None
    
    stacked_images = np.hstack([right_eye, left_eye, lips, left_cheek, right_cheek, nose])
    stacked_images = stacked_images.reshape(-1, 3)
    
    if stacked_images.shape[0] == 0:
        os.remove(image_path)
        return None
        
    kmeans = KMeans(n_clusters=n_colors, n_init=10, random_state=42)
    kmeans.fit(stacked_images)

    cluster_centers = kmeans.cluster_centers_.astype(int)
    
    # save_palette(cluster_centers)
    
    return cluster_centers, lips, left_cheek, right_cheek, right_eye, left_eye, nose

In [6]:
def calculate_contrast(img):
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hist = cv2.calcHist([gray_image], [0], None, [256], [0, 256])
    hist /= hist.sum()
    mean = np.mean(hist)
    variance = np.mean((hist - mean) ** 2)
    return variance

In [7]:
for season in seasons:
    this_df = pd.DataFrame(columns=['lip_avg_a', 'face_l_var', 'skin_avg_b', 'H', 'S', 'V', 'L', 'a', 'b', 'C', 'M', 'Y', 'K', 'label'])
    for file in os.listdir(os.path.join(data_root, season)):
        full_path = os.path.join(data_root, season, file)
        img = cv2.imread(full_path)
        if img is None:
            continue
        palette, lips, left_cheek, right_cheek, right_eye, left_eye, nose = create_palette(full_path)
        if palette is None:
            continue
        cmyk_palette = bgr2cmyk(palette)
        palette = np.array([palette], np.uint8)
        hsv_palette = cv2.cvtColor(palette, cv2.COLOR_BGR2HSV)
        lab_palette = cv2.cvtColor(palette, cv2.COLOR_BGR2LAB)
    
        mean_hsv = np.mean(hsv_palette, axis=1)[0]
        mean_lab = np.mean(lab_palette, axis=1)[0]
        mean_cmyk = np.mean(cmyk_palette, axis=0)
        
        skin = np.hstack([left_cheek, right_cheek, nose])
        skin = skin.reshape(-1, 3)
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        kmeans.fit(skin)
        skin_centers = kmeans.cluster_centers_.astype(int)
        skin_centers = np.array([skin_centers], np.uint8)
        lab_palette = cv2.cvtColor(skin_centers, cv2.COLOR_BGR2LAB)
        mean_lab_skin = np.mean(lab_palette, axis=1)[0]
        
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        lips = lips.reshape(-1, 3)
        kmeans.fit(lips)
        lips_centers = kmeans.cluster_centers_.astype(int)
        lips_centers = np.array([lips_centers], np.uint8)
        lab_palette = cv2.cvtColor(lips_centers, cv2.COLOR_BGR2LAB)
        mean_lab_lips = np.mean(lab_palette, axis=1)[0]
        
        h, w = img.shape[:2]
        try:
            face = detector(img)[0]
        except:
            continue
        face = img[max(0, face.top()):min(face.bottom(), h), max(0, face.left()):min(face.right(), w)]
        face_l_var = calculate_contrast(face)
        
        tmp = np.array([mean_lab_lips[1], face_l_var, mean_lab_skin[2]])
        row = np.concatenate((tmp, mean_hsv, mean_lab, mean_cmyk)).tolist()
        row.append(season)
        this_df.loc[len(this_df)] = row
    print(f'{season}: {len(this_df)} rows')
    this_df.to_csv(f'images/dataset_/mean_{season}.csv', index=False)

spring: 59 rows
summer: 61 rows
fall: 49 rows
winter: 68 rows


In [8]:
df = pd.concat([pd.read_csv(f'images/dataset_/mean_{season}.csv') for season in seasons])
df = df.sample(frac=1).reset_index(drop=True)
df.to_csv('images/dataset_/mean_shuffled.csv', index=False)

In [9]:
from sklearn.preprocessing import LabelEncoder

In [10]:
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

In [11]:
corr = df.corr()
corr['label'].sort_values(ascending=False)

label         1.000000
face_l_var    0.092497
lip_avg_a          NaN
skin_avg_b         NaN
H                  NaN
S                  NaN
V                  NaN
L                  NaN
a                  NaN
b                  NaN
C                  NaN
M                  NaN
Y                  NaN
K                  NaN
Name: label, dtype: float64

In [12]:
df

Unnamed: 0,lip_avg_a,face_l_var,skin_avg_b,H,S,V,L,a,b,C,M,Y,K,label
0,150.666667,0.000022,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,2
1,150.666667,0.000009,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,0
2,150.666667,0.000019,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,1
3,150.666667,0.000018,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,3
4,150.666667,0.000019,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232,150.666667,0.000008,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,2
233,150.666667,0.000015,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,2
234,150.666667,0.000009,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,2
235,150.666667,0.000016,142.0,6.0,87.75,173.0,146.0,143.75,141.25,0.0,70.415163,87.684261,0.321569,0
