In [1]:
# train & run
import cv2
import numpy as np
import dlib
from imutils import face_utils
import joblib

# train
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import datetime
from collections import Counter


In [2]:
%matplotlib qt

In [3]:
## face detector
dlib_det = dlib.get_frontal_face_detector()

"""# dmsc_det_l = [
#     cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml"),
#     cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_alt2.xml"),
#     cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_alt.xml"),
#     cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_alt_tree.xml"),
# ]

# dmsc_det_n = len(dmsc_det_l)
# dmsc_det_params = {"scaleFactor": 1.1, "minNeighbors": 10, "minSize": (5,5), "flags": cv2.CASCADE_SCALE_IMAGE}
"""

def face_detector(f_gray):
    rects_dlib = dlib_det(f_gray, 1)
    # rects_dmsc_l = [dmsc_det_l[di].detectMultiScale(f_gray, **dmsc_det_params) for di in range(dmsc_det_n)]
    rects = rects_dlib

    if      rects.__len__() == 1:   rect = rects[0]
    elif    rects.__len__() == 0:   rect = None
    else:                           rect = sorted(rects, key=lambda arr: arr.size)[-1] # of biggest w*h
    return rect

    """# face[i] = faceDet[i].detectMultiScale(gray, **detection_params)
    #Go over detected faces, stop at the first detected face, return empty if no face.
    # if len(face) == 1:
    #     facefeatures = face
    # elif len(face_two) == 1:
    #     facefeatures = face_two
    # elif len(face_three) == 1:
    # facefeatures = face_three
    # elif len(face_four) == 1:
    #     facefeatures = face_four
    # else:
    #     facefeatures = ""
    # #Cut and save the face
    # for (x, y, w, h) in facefeatures: # get coordinates and size of the rectangle containing the face
    #     print("Face found in file: %s" %f)
    #     gray = gray[y:y+h, x:x+w] # cut the frame to size

    # try:
    #     out = cv2.resize(gray, (350,350)) # Resize face so all images have same size
    #     cv2.imwrite("dataset\\%s\\%s.jpg" %(emotion, filenumber), out) # write image
    # except:
    #     pass # if error pass file
    # filenumber+= 1 # increment image number"""

"""# test: dlib seems to be best and works always when dmsc works
# for i in range(datacfr.__len__()):
#   f_gray = datacfr[i][2]

#   rects_dlib = dlib_det(f_gray, 1)
#   rects_dmsc_l = [dmsc_det_l[di].detectMultiScale(f_gray, **dmsc_det_params) for di in range(dmsc_det_n)]

#   print(rects_dlib, rects_dmsc_l)"""
None

In [4]:
## face landmarker

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("./shape_predictor_68_face_landmarks.dat")

def dg_face_draw_landmark(f_gray, marks):
    npshape = marks
    f_draw = cv2.cvtColor(f_gray,cv2.COLOR_GRAY2RGB)
    for (name, (i, j)) in face_utils.FACIAL_LANDMARKS_IDXS.items():
        (x, y, w, h) = cv2.boundingRect(np.array([npshape[i:j]]))
    f_mrkd = face_utils.visualize_facial_landmarks(f_draw, npshape)
    return f_mrkd

def face_landmarker(f_gray, rect=None):
    rect = rect if rect is not None else dlib.rectangle(0, 0, f_gray.shape[1], f_gray.shape[0]) 
        # left, top, right, bottom
    shape = predictor(f_gray, rect)
    marks = face_utils.shape_to_np(shape)    
    return marks


In [5]:
# load > prep
labels_all = ['anger', 'disgust', 'fear', 'happy', 'neutral', 'sadness', "surprise"]

def load_im(f_gray):
    im = f_gray
    im_wh = np.asarray(im.shape)
    im_wh_d2 = np.asarray(im.shape)/2

    rect = face_detector(f_gray)
    if rect is None:
        return None
    rect_bb = np.asarray([
        [max(0,         rect.top()      ),  max(0,          rect.left() )], 
        [min(im_wh[0],  rect.bottom()   ),  min(im_wh[1],   rect.right())]
    ])
    rect_wh         = np.asarray([rect_bb[1,0]-rect_bb[0,0], rect_bb[1,1]-rect_bb[0,1]])
    rect_wh_d2      = rect_wh//2
    rect_cc         = rect_bb[0] + rect_wh_d2
    rect_bb_normd   = (rect_bb - im_wh_d2) / im_wh_d2
    rect_cc_normd   = (rect_cc - im_wh_d2) / im_wh_d2

    f_gray_crop     = f_gray[rect_bb[0,0]:rect_bb[1,0], rect_bb[0,1]:rect_bb[1,1]]

    marks           = face_landmarker(f_gray_crop)
    f_marked        = dg_face_draw_landmark(f_gray_crop, marks)
    """
    try:
        f_marked = dg_face_draw_landmark(f_gray_crop, marks)
    except Exception as ee:
        print(f_gray_crop.shape, rect)
        raise ee
        """
    marks_normd     = (marks - rect_wh_d2) / rect_wh_d2 

    return (
        (f_gray,        im_wh,          im_wh_d2        ),
        (f_gray_crop,   rect_wh,        rect_wh_d2      ),
        (marks,         rect_bb,        rect_cc,        ),
        (marks_normd,   rect_bb_normd,  rect_cc_normd,  ),  
        f_marked, rect,     
    )

## load > fer
def load_fer(sample):
    label = l_lookup[sample["emotion"]]

    # sample is gray
    pixels = np.uint8(sample["pixels"].split(" "))
    size = int(np.sqrt(len(pixels)))
    f_gray = pixels.reshape(size, size)
    im_params = load_im(f_gray)

    return label, im_params
    
     
dataset = pd.read_csv("./fer2013.csv")
# dataset = pd.read_csv("./fer2013.csv")[:100] #dg
l_lookup = ['anger', 'disgust', 'fear', 'happy', 'sadness', 'surprise', "neutral"] 

## SKIP
# datacfr = [
#         (label, imp)
#     for si in range(dataset.__len__()) 
#     for sample in (dataset.iloc[si],)
#     for label, imp in (load_fer(sample),)
#     if imp is not None
# ]


In [6]:
# with open('datacfr.pickle', 'wb') as f:
#     pickle.dump(datacfr, f)

with open('datacfr.pickle', 'rb') as f:
     datacfr = pickle.load(f)

datacfr

[('anger',
  ((array([[ 70,  80,  82, ...,  52,  43,  41],
           [ 65,  61,  58, ...,  56,  52,  44],
           [ 50,  43,  54, ...,  49,  56,  47],
           ...,
           [ 91,  65,  42, ...,  72,  56,  43],
           [ 77,  82,  79, ..., 105,  70,  46],
           [ 77,  72,  84, ..., 106, 109,  82]], dtype=uint8),
    array([48, 48]),
    array([24., 24.])),
   (array([[104, 129, 143, ...,  78,  87,  69],
           [119, 134, 145, ...,  62,  77,  61],
           [123, 136, 144, ...,  48,  74,  56],
           ...,
           [ 79,  95, 107, ..., 146,  87,  91],
           [ 77,  84,  96, ...,  72,  67,  57],
           [ 73,  77,  88, ...,  66,  80,  67]], dtype=uint8),
    array([36, 36]),
    array([18, 18])),
   (array([[-4, 11],
           [-4, 16],
           [-4, 22],
           [-3, 27],
           [-1, 32],
           [ 2, 36],
           [ 7, 38],
           [12, 38],
           [16, 39],
           [20, 38],
           [23, 37],
           [25, 34],
           

In [7]:
# checks
# sample = dataset.iloc[0]
print(datacfr.__len__())

no = 0
if no:
    l = list([np.random.randint(datacfr.__len__()) for _ in range(no)])
    fig, ax = plt.subplots(l.__len__(),3, figsize=(10, 5*l.__len__()))
else: l = []

for sx, si in enumerate(l):
    try:
        datacfr[si]
        label, (
            (f_gray,        im_wh,          im_wh_d2        ),
            (f_gray_crop,   rect_wh,        rect_wh_d2      ),
            (marks,         rect_bb,        rect_cc,        ),
            (marks_normd,   rect_bb_normd,  rect_cc_normd,  ),  
            f_marked, rect,     
        ) = datacfr[si]

        ax[sx, 0].imshow(f_gray)
        ax[sx, 1].imshow(f_gray_crop)
        ax[sx, 2].imshow(f_marked)
        ax[sx, 0].set_title(label)

        print(rect_bb_normd.flatten(), rect_cc, rect_cc_normd.flatten(), marks_normd.shape)
    except:
        pass


24913


In [8]:
# stats
Counter([label for label, (
        (f_gray,        im_wh,          im_wh_d2        ),
        (f_gray_crop,   rect_wh,        rect_wh_d2      ),
        (marks,         rect_bb,        rect_cc,        ),
        (marks_normd,   rect_bb_normd,  rect_cc_normd,  ),  
        f_marked, rect,     
    ) in datacfr])

# for ix, (label, imp) in enumerate(datacfr):
#     print(ix, label)

Counter({'anger': 3404,
         'fear': 3176,
         'neutral': 4645,
         'happy': 6983,
         'surprise': 2959,
         'sadness': 3320,
         'disgust': 426})

## SVC

In [9]:
import pandas as pd #Pandas is a very useful tool for data analysis

import matplotlib.pyplot as plt #Matplot is a classic Python library for plotting charts
import matplotlib.image as mpimg
# %matplotlib inline
import os #Miscellaneous operating system interfaces
import random #Generate pseudo-random numbers
from collections import Counter #High-performance container datatypes

#scikit-learn: Machine Learning in Python, Simple and efficient tools for data mining and data analysis
from sklearn.svm import SVC
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
#from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics
from sklearn.model_selection import cross_val_score

In [10]:
## models
C=1
models = (
    SVC(kernel='linear',                    C=C     ,probability=True),
    SVC(kernel='rbf',       gamma=0.5,      C=C     ,probability=True),
    SVC(kernel='rbf',       gamma=0.3,      C=C     ,probability=True),
    SVC(kernel='rbf',       gamma=0.7,      C=C     ,probability=True),
    SVC(kernel='rbf',       gamma=0.5,      C=5     ,probability=True),
    SVC(kernel='rbf',       gamma=0.5,      C=.2    ,probability=True),
    SVC(kernel='rbf',       gamma='scale',  C=C     ,probability=True),
    SVC(kernel='rbf',       gamma='auto',   C=C     ,probability=True),
    SVC(kernel='poly',      degree=3,       C=C     ,probability=True),
    SVC(kernel='poly',      degree=5,       C=C     ,probability=True),
    # KNeighborsClassifier(3                          ,probability=True),
    # # KNeighborsClassifier(7)
    # DecisionTreeClassifier(max_depth=8              ,probability=True),
    # RandomForestClassifier(max_depth=8,
        # n_estimators=10, max_features=1             ,probability=True),          
    # AdaBoostClassifier(                              probability=True),
    # GaussianNB(                                      probability=True),
)

names = (
    "svc_linear",
    "svc_rbf5_10",
    "svc_rbf7_10",
    "svc_rbf3_10",
    "svc_rbf5_50",
    "svc_rbf5_20",
    "svc_rbf_scale",
    "svc_rbf_auto",
    "svc_poly3",
    "svc_poly7",
    # "k_nearest3",
    # # "k_nearest7",
    # "decision_tree", 
    # "random_forest", 
    # "ada_boost",
    # "naive_bayes",
)

In [11]:
features = np.asarray([marks_normd.flatten() for label, (
        (f_gray,        im_wh,          im_wh_d2        ),
        (f_gray_crop,   rect_wh,        rect_wh_d2      ),
        (marks,         rect_bb,        rect_cc,        ),
        (marks_normd,   rect_bb_normd,  rect_cc_normd,  ),  
        f_marked, rect,     
    ) in datacfr])
labels = np.asarray([label for label, (
        (f_gray,        im_wh,          im_wh_d2        ),
        (f_gray_crop,   rect_wh,        rect_wh_d2      ),
        (marks,         rect_bb,        rect_cc,        ),
        (marks_normd,   rect_bb_normd,  rect_cc_normd,  ),  
        f_marked, rect,     
    ) in datacfr])

print (features.shape, features.dtype ,labels.shape, labels.dtype)


(24913, 136) float64 (24913,) <U8


In [12]:
## train
## SKIP
train_cycle_n = None

for tcix in (train_cycle_n and range(train_cycle_n) or []):
    train_feat,test_feat,train_labels,test_labels = train_test_split(features,labels, test_size=0.2) #,random_state=42 )
    print ("Training set:", train_feat.shape[0], "samples")
    print ("Test set:", test_feat.shape[0], "samples")

    tss = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    print(f"Train cycle #{tcix}, tss={tss}")
    print ("      Classifiers: \t Accuracy:".expandtabs(44))
    for name, clf in zip(names, models):     
        ts = datetime.datetime.now()
        clf.fit(train_feat,train_labels) #train each model
        scores = clf.score(test_feat,test_labels) #evaluate each model in the test set
        print ("{:46} {:.3f} ".format(name, scores.mean()), datetime.datetime.now()-ts)
        joblib.dump(clf, f"./clf/{int(scores.mean()*1e4)}__{name}__{tss}")

## Check

In [13]:
def rand_im():
    ix = np.random.randint(datacfr.__len__())
    filename = f"_datacrt_{ix}"
    label, (
        (f_gray,        im_wh,          im_wh_d2        ),
        (f_gray_crop,   rect_wh,        rect_wh_d2      ),
        (marks,         rect_bb,        rect_cc,        ),
        (marks_normd,   rect_bb_normd,  rect_cc_normd,  ),  
        f_marked, rect,     
    ) = datacfr[ix]
    return label, marks_normd.flatten(), (f_gray, f_gray_crop, f_marked), filename

def read_im(filename):
    try:
        f_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE )
        label = "???"
        (
            (f_gray,        im_wh,          im_wh_d2        ),
            (f_gray_crop,   rect_wh,        rect_wh_d2      ),
            (marks,         rect_bb,        rect_cc,        ),
            (marks_normd,   rect_bb_normd,  rect_cc_normd,  ),  
            f_marked, rect,     
        ) = load_im(f_gray)
        return label, marks_normd.flatten(), (f_gray, f_gray_crop, f_marked), filename
    except Exception as ee:
        print(f"{filename}: unable to recog: {str(ee)}")
        return None


In [17]:
clf_fna_l = [
    "./clf2/5727__svc_rbf5_100__20240121054043",
    "./clf2/5727__svc_rbf3_100__20240121062713",
    "./clf2/5725__svc_rbf5_100__20240121062713",
]

im_l = [
    # rand_im(),
    # rand_im(),
    # rand_im(),
    # rand_im(),
    # rand_im(),
    
    # read_im("./vf1.jpg")
    read_im("./foto/anger_1.jpg"), # neutral
    read_im("./foto/neutral1.jpg"), # ok
    read_im("./foto/disgust_2.jpg"), # surprise
    
    # niedobre
    # read_im("./foto/disgust1.jpg"), # anger
    # read_im("./foto/happy1.jpg"),
    # read_im("./foto/")

]

# ---
if clf_fna_l and im_l:
    l_dict = {lix: label for lix, label in enumerate(labels_all)}
    im_l = [im for im in im_l if im is not None]
    im_ln = im_l.__len__()
    clf_n = clf_fna_l.__len__()
    clf_l = [joblib.load(clf_fna) for clf_fna in clf_fna_l]
    im_yt = np.asarray( [im_l[imix][0] for imix in range(im_ln)])
    im_xt = np.asarray( [im_l[imix][1] for imix in range(im_ln)])
    im_im =             [im_l[imix][2] for imix in range(im_ln)]
    im_na =             [im_l[imix][3] for imix in range(im_ln)]

    predict = [clf.predict      (im_xt) for clf in clf_l]
    p_proba = [clf.predict_proba(im_xt) for clf in clf_l]

    imad_n = 2
    fig, ax = plt.subplots(im_ln, imad_n+clf_n)
    fig.tight_layout()
    fig.suptitle(str(l_dict))

    for imx in range(im_l.__len__()):
        ax[imx, 0].imshow(im_im[imx][0])
        ax[imx, 1].imshow(im_im[imx][2])
        ax[imx, 0].set_title(f"FNA: {im_na[imx]}; YT: {im_yt[imx]}")

        for cix in range(clf_n):
            cax = ax[imx, imad_n+cix]
            cax.bar(l_dict.keys(), p_proba[cix][imx]/p_proba[cix][imx].sum())
            cax.bar([l+0.3 for l in l_dict.keys()], p_proba[cix][imx])
            cax.set_title(f"YP: {predict[cix][imx]}")
            cax.set_ylim((0,1))
    

## emo2lab

In [15]:
from matrixWhat import samples
from sklearn.svm import SVR

samples = np.asarray(samples)

## models
C=1
models_r = (
    (SVR(kernel='linear',                    C=C     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=C     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.3,      C=C     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.7,      C=C     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=5     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=10    ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=20    ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=50    ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=100   ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=200   ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.3,      C=10    ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma=0.5,      C=.2    ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma='scale',  C=C     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='rbf',       gamma='auto',   C=C     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='poly',      degree=3,       C=C     ) for _ in range(2)), #,probability=True),
    (SVR(kernel='poly',      degree=5,       C=C     ) for _ in range(2)), #,probability=True),
)

names_r = (
    "svr_linear",
    "svr_rbf5_10",
    "svr_rbf7_10",
    "svr_rbf3_10",
    "svr_rbf5_50",
    "svr_rbf5_100",
    "svr_rbf5_200",
    "svr_rbf5_500",
    "svr_rbf5_1k",
    "svr_rbf5_2k",
    "svr_rbf3_100",
    "svr_rbf5_20",
    "svr_rbf_scale",
    "svr_rbf_auto",
    "svr_poly3",
    "svr_poly5",
)

emos = samples[:, 0:7]
cola = samples[:, 7]
colb = samples[:, 8]

print (features.shape, features.dtype ,labels.shape, labels.dtype)

train_cycle_n = 1

for tcix in (train_cycle_n and range(train_cycle_n) or []):
    tss = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    print(f"Train cycle #{tcix}, tss={tss}")
    print ("      Classifiers: \t Accuracy:".expandtabs(44))
    for name, (clf_a, clf_b) in zip(names_r, models_r):     
        clf_a.fit(emos,cola) #train each model
        scores = clf_a.score(emos,cola) #evaluate each model in the test set
        print ("{:46} {:.3f} ".format(name, scores.mean()))
        joblib.dump(clf_a, f"./clf_emo2lab/a_{int(scores.mean()*1e4)}__{name}__{tss}")

        clf_b.fit(emos,colb) #train each model
        scores = clf_b.score(emos,colb)
        print ("{:46} {:.3f} ".format(name, scores.mean()))
        joblib.dump(clf_b, f"./clf_emo2lab/b_{int(scores.mean()*1e4)}__{name}__{tss}")


(24913, 136) float64 (24913,) <U8
Train cycle #0, tss=20240121111148
      Classifiers:                           Accuracy:
svr_linear                                     0.437 
svr_linear                                     0.350 
svr_rbf5_10                                    0.564 
svr_rbf5_10                                    0.413 
svr_rbf7_10                                    0.495 
svr_rbf7_10                                    0.366 
svr_rbf3_10                                    0.617 
svr_rbf3_10                                    0.456 
svr_rbf5_50                                    0.712 
svr_rbf5_50                                    0.561 
svr_rbf5_100                                   0.757 
svr_rbf5_100                                   0.631 
svr_rbf5_200                                   0.808 
svr_rbf5_200                                   0.711 
svr_rbf5_500                                   0.850 
svr_rbf5_500                                   0.845 
svr_rbf5_1k 

In [16]:
clf_a_emo2lab = joblib.load('./clf_emo2lab/a_8611__svr_rbf5_2k__20240121111148')
clf_b_emo2lab = joblib.load('./clf_emo2lab/b_9780__svr_rbf5_2k__20240121111148')

ypa = clf_a_emo2lab.predict(emos)
ypb = clf_b_emo2lab.predict(emos)

(np.hstack([emos, cola[:, None], colb[:, None], ypa[:, None], ypb[:, None]]) * 100).astype(int)

array([[   0,    0,   80,   10,  100,    0,    0,   30,   80,   40,   90],
       [ 100,   30,   20,    0,   10,   30,   10,   30,    0,   20,   -9],
       [  10,    0,   90,   10,   30,   30,    0,   25,  100,   35,  109],
       [   0,    0,    0,  100,    0,   20,   20, -100,  100,  -89,   90],
       [   0,    0,   20,    0,   30,   90,   20,    0, -100,  -10,  -89],
       [   0,    0,    0,   10,    0,    0,  100, -100,  100,    6,  102],
       [   0,    0,   20,   10,   30,   90,   10,  -25,  -75,  -15,  -85],
       [   0,    0,    0,    0,    0,    0,  100,   25,   75,   14,   85],
       [  30,   90,   90,    0,   10,   40,    0,   90,  -70,   80,  -59],
       [   0,   10,   70,   10,   80,   10,   30,   25,  100,   14,   89],
       [   0,    0,    0,  100,    0,   70,   30,    0,    0,   -9,    1],
       [   0,    0,    0,   50,    0,    0,   70,   10,   95,    0,   85],
       [ 100,   70,   50,    0,   10,   40,    0,  -80, -100,  -69,  -90],
       [  10,   40,  100,

## Dump

In [25]:
emos.shape, cols.shape, yp.shape

((31, 7), (31, 1), (31,))

In [None]:
# rect utils

# def yx_normalize(im, yx):
#     return 

# def rect_crop(im, rect):
#     return im[
#         max(0, rect.top()   ):min(im.shape[0], rect.bottom()), 
#         max(0, rect.left()  ):min(im.shape[1], rect.right()),
#     ]

# def rect_center(rect):
#     return (rect.top()+(rect.height()//2), rect.left+(rect.width()//2))
# ["anger", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
# dlib.rectangle(0.5, 0.5, f_gray.shape[1], f_gray.shape[0])
