In [21]:
import numpy as np
import cv2
from imutils import face_utils
#import argparse
import imutils
import dlib
import os
#import time
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
#from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
#from sklearn.metrics import confusion_matrix
#from sklearn.externals import joblib
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
import glob
#from sklearn.neighbors import KNeighborsClassifier
#from sklearn import svm
#import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
import random
REF_POINTS = [1, 4, 14, 17, 18, 20, 22, 23, 25, 27, 28, 31, 32, 36, 37, 38, 40, 42, 43, 45, 46, 47, 49, 51, 52, 53, 61, 63, 65, 67]
EMOTIONS = ["neutral",  "happy", "sadness", "surprise",  "fear", "disgust", "anger"]
wanted_landmarks = [i-1 for i in REF_POINTS]

from sklearn.neural_network import MLPClassifier

In [2]:
#######################################################################################
##############                   Math and transformations                  ############
#######################################################################################
def squared_distance(x,y):
    return (x[0]-y[0])**2+(x[1]-y[1])**2

def rect_to_bb(rect):
    """ take a bounding predicted by dlib and convert it
     to the format (x, y, w, h) as we would normally do
     with OpenCV
     *from web tutorial*"""
    x = rect.left()
    y = rect.top()
    w = rect.right() - x
    h = rect.bottom() - y
    return (x, y, w, h) # return a tuple of (x, y, w, h)

def shape_to_np(shape, dtype="int"):
    """
    *from web tutorial*
    """
    # initialize the list of (x, y)-coordinates
    coords = np.zeros((68, 2), dtype=dtype)
    # loop over the 68 facial landmarks and convert them
    # to a 2-tuple of (x, y)-coordinates
    for i in range(0, 68):
        coords[i] = (shape.part(i).x, shape.part(i).y)
    # return the list of (x, y)-coordinates
    return coords

def nparray_to_pandas_images(faces_68_landmarks):
    """
    input - nparray of numpy array (list of images numpy array, which contains 68 cords(tuple))
    output - pandas dataframe of data
    """
    df = pd.DataFrame.from_records(faces_68_landmarks)
    return df
    
def dataset_from_affectnet(trainingCsvPath):
    """
    problem with affectnet landmarks...
    """
    data_df = pd.read_csv(trainingCsvPath)
    df_filtered = data_df.query('expression<8')
    landmarks = (df_filtered[['facial_landmarks']].values).flatten()
    labels = df_filtered[['expression']].values
    facial_landmarks_data = [np.reshape(i.split(";"),(68,2)) for i in landmarks]
    return (facial_landmarks_data, labels.flatten())

    
#######################################################################################
##############                   Point Methods                             ############
#######################################################################################

def dot_matrix(point_arr):
    """
    input - nparray of (x, y) points
    output - an nxn matrix M where M[i, j] is the dot product of (xi, yi) and (xj, yj)
    """
    dot_m = np.ndarray(shape=(len(point_arr), len(point_arr)), dtype=int)
    for i in range(len(point_arr)):
        for j in range(i+1):
            dot_m[i, j] = np.dot(point_arr[i], point_arr[j])
            dot_m[j, i] = dot_m[i, j]
    return dot_m

def dist_matrix(dot_m):
    """
    input - a dot matrix (output of dot_matrix method)
    output - an nxn matrix M where M[i, j] is the distance between (xi, yi) and (xj, yj)
    """
    dist_m = np.ndarray(shape=dot_m.shape, dtype=float)
    for i in range(dist_m.shape[0]):
        dist_m[i, i] = 0
        for j in range(i):
            dist_m[i, j] = np.sqrt(dot_m[i, i] - 2*dot_m[i, j] + dot_m[j, j])
            dist_m[j, i] = dist_m[i, j]
    return dist_m

def dist_array(dist_m):
    """
    input - a distance matrix (output of dist_matrix method)
    output - an array of all of the distances, w/o duplicates
    """
    dists = []
    for i in range(dist_m.shape[0]):
        for j in range(i):
            dists.append(dist_m[i, j])
    return np.array(dists)

def angle_array(dot_m, dist_m):
    """
    input - a dot matrix (output of dot_matrix method), a distance matrix (output of dist_matrix method)
    output - an array of all of the angles, w/o duplicates
    """
    angles = []
    for i in range(dot_m.shape[0]):
        for j in range(i):
            for k in range(j):
                #TODO change solution to devision by 0
                if not (dist_m[i, j] * dist_m[j, k] * dist_m[i, k]):
                    angles.append(-1)
                    angles.append(-1)
                    # angles.append(-1)
                else:
                    angles.append(np.arccos(round(
                        (dot_m[i, k] - dot_m[i, j] - dot_m[j, k] + dot_m[j, j]) / (dist_m[i, j] * dist_m[j, k]),
                        15)))
                    angles.append(np.arccos(round(
                        (dot_m[i, j] - dot_m[i, k] - dot_m[k, j] + dot_m[k, k]) / (dist_m[i, k] * dist_m[k, j]),
                        15)))
                    # angles.append(np.pi - angles[-1] - angles[-2])
    return np.array(angles)


#######################################################################################
##############            Detecting face and face landmarks                ############
#######################################################################################
    
def image_to_landmarks(image_path, detector, predictor):
    """assuming an image"""
    # load the input image, resize it, and convert it to grayscale
    image = cv2.imread(image_path)
    if image is None:
        return []
    image = imutils.resize(image, width=350)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # detect faces in the grayscale image
    rects = detector(gray, 1)
    # determine the facial landmarks for the face region, then
    # convert the facial landmark (x, y)-coordinates to a NumPy array
    if len(rects)==0:
        return []
    shape = predictor(gray, rects[0])
    shape = face_utils.shape_to_np(shape)
    #faces_68_landmarks.append(shape)
    return shape

def sort_sample_affectnet(inputFolder, csvPathAffectnet, start=0, count=10000):
    """
    csv: 'image_name', 'expression', '68_landmarks'
    """
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    data_df = pd.read_csv(csvPathAffectnet)
    landmarks = []
    # Gal's lines, do not touch!
    # folders = glob.glob(inputFolder + "\\*") #Returns a list of all folders with participant numbers
    # for folder in folders:
    #     files = glob.glob(folder + "\\*")
    #     for f in files:
    #         shape = image_to_landmarks(f, detector, predictor)
    #         shape = list(np.array(shape).flatten())
    #         img_name = [(f.split("\\"))[-1]]
    #         landmarks.append(img_name + shape)
    for i in range(start, start+count):
        f = "{0}\\{1}\\{2}".format(inputFolder, data_df.loc[i, "subDirectory"], data_df.loc[i, "filePath"])
        shape = image_to_landmarks(f, detector, predictor)
        shape = list(np.array(shape).flatten())
        img_name = [(f.split("\\"))[-1]]
        landmarks.append(img_name + shape)
    cols = ["filePath"] + ["x_{0:d}".format(i//2) if i%2==0 else "y_{0:d}".format(i//2) for i in range(2, 69*2)]
    landmarks_df = pd.DataFrame(landmarks, columns=cols, index=np.arange(start, start+count))
    if start == 0:
        data_df = data_df.merge(landmarks_df, on="filePath", how="left")
        data_df.to_csv('affectnet_landmarks.csv', index=False)
    else:
        data_df.update(landmarks_df)
        data_df.to_csv(csvPathAffectnet, index=False)

def add_expression_dummies(features_df):
    for i in range(len(EMOTIONS)):
        features_df["is_{0:s}".format(EMOTIONS[i])] = (features_df["expression"] == i)
    features_df.drop("expression", axis=1, inplace=True)

def csv_to_features(csvDirPath, maxRows=2000, filePrefix="affectnet_landmarks"):
    """
    in - csv from sort_sample_affectnet
    out - features dataframe
    """
    col_names = []
    for i in REF_POINTS:
        col_names.append("x_{0:d}".format(i))
        col_names.append("y_{0:d}".format(i))
    filenames = [entry.name for entry in os.scandir(csvDirPath) if entry.name.endswith(".csv") and entry.name.startswith(filePrefix)]
    for f in filenames:
        print("Processing {0}".format(f))
        data_df = pd.read_csv(os.path.join(csvDirPath, f))
        df_filtered = data_df.query('expression<=7').dropna().iloc[:maxRows, :] #filter out non-faces
        #ndarray of wanted landmarks (row per image)
        images_df = df_filtered[col_names]
        images_df = np.reshape(images_df.values.astype(int), (len(images_df), len(REF_POINTS), 2))
        #extract features
        features_df = extract_features_forall(images_df)
        features_df["expression"] = df_filtered["expression"].values
        add_expression_dummies(features_df)
        features_df.to_csv(os.path.join(csvDirPath, "features_{0}".format(f)))


#######################################################################################
##############            Extract features and reducing dimensions         ############
#######################################################################################

def reduce_correlated_cols(df, threshold=0.95):
    """
    input - df &threshold (if 1>|correlation|>threshold then dimension is reduced
    output - reduced df
    """
    corr = df.corr()
    corr = corr * np.fromfunction(lambda i, j: i > j, corr.shape)
    corr_cols = (corr > threshold).sum(axis=1)
    corr_cols = corr_cols[corr_cols > 0].axes[0].tolist()
    ret = df.drop(corr_cols, axis=1)
    return ret

def dimension_reduction_pca(df, components = 100):
    """
    input - dataframe of features & wanted dimension of features
    output - trained PCA
    uses PCA from skylearn
    """
    #Standardize the Data
    features = list(df.columns.values)
    # Separating out the features
    x = df.loc[:, features].values
    # Standardizing the features
    x = StandardScaler().fit_transform(x)
    #dim reduction
    pca = PCA(components)
    pca.fit_transform(x)
    return pca
    

#######################################################################################
##############            Machine learning algorithms                      ############
#######################################################################################

# logistic regression
def log_reg_classifier(imgs_features, imgs_lbls, c=1):
    """
    input - list of featurs list
    output - logistic regression classifier
    """
    clf = LogisticRegression(C = c, penalty = 'l2') #TODO check best C
    return clf.fit(imgs_features, imgs_lbls)

# SVM
def svm_classifier(imgs_features, imgs_lbls, c=1):
    """
    input - list of featurs list
    output - svm classifier
    """
    # Create a classifier: a support vector classifier
    svm_classifier = svm.SVC(C = c) #TODO check best C
    # training
    return svm_classifier.fit(imgs_features, imgs_lbls)
    
# KNN
def knn_classifier(imgs_features, imgs_lbls, k=1):
    """
    input - list of featurs list
    output - knn classifier
    """
    knn = KNeighborsClassifier(n_neighbors = k) #TODO check best k
    return knn.fit(imgs_features, imgs_lbls) 
    
    


In [3]:
def extract_dlib_facial_points(inputFolder):
    """
    input - images folder name
    output - ndarray of images facial landmarks 
    """
    wanted_landmarks = [i-1 for i in REF_POINTS]
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    files = glob.glob("%s\\*"%inputFolder) #Get list of all images in inputFolder
    faces_landmarks = []
    for f in files:
        if f.lower().endswith(".png") or f.lower().endswith(".jpg") or f.lower().endswith(".jpeg"): 
            shape = np.array(image_to_landmarks(f, detector, predictor))
            if (shape.size!=0):
                shape = shape[wanted_landmarks]
            faces_landmarks.append(shape)
    return np.array(faces_landmarks),files

def extract_features(image, f):
    """
    input - nparray of facial landmarks (point (x,y))
    output - nparray of features per image
    """
    #distance features
    dot_m = dot_matrix(image)
    dist_m = dist_matrix(dot_m)
    dists = dist_array(dist_m)
    norm_factor = np.linalg.norm(image[0]-image[3]) # dist(1,17)
    dists = dists / norm_factor
    #angles features
    angles = angle_array(dot_m, dist_m)
    #flatten and concat
    features_vector = np.around(np.concatenate((dists, angles)),decimals = 2)
    return np.append(f[-8:-4],features_vector)

def extract_features_forall(images,files):
    """
    input - ndarray of images facial landmarks (for each image a 68 long nparry of points)
    output - dataframe of images features
    """
    features = []
    cols = []
    for i in range(len(images)):
        if len(images[i]) != 0:
            features.append(extract_features(images[i],files[i]))
    cols = ["Person"] + ["dist_{1:d}_{0:d}".format(REF_POINTS[i], REF_POINTS[j]) for i in range(len(REF_POINTS)) for j in range(i)]
    for i in range(len(REF_POINTS)):
        for j in range(i):
            for k in range(j):
                cols.append("angle_{2:d}_{1:d}_{0:d}".format(REF_POINTS[i], REF_POINTS[j], REF_POINTS[k]))
                cols.append("angle_{1:d}_{2:d}_{0:d}".format(REF_POINTS[i], REF_POINTS[j], REF_POINTS[k]))
                # cols.append("angle_{2:d}_{0:d}_{1:d}".format(REF_POINTS[i], REF_POINTS[j], REF_POINTS[k]))
    df = pd.DataFrame(features, columns=cols)
    return df

def prepare_balanced_data(csvPaths, portionCount, testPart = 0.1, m_random_state = 33):
    assert testPart<=1
    test_threshhold = int(portionCount*(testPart))
    data_df_tmp = pd.read_csv(csvPaths[0])
    data_df_tmp['emotion'] = [0]*data_df_tmp.shape[0]
    data_df_tmp = shuffle(data_df_tmp, random_state=m_random_state)
    data_df_test = data_df_tmp[:test_threshhold]
    data_df_train = data_df_tmp[test_threshhold:]
    for i in range(len(csvPaths)-1):
        data_df_tmp = pd.read_csv(csvPaths[i+1])
        data_df_tmp['emotion'] = [i+1]*data_df_tmp.shape[0]
        data_df_tmp = data_df_tmp[:portionCount]
        data_df_tmp = shuffle(data_df_tmp, random_state=m_random_state)
        data_df_test = data_df_test.append(data_df_tmp[:test_threshhold])
        data_df_train = data_df_train.append(data_df_tmp[test_threshhold:])
    data_df_test = shuffle(data_df_test, random_state=m_random_state)
    data_df_train = shuffle(data_df_train, random_state=m_random_state)
    #delete first img id col
    data_df_train = data_df_train.drop(data_df_train.columns[0], axis=1)
    data_df_test = data_df_test.drop(data_df_test.columns[0], axis=1)
    return data_df_train,data_df_test   

In [4]:
#1.1. prepare data - prepare neutral DF
NEUTRAL_FOLDER = r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\datasetAll\neutral"
#NEUTRAL_FOLDER = r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\neutral"
neutral_faces_landmarks, neut_files = extract_dlib_facial_points(NEUTRAL_FOLDER)

In [5]:
netural_features = extract_features_forall(neutral_faces_landmarks, neut_files)

In [6]:
n_cols = ["n%s"%c for c in netural_features.columns]
netural_features.columns = n_cols
netural_features.to_csv(r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\neutral\neutral_features.csv")

In [8]:
for e in EMOTIONS[5:]:
    print("Processing %s..."%e)
    folder = r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\datasetAll\%s"%e
    faces_landmarks, files = extract_dlib_facial_points(folder)
    features = extract_features_forall(faces_landmarks, files)
    features.to_csv("./%s_features.csv"%e)

Processing disgust...
Processing anger...


In [14]:
for e in EMOTIONS:
    print(pd.read_csv("./%s_features.csv"%(e)).shape)

(247, 8557)
(184, 8557)
(128, 8557)
(201, 8557)
(115, 8557)
(164, 8557)
(147, 8557)


In [10]:
print("compute deltas...")
neutral_df = pd.read_csv("./neutral_features.csv")
for e in EMOTIONS[1:]:
    print("Processing %s..."%e)
    df = pd.read_csv("./%s_features.csv"%e)
    df = df.drop(df.columns[0], axis=1)
    cols = df.columns
    #print("Debug - 1")
    df = pd.merge(df, neutral_df, how = 'left', left_on = 'Person', right_on = 'nPerson')
    for c in cols[1:]:
        #print(c)
        df[c] = df[c]-df["n%s"%c]
    df = df.drop(neutral_df.columns, axis=1)
    df.to_csv("./delta_%s_features.csv"%e)

compute deltas...
Processing happy...
Processing sadness...
Processing surprise...
Processing fear...
Processing disgust...
Processing anger...


In [18]:
train_df = []
test_df = []
csvPaths = [".//delta_%s_features.csv"%e for e in EMOTIONS[1:] ]
train_df,test_df = prepare_balanced_data(csvPaths, 130, testPart=0.1)

In [19]:
#process to workable dfs
train_df = train_df.dropna()
test_df = test_df.dropna()
X_train = train_df.iloc[:, 1:-1].as_matrix()    #data
Y_train = train_df['emotion'].as_matrix()
X_test = test_df.iloc[:, 1:-1].as_matrix()      #data
Y_test = test_df['emotion'].as_matrix()            #labels

<h1> Prediction Tries</h1>

<p><b>Logistic Regression</b></p>

In [30]:
Cs = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
for c in Cs:
    m_log_reg = log_reg_classifier(X_train,Y_train,c)
    s = m_log_reg.score(X_test,Y_test)
    print("LogisticRegression score with c={0:f} is: {1:f}".format(c,s))

LogisticRegression score with c=0.000100 is: 0.589744
LogisticRegression score with c=0.001000 is: 0.679487
LogisticRegression score with c=0.010000 is: 0.846154
LogisticRegression score with c=0.100000 is: 0.858974
LogisticRegression score with c=1.000000 is: 0.846154
LogisticRegression score with c=10.000000 is: 0.807692
LogisticRegression score with c=100.000000 is: 0.794872


In [31]:
m_log_reg = log_reg_classifier(X_train,Y_train,0.1)
s = m_log_reg.score(X_test,Y_test)

In [37]:
for i in range(len(X_test)):
    p = m_log_reg.predict([X_test[i]])
    if(p != Y_test[i]):
        print("Error - %s (predicted) - %s (true)"%(EMOTIONS[p[0]+1],EMOTIONS[Y_test[i]+1]))


Error - happy (predicted) - fear (true)
Error - fear (predicted) - happy (true)
Error - surprise (predicted) - fear (true)
Error - fear (predicted) - disgust (true)
Error - anger (predicted) - sadness (true)
Error - disgust (predicted) - anger (true)
Error - disgust (predicted) - surprise (true)
Error - disgust (predicted) - anger (true)
Error - sadness (predicted) - anger (true)
Error - fear (predicted) - sadness (true)
Error - anger (predicted) - sadness (true)


<p><b>MLPClassifier</b></p>

In [26]:
alphas = np.logspace(-1, 1, 5)
names = []
classifiers = []
for i in alphas:
    names.append('alpha ' + str(i))
    classifiers.append(MLPClassifier(alpha=i, random_state=11))
# iterate over classifiers
print(alphas)
print("Scores:")
for name, clf in zip(names, classifiers):
    clf.fit(X_train, Y_train)
    score = clf.score(X_test, Y_test)
    print("{0:s} - {1:f}".format(name, score))

[ 0.1         0.31622777  1.          3.16227766 10.        ]
Scores:
alpha 0.1 - 0.871795
alpha 0.31622776601683794 - 0.858974
alpha 1.0 - 0.871795
alpha 3.1622776601683795 - 0.858974
alpha 10.0 - 0.858974


In [27]:
alpha002 = 1
mlpclassifier = MLPClassifier(alpha=alpha002, random_state=11)
mlpclassifier.fit(X_train, Y_train)
score = mlpclassifier.score(X_test, Y_test)
print("mlpclassifier score is ",score)

mlpclassifier score is  0.8717948717948718


In [38]:
for i in range(len(X_test)):
    p = mlpclassifier.predict([X_test[i]])
    if(p != Y_test[i]):
        print("Error - %s (predicted) - %s (true)"%(EMOTIONS[p[0]+1],EMOTIONS[Y_test[i]+1]))

Error - disgust (predicted) - happy (true)
Error - happy (predicted) - fear (true)
Error - fear (predicted) - happy (true)
Error - fear (predicted) - disgust (true)
Error - disgust (predicted) - anger (true)
Error - disgust (predicted) - surprise (true)
Error - disgust (predicted) - anger (true)
Error - sadness (predicted) - anger (true)
Error - fear (predicted) - sadness (true)
Error - anger (predicted) - sadness (true)


<h1> Sanity Check </h1>

In [100]:
faces_landmarks, files = extract_dlib_facial_points(r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\more")
features = extract_features_forall(faces_landmarks, files)
features['Person'] = ['me']*5
features.to_csv(r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\more\features_me.csv")

In [77]:
print("compute deltas...")
neutral_df = pd.read_csv(r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\neutral\neutral_features.csv")
df = pd.read_csv(r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\more\features_me.csv")
neutral_df = neutral_df.drop(neutral_df.columns[0], axis=1)
df = df.drop(df.columns[0], axis=1)
cols = df.columns

compute deltas...


In [78]:
df = pd.merge(df, neutral_df, how = 'left', left_on = 'Person', right_on = 'nPerson')
for c in cols[1:]:
     df[c] = df[c]-df["n%s"%c]
df = df.drop(neutral_df.columns, axis=1)
df.to_csv(r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\delta_features.csv")

In [84]:
sanity_df = pd.read_csv(r"C:\Users\DELL1\Documents\studies\FinalProject\Datatsets\me\delta_features.csv")
sanity_df = sanity_df.drop(sanity_df.columns[0], axis=1)
X_sanity = sanity_df.iloc[:, 1:].as_matrix()      #data

In [86]:
ps = m_log_reg.predict(X_sanity)

In [118]:
a = [ 1,2,3,4,5]
print(ps-a)

[-1  0 -2  1  0]


In [116]:
import pickle
filename = 'modelLF.dat'
pickle.dump(m_log_reg, open(filename, 'wb+'))

In [7]:
from PyQt5 import QtCore, QtGui, QtWidgets, QtMultimedia
from PyQt5.QtMultimedia import *
from PyQt5.QtCore import *

In [19]:
PLAYLISTS_PATH = r"C:\Users\DELL1\Documents\GitHub\ExpressionRecognition\Application\Playlists"
class MoodPlayLists(QtMultimedia.QMediaPlayer):
    def __init__(self, parent=None):
        super().__init__(parent)
        moods = glob.glob(PLAYLISTS_PATH +"//*")
        self.playlists = []
        for m in moods:
            songs = glob.glob(m + "//*")
            for s in songs:
                playlist = QMediaPlaylist()
                url = QUrl.fromLocalFile(s)
                playlist.addMedia(QMediaContent(url))
            playlist.setPlaybackMode(QMediaPlaylist.Loop)
            self.playlists.append(playlist)
    
    def change_playlist(self, mood=0):
        self.setPlaylist(self.playlists[mood])


In [20]:
m = MoodPlayLists()

In [37]:
playlist = QMediaPlaylist()
playlist.setPlaybackMode(QMediaPlaylist.Loop)
type(QMediaPlaylist.)


TypeError: 'sip.wrappertype' object is not subscriptable