In [471]:
import pandas as pd
import numpy as np
import os

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [472]:
# Load the CK+ table once at module level. The helpers below filter it on the
# "subject_id" and "session_id" columns and read "landmark_<n>" columns whose
# cells are parsed as space-separated "x y" strings.
df_ckplus_dataset = pd.read_csv("dataset/ckplus_dataset.csv")

In [473]:
def read_emotion_features(emotion):
    """Parse the landmark-index feature templates stored for one emotion.

    Reads ``features/{emotion}.txt`` line by line. Only the first
    whitespace-separated token of each line is used: its first two characters
    and its last character are dropped, and the remainder is split on commas
    into integers (a token shaped like ``d(1,2)`` would yield ``[1, 2]``; the
    actual prefix convention of the files is not visible here -- TODO confirm).

    Blank or whitespace-only lines are skipped instead of raising IndexError.

    Args:
        emotion: Name used to build the path ``features/{emotion}.txt``.

    Returns:
        Tuple ``(distance_features, angle_features)`` of lists of int lists.
        Entries with exactly two indices go to ``distance_features``; every
        other entry goes to ``angle_features``.

    Raises:
        FileNotFoundError: If the feature file does not exist.
        ValueError: If a comma-separated piece of the token is not an integer.
    """
    distance_features = []
    angle_features = []
    with open(f"features/{emotion}.txt", "r") as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                # Nothing on this line (blank / whitespace only): skip it
                # rather than failing on tokens[0].
                continue
            feature = [int(value) for value in tokens[0][2:-1].split(",")]
            if len(feature) == 2:
                distance_features.append(feature)
            else:
                angle_features.append(feature)
    return distance_features, angle_features


def load_emotion_features():
    """Load the feature templates of all seven emotions into one dict.

    Returns:
        Dict mapping each emotion name to the
        ``(distance_features, angle_features)`` pair produced by
        ``read_emotion_features`` for that name, in the order listed below.
    """
    emotion_names = (
        "neutral",
        "anger",
        "disgust",
        "fear",
        "happiness",
        "sadness",
        "surprise",
    )
    return {name: read_emotion_features(name) for name in emotion_names}


# Parse every emotion's feature file once at module level; the
# generate_distance_features / generate_angle_features helpers read this global.
emotion_features = load_emotion_features()

In [474]:
def euclidean_distance(a, b):
    """Return the straight-line distance between the 2-D points ``a`` and ``b``.

    Only indices 0 and 1 of each point are read.
    """
    delta_x = a[0] - b[0]
    delta_y = a[1] - b[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)


def smaller_angle(a, b, c):
    """Return the angle at vertex ``b`` between rays b->a and b->c, in degrees.

    The result comes from ``arccos`` and therefore lies in [0, 180].

    Args:
        a: First end point (array-like of coordinates).
        b: Vertex of the angle (array-like of coordinates).
        c: Second end point (array-like of coordinates).

    Returns:
        The angle in degrees. If ``a`` or ``c`` coincides with ``b`` the
        direction is undefined and the result is NaN (NumPy emits a
        RuntimeWarning for the 0/0 division).
    """
    vertex = np.array(b)
    ba = np.array(a) - vertex
    bc = np.array(c) - vertex
    cosine = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    # Rounding can push the ratio a few ulps outside [-1, 1] for (anti)parallel
    # rays, which would make arccos return NaN; clamp it back into the domain.
    cosine = np.clip(cosine, -1.0, 1.0)
    return np.arccos(cosine) * 180 / np.pi


def mid_point(a, b):
    """Return ``[x, y]`` halfway between the 2-D points ``a`` and ``b``.

    Only indices 0 and 1 of each point are read.
    """
    mid_x = (a[0] + b[0]) / 2
    mid_y = (a[1] + b[1]) / 2
    return [mid_x, mid_y]


def get_face_width(data):
    """Return the distance between ``landmark_2`` and ``landmark_16``.

    Args:
        data: Mapping (e.g. a DataFrame row) whose ``"landmark_2"`` and
            ``"landmark_16"`` values are space-separated ``"x y"`` strings.

    Returns:
        Euclidean distance between the two parsed points; callers use it as
        the face width for normalisation.
    """

    def _xy(column):
        # First two space-separated tokens of the cell, as floats.
        tokens = data[column].split(" ")
        return [float(tokens[0]), float(tokens[1])]

    return euclidean_distance(_xy("landmark_2"), _xy("landmark_16"))


def generate_distance_features(data, feature_emotion):
    """Compute face-width-normalised landmark distances for one sample.

    Args:
        data: Mapping (e.g. a DataFrame row) with ``"landmark_<n>"`` values
            stored as space-separated ``"x y"`` strings.
        feature_emotion: Key into the module-level ``emotion_features`` dict;
            its distance template (element 0) lists the landmark index pairs.

    Returns:
        1-D NumPy array with one distance per template pair, each divided by
        ``get_face_width(data)``. For ``"neutral"`` one extra value is
        appended: the distance from the midpoint of landmarks 37 and 46 to
        landmark 9.
    """

    def _xy(landmark_id):
        # First two space-separated tokens of the landmark cell, as floats.
        tokens = data[f"landmark_{landmark_id}"].split(" ")
        return [float(tokens[0]), float(tokens[1])]

    template = emotion_features[feature_emotion][0]
    distances = [
        euclidean_distance(_xy(pair[0]), _xy(pair[1])) for pair in template
    ]

    if feature_emotion == "neutral":
        first_anchor = _xy(37)
        second_anchor = _xy(46)
        far_point = _xy(9)
        distances.append(
            euclidean_distance(mid_point(first_anchor, second_anchor), far_point)
        )

    return np.array(distances) / get_face_width(data)


def generate_angle_features(data, feature_emotion):
    """Compute landmark angles for one sample, scaled by 1/180.

    Args:
        data: Mapping (e.g. a DataFrame row) with ``"landmark_<n>"`` values
            stored as space-separated ``"x y"`` strings.
        feature_emotion: Key into the module-level ``emotion_features`` dict;
            its angle template (element 1) lists landmark index triples, the
            middle index being the vertex passed to ``smaller_angle``.

    Returns:
        1-D NumPy array with one value per template triple: the angle in
        degrees divided by 180.
    """

    def _xy(landmark_id):
        # First two space-separated tokens of the landmark cell, as floats.
        tokens = data[f"landmark_{landmark_id}"].split(" ")
        return [float(tokens[0]), float(tokens[1])]

    template = emotion_features[feature_emotion][1]
    angles = [
        smaller_angle(_xy(triple[0]), _xy(triple[1]), _xy(triple[2]))
        for triple in template
    ]
    return np.array(angles) / 180

In [475]:
def generate_features(data, feature_emotion, feature_type):
    """Build the feature vector of one sample for one emotion template.

    Args:
        data: Sample passed through to the distance/angle generators.
        feature_emotion: Template name passed through to the generators.
        feature_type: ``"distance"``, ``"angle"`` or ``"both"``. With
            ``"both"`` the distance features come first. Any other value
            yields an empty array.

    Returns:
        1-D NumPy array of the selected features.
    """
    # Start from an empty float array so the result is an array even when no
    # feature family is selected.
    parts = [np.array([])]

    if feature_type in ("distance", "both"):
        parts.append(generate_distance_features(data, feature_emotion))

    if feature_type in ("angle", "both"):
        parts.append(generate_angle_features(data, feature_emotion))

    return np.concatenate(parts)


def generate_features_multiple(data, feature_emotions, feature_type):
    """Concatenate one sample's feature vectors over several emotion templates.

    Args:
        data: Sample passed through to ``generate_features``.
        feature_emotions: Iterable of template names, processed in order.
        feature_type: Forwarded unchanged to ``generate_features``.

    Returns:
        1-D NumPy array holding the per-template vectors back to back; empty
        when ``feature_emotions`` is empty.
    """
    chunks = [np.array([])]
    chunks.extend(
        generate_features(data, emotion_name, feature_type)
        for emotion_name in feature_emotions
    )
    return np.concatenate(chunks)


def get_features_from_df(df, feature_emotions, feature_type):
    """Build the feature matrix for every row of ``df``.

    Args:
        df: DataFrame whose rows are passed to ``generate_features_multiple``.
        feature_emotions: Template names forwarded for each row.
        feature_type: Feature family selector forwarded for each row.

    Returns:
        NumPy array with one feature vector per row of ``df``.

    Side effects:
        Prints the shape of the resulting array.
    """
    per_row = [
        generate_features_multiple(row, feature_emotions, feature_type)
        for _, row in df.iterrows()
    ]
    features = np.array(per_row)
    print(features.shape)
    return features


def get_classes_from_df(df):
    """Return the ``subject_id`` of every row of ``df`` as a NumPy array.

    The values are collected row by row via ``iterrows``, in row order.
    """
    return np.array([row["subject_id"] for _, row in df.iterrows()])

In [476]:
def get_model(model_type, random_state=42):
    """Create an unfitted scikit-learn classifier by short name.

    Args:
        model_type: ``"lda"`` (LinearDiscriminantAnalysis), ``"svc"`` (SVC) or
            ``"rand_forest"`` (RandomForestClassifier).
        random_state: Seed handed to the random forest so repeated runs of the
            notebook give the same score. Pass ``None`` to restore unseeded,
            run-to-run varying forests. Not used for ``"lda"`` and ``"svc"``,
            which are built with their defaults.

    Returns:
        A new, unfitted estimator instance.

    Raises:
        ValueError: If ``model_type`` is not one of the names above.
    """
    if model_type == "lda":
        return LinearDiscriminantAnalysis()
    if model_type == "svc":
        return SVC()
    if model_type == "rand_forest":
        # Bootstrap sampling and feature sub-sampling are random; fix the seed
        # so the reported accuracy is reproducible.
        return RandomForestClassifier(random_state=random_state)
    raise ValueError("Invalid model type")


def train_model(model, X_train, y_train):
    """Fit ``model`` on the training data and return that same object.

    Args:
        model: Estimator exposing ``fit(X, y)``.
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        The ``model`` object that was passed in, after ``fit`` has been called.
    """
    model.fit(X_train, y_train)
    return model


def test_model(model, X_test, y_test):
    return model.score(X_test, y_test)

In [477]:
def get_subject_session_data(subject_id, session_id):
    """Return the rows of ``df_ckplus_dataset`` for one subject and session.

    Args:
        subject_id: Value matched against the ``"subject_id"`` column.
        session_id: Value matched against the ``"session_id"`` column.

    Returns:
        DataFrame with the rows where both columns match (possibly empty).
    """
    subject_matches = df_ckplus_dataset["subject_id"] == subject_id
    session_matches = df_ckplus_dataset["session_id"] == session_id
    return df_ckplus_dataset[subject_matches & session_matches]


def get_subjects(emotion, no_of_subjects=None):
    """Read the (subject, session) pairs listed for one emotion.

    Args:
        emotion: Name used to build the path ``dataset/{emotion}_filtered.csv``.
        no_of_subjects: If given, keep only that many leading rows.

    Returns:
        DataFrame with just the ``"subject_id"`` and ``"session_id"`` columns.

    Raises:
        AssertionError: If ``no_of_subjects`` is given but the truncated frame
            does not contain exactly that many rows.
    """
    listing = pd.read_csv(f"dataset/{emotion}_filtered.csv")
    if no_of_subjects is not None:
        listing = listing.head(no_of_subjects)
        assert len(listing) == no_of_subjects
    return listing[["subject_id", "session_id"]]
    

In [478]:
def generate_train_test_data(emotion, no_of_subjects=None, seed=42):
    """Build per-subject train/test frames for neutral and emotional rows.

    For every (subject_id, session_id) pair returned by ``get_subjects``, the
    session's rows are split by position: the first ``len // 2`` rows are
    treated as neutral and the last ``len // 2`` rows as the emotion (with an
    odd row count the middle row is used by neither). Three rows are sampled
    from each half with ``random_state=seed``; the first two sampled rows go
    to the train frame and the last one to the test frame.

    Args:
        emotion: Emotion name forwarded to ``get_subjects``.
        no_of_subjects: Optional cap on the number of subjects, forwarded to
            ``get_subjects``.
        seed: ``random_state`` used for both ``sample`` calls of each subject.

    Returns:
        Tuple ``(df_neutral_train, df_neutral_test, df_emotion_train,
        df_emotion_test)``. Each is an empty DataFrame when there are no
        subjects.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when a half has fewer
            than three rows.
    """
    subjects = get_subjects(emotion=emotion, no_of_subjects=no_of_subjects)

    # Collect the per-subject slices and concatenate once at the end instead
    # of re-copying a growing DataFrame on every iteration.
    neutral_train_parts, neutral_test_parts = [], []
    emotion_train_parts, emotion_test_parts = [], []

    for _, row in subjects.iterrows():
        subject_data = get_subject_session_data(row["subject_id"], row["session_id"])

        half = len(subject_data) // 2
        neutral_data = subject_data.head(half).sample(n=3, random_state=seed)
        emotion_data = subject_data.tail(half).sample(n=3, random_state=seed)

        neutral_train_parts.append(neutral_data.head(2))
        neutral_test_parts.append(neutral_data.tail(1))

        emotion_train_parts.append(emotion_data.head(2))
        emotion_test_parts.append(emotion_data.tail(1))

    def _stack(parts):
        # pd.concat rejects an empty list; keep returning an empty frame then.
        return pd.concat(parts) if parts else pd.DataFrame()

    return (
        _stack(neutral_train_parts),
        _stack(neutral_test_parts),
        _stack(emotion_train_parts),
        _stack(emotion_test_parts),
    )

In [479]:
def evaluate_performance(df_train, df_test, feature_emotions, feature_type, model_type):
    """Train a classifier on ``df_train`` and return its score on ``df_test``.

    Features come from ``get_features_from_df`` (which prints each matrix
    shape) and labels from ``get_classes_from_df``, i.e. the ``subject_id``
    of each row.

    Args:
        df_train: Rows used for fitting.
        df_test: Rows used for scoring.
        feature_emotions: Template names used to build the features.
        feature_type: Feature family selector forwarded to the generators.
        model_type: Short model name understood by ``get_model``.

    Returns:
        The value returned by ``test_model`` for the fitted model.
    """
    train_X, train_y = (
        get_features_from_df(df_train, feature_emotions, feature_type),
        get_classes_from_df(df_train),
    )
    test_X, test_y = (
        get_features_from_df(df_test, feature_emotions, feature_type),
        get_classes_from_df(df_test),
    )

    classifier = train_model(get_model(model_type), train_X, train_y)
    return test_model(classifier, test_X, test_y)

In [485]:
# Sample train/test rows for the subjects listed for "anger"; seed=55 fixes
# which rows are drawn from each subject's neutral and emotion halves.
df_neutral_train, df_neutral_test, df_emotion_train, df_emotion_test = generate_train_test_data("anger", seed=55)

# Combined variants stack the neutral rows on top of the emotion rows.
df_combined_train = pd.concat([df_neutral_train, df_emotion_train])
df_combined_test = pd.concat([df_neutral_test, df_emotion_test])

In [486]:
# Score a random forest that predicts subject_id from the "neutral" distance
# features, trained and tested on the neutral-half rows only.
evaluate_performance(
    df_neutral_train, df_neutral_test, ["neutral"], "distance", "rand_forest"
)

(116, 9)
(58, 9)


0.9827586206896551