In [452]:
import pandas as pd
import numpy as np
import os

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [453]:
df_ckplus_dataset = pd.read_csv("dataset/ckplus_dataset.csv")

In [454]:
def read_emotion_features(emotion):
    distance_features = []
    angle_features = []
    with open(f"features/{emotion}.txt", "r") as file:
        for line in file:
            feature = line.strip().split()
            feature = [int(value) for value in feature[0][2:-1].split(",")]
            if len(feature) == 2:
                distance_features.append(feature)
            else:
                angle_features.append(feature)
    return distance_features, angle_features


def load_emotion_features():
    emotions = ["neutral", "anger", "disgust", "fear", "happiness", "sadness", "surprise"]
    emotion_features = {}
    for emotion in emotions:
        distance_features, angle_features = read_emotion_features(emotion)
        emotion_features[emotion] = (distance_features, angle_features)
    return emotion_features


emotion_features = load_emotion_features()

In [455]:
def euclidean_distance(a, b):
    return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)


def smaller_angle(a, b, c):
    a = np.array(a)
    b = np.array(b)
    c = np.array(c)
    ba = a - b
    bc = c - b
    cosine = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    return np.arccos(cosine) * 180 / np.pi


def mid_point(a, b):
    return [(a[0] + b[0]) / 2, (a[1] + b[1]) / 2]


def get_face_width(data):
    point1 = data["landmark_2"].split(" ")
    point1 = [float(point1[0]), float(point1[1])]
    point2 = data["landmark_16"].split(" ")
    point2 = [float(point2[0]), float(point2[1])]
    return euclidean_distance(point1, point2)


def generate_distance_features(data, feature_emotion):
    distance_features_template = emotion_features[feature_emotion][0]

    distance_features = []
    for feature in distance_features_template:
        point1 = data[f"landmark_{feature[0]}"].split(" ")
        point1 = [float(point1[0]), float(point1[1])]
        point2 = data[f"landmark_{feature[1]}"].split(" ")
        point2 = [float(point2[0]), float(point2[1])]

        distance = euclidean_distance(point1, point2)
        distance_features.append(distance)

    if feature_emotion == "neutral":
        point1 = data["landmark_37"].split(" ")
        point1 = [float(point1[0]), float(point1[1])]
        point2 = data["landmark_46"].split(" ")
        point2 = [float(point2[0]), float(point2[1])]
        point3 = data["landmark_9"].split(" ")
        point3 = [float(point3[0]), float(point3[1])]

        distance = euclidean_distance(mid_point(point1, point2), point3)
        distance_features.append(distance)

    distance_features = np.array(distance_features)
    distance_features = distance_features / get_face_width(data)

    return distance_features


def generate_angle_features(data, feature_emotion):
    angle_features_template = emotion_features[feature_emotion][1]

    angle_features = []
    for feature in angle_features_template:
        point1 = data[f"landmark_{feature[0]}"].split(" ")
        point1 = [float(point1[0]), float(point1[1])]
        point2 = data[f"landmark_{feature[1]}"].split(" ")
        point2 = [float(point2[0]), float(point2[1])]
        point3 = data[f"landmark_{feature[2]}"].split(" ")
        point3 = [float(point3[0]), float(point3[1])]

        angle = smaller_angle(point1, point2, point3)
        angle_features.append(angle)

    angle_features = np.array(angle_features)
    angle_features = angle_features / 180

    return angle_features

In [456]:
def generate_features(data, feature_emotion, feature_type):
    features = np.array([])

    if feature_type == "distance" or feature_type == "both":
        distance_features = generate_distance_features(data, feature_emotion)
        features = np.concatenate((features, distance_features))

    if feature_type == "angle" or feature_type == "both":
        angle_features = generate_angle_features(data, feature_emotion)
        features = np.concatenate((features, angle_features))

    return features


def generate_features_multiple(data, feature_emotions, feature_type):
    all_features = np.array([])
    
    for feature_emotion in feature_emotions:
        features = generate_features(data, feature_emotion, feature_type)
        all_features = np.concatenate((all_features, features))
        
    return all_features


def get_features_from_df(df, feature_emotions, feature_type):
    features = []
    for index, row in df.iterrows():
        features.append(generate_features_multiple(row, feature_emotions, feature_type))

    features = np.array(features)
    
    return features


def get_classes_from_df(df):
    classes = []
    for index, row in df.iterrows():
        classes.append(row["subject_id"])
    return np.array(classes)

In [457]:
def get_model(model_type, seed):
    if model_type == "lda":
        return LinearDiscriminantAnalysis()
    elif model_type == "svc":
        return SVC()
    elif model_type == "rand_forest":
        return RandomForestClassifier()
    else:
        raise ValueError("Invalid model type")


def train_model(model, X_train, y_train):
    model.fit(X_train, y_train)
    return model


def test_model(model, X_test, y_test):
    return model.score(X_test, y_test)

In [458]:
def get_subject_session_data(subject_id, session_id):
    return df_ckplus_dataset[
        (df_ckplus_dataset["subject_id"] == subject_id)
        & (df_ckplus_dataset["session_id"] == session_id)
    ]


def get_subjects(emotion, no_of_subjects=None):
    df_subjects = pd.read_csv(f"dataset/{emotion}_filtered.csv")
    if no_of_subjects is not None:
        df_subjects = df_subjects.head(no_of_subjects)
    assert no_of_subjects is None or len(df_subjects) == no_of_subjects
    df_subjects = df_subjects[["subject_id", "session_id"]]
    
    return df_subjects
    

In [459]:
def generate_train_test_data(emotion, no_of_subjects=None, seed=42):
    subjects = get_subjects(emotion=emotion, no_of_subjects=no_of_subjects)

    df_neutral_train = pd.DataFrame()
    df_neutral_test = pd.DataFrame()
    df_emotion_train = pd.DataFrame()
    df_emotion_test = pd.DataFrame()

    for index, row in subjects.iterrows():
        subject_id = row["subject_id"]
        session_id = row["session_id"]
        subject_data = get_subject_session_data(subject_id, session_id)

        neutral_data = subject_data.head(len(subject_data) // 2)
        emotion_data = subject_data.tail(len(subject_data) // 2)
        
        neutral_data = neutral_data.sample(n=3, random_state=seed)
        emotion_data = emotion_data.sample(n=3, random_state=seed)

        neutral_train = neutral_data.head(2)
        neutral_test = neutral_data.tail(1)
        
        emotion_train = emotion_data.head(2)
        emotion_test = emotion_data.tail(1)
        
        df_neutral_train = pd.concat([df_neutral_train, neutral_train])
        df_neutral_test = pd.concat([df_neutral_test, neutral_test])
        
        df_emotion_train = pd.concat([df_emotion_train, emotion_train])
        df_emotion_test = pd.concat([df_emotion_test, emotion_test])
        
    return df_neutral_train, df_neutral_test, df_emotion_train, df_emotion_test

In [460]:
def evaluate_performance(df_train, df_test, feature_emotions: list, feature_type, model_type, seed):
    X_train = get_features_from_df(df_train, feature_emotions, feature_type)
    y_train = get_classes_from_df(df_train)

    X_test = get_features_from_df(df_test, feature_emotions, feature_type)
    y_test = get_classes_from_df(df_test)

    model = get_model(model_type, seed)
    model = train_model(model, X_train, y_train)

    return test_model(model, X_test, y_test)

In [461]:
def run_tests(
    emotion, 
    model_type,
    training_set,
    test_set,
    feature,
    no_of_tests, 
    seed
):

    print(f"{emotion}: {training_set}-{test_set} with {feature} using {model_type}")

    np.random.seed(seed)
    seeds = np.random.randint(0, 1000, no_of_tests)

    scores = np.array([])
    for test in range(no_of_tests):
        print(f"Running test #{test+1}/{no_of_tests}")

        df_neutral_train, df_neutral_test, df_emotion_train, df_emotion_test = (
            generate_train_test_data(emotion=emotion, seed=seeds[test])
        )
        df_combined_train = pd.concat([df_neutral_train, df_emotion_train])
        df_combined_test = pd.concat([df_neutral_test, df_emotion_test])

        df_train = None
        if training_set == "neutral":
            df_train = df_neutral_train
        elif training_set == f"{emotion}":
            df_train = df_emotion_train
        elif training_set == f"neutral_{emotion}":
            df_train = df_combined_train
        assert df_train is not None

        df_test = None
        if test_set == "neutral":
            df_test = df_neutral_test
        elif test_set == f"{emotion}":
            df_test = df_emotion_test
        elif test_set == f"neutral_{emotion}":
            df_test = df_combined_test
        assert df_test is not None

        feature_emotions = None
        if feature == "neutral":
            feature_emotions = ["neutral"]
        elif feature == f"{emotion}":
            feature_emotions = [emotion]
        elif feature == f"neutral_{emotion}":
            feature_emotions = ["neutral", emotion]
        assert feature_emotions is not None

        score = evaluate_performance(
            df_train=df_train,
            df_test=df_test,
            feature_emotions=feature_emotions,
            feature_type="distance",
            model_type=model_type,
            seed=seeds[test],
        )

        print("Score: ", score * 100)
        scores = np.append(scores, score)

    result = {
        "training_set": training_set,
        "test_set": test_set,
        "feature_set": feature,
        "feature_type": "distance",
        "model_type": model_type,
        "accuracy": scores.mean(),
    }

    return result

In [462]:
neutral = "neutral"
emotion = "happiness"
model_type = "lda"
no_of_tests = 25
seed = 101

In [463]:
df_results = pd.DataFrame(columns=["training_set", "test_set", "feature_set", "feature_type", "accuracy"])

In [464]:
# neutral-neutral with neutral
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=neutral,
    test_set=neutral,
    feature=neutral,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# neutral-neutral with emotion
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=neutral,
    test_set=neutral,
    feature=emotion,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# neutral-neutral with both
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=neutral,
    test_set=neutral,
    feature=f"{neutral}_{emotion}",
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

happiness: neutral-neutral with neutral using lda
Running test #1/25
train: 180
test: 90
Score:  95.55555555555556
Running test #2/25
train: 180
test: 90
Score:  98.88888888888889
Running test #3/25
train: 180
test: 90
Score:  97.77777777777777
Running test #4/25
train: 180
test: 90
Score:  100.0
Running test #5/25
train: 180
test: 90
Score:  98.88888888888889
Running test #6/25
train: 180
test: 90
Score:  96.66666666666667
Running test #7/25
train: 180
test: 90
Score:  97.77777777777777
Running test #8/25
train: 180
test: 90
Score:  96.66666666666667
Running test #9/25
train: 180
test: 90
Score:  96.66666666666667
Running test #10/25
train: 180
test: 90
Score:  100.0
Running test #11/25
train: 180
test: 90
Score:  97.77777777777777
Running test #12/25
train: 180
test: 90
Score:  98.88888888888889
Running test #13/25
train: 180
test: 90
Score:  98.88888888888889
Running test #14/25
train: 180
test: 90
Score:  94.44444444444444
Running test #15/25
train: 180
test: 90
Score:  100.0
Runni

In [465]:
# neutral-emotion with neutral
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=neutral,
    test_set=emotion,
    feature=neutral,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# neutral-emotion with emotion
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=neutral,
    test_set=emotion,
    feature=emotion,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# neutral-emotion with both
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=neutral,
    test_set=emotion,
    feature=f"{neutral}_{emotion}",
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

happiness: neutral-happiness with neutral using lda
Running test #1/25
train: 180
test: 90
Score:  84.44444444444444
Running test #2/25
train: 180
test: 90
Score:  82.22222222222221
Running test #3/25
train: 180
test: 90
Score:  84.44444444444444
Running test #4/25
train: 180
test: 90
Score:  84.44444444444444
Running test #5/25
train: 180
test: 90
Score:  86.66666666666667
Running test #6/25
train: 180
test: 90
Score:  83.33333333333334
Running test #7/25
train: 180
test: 90
Score:  76.66666666666667
Running test #8/25
train: 180
test: 90
Score:  83.33333333333334
Running test #9/25
train: 180
test: 90
Score:  78.88888888888889
Running test #10/25
train: 180
test: 90
Score:  81.11111111111111
Running test #11/25
train: 180
test: 90
Score:  86.66666666666667
Running test #12/25
train: 180
test: 90
Score:  86.66666666666667
Running test #13/25
train: 180
test: 90
Score:  86.66666666666667
Running test #14/25
train: 180
test: 90
Score:  82.22222222222221
Running test #15/25
train: 180
te

In [466]:
# emotion-neutral with neutral
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=emotion,
    test_set=neutral,
    feature=neutral,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# emotion-neutral with emotion
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=emotion,
    test_set=neutral,
    feature=emotion,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# emotion-neutral with both
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=emotion,
    test_set=neutral,
    feature=f"{neutral}_{emotion}",
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

happiness: happiness-neutral with neutral using lda
Running test #1/25
train: 180
test: 90
Score:  78.88888888888889
Running test #2/25
train: 180
test: 90
Score:  66.66666666666666
Running test #3/25
train: 180
test: 90
Score:  81.11111111111111
Running test #4/25
train: 180
test: 90
Score:  83.33333333333334
Running test #5/25
train: 180
test: 90
Score:  58.88888888888889
Running test #6/25
train: 180
test: 90
Score:  77.77777777777779
Running test #7/25
train: 180
test: 90
Score:  74.44444444444444
Running test #8/25
train: 180
test: 90
Score:  81.11111111111111
Running test #9/25
train: 180
test: 90
Score:  82.22222222222221
Running test #10/25
train: 180
test: 90
Score:  91.11111111111111
Running test #11/25
train: 180
test: 90
Score:  83.33333333333334
Running test #12/25
train: 180
test: 90
Score:  58.88888888888889
Running test #13/25
train: 180
test: 90
Score:  71.11111111111111
Running test #14/25
train: 180
test: 90
Score:  83.33333333333334
Running test #15/25
train: 180
te

In [467]:
# emotion-emotion with neutral
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=emotion,
    test_set=emotion,
    feature=neutral,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# emotion-emotion with emotion
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=emotion,
    test_set=emotion,
    feature=emotion,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# emotion-emotion with both
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=emotion,
    test_set=emotion,
    feature=f"{neutral}_{emotion}",
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

happiness: happiness-happiness with neutral using lda
Running test #1/25
train: 180
test: 90
Score:  100.0
Running test #2/25
train: 180
test: 90
Score:  100.0
Running test #3/25
train: 180
test: 90
Score:  100.0
Running test #4/25
train: 180
test: 90
Score:  98.88888888888889
Running test #5/25
train: 180
test: 90
Score:  98.88888888888889
Running test #6/25
train: 180
test: 90
Score:  100.0
Running test #7/25
train: 180
test: 90
Score:  98.88888888888889
Running test #8/25
train: 180
test: 90
Score:  96.66666666666667
Running test #9/25
train: 180
test: 90
Score:  100.0
Running test #10/25
train: 180
test: 90
Score:  100.0
Running test #11/25
train: 180
test: 90
Score:  98.88888888888889
Running test #12/25
train: 180
test: 90
Score:  98.88888888888889
Running test #13/25
train: 180
test: 90
Score:  100.0
Running test #14/25
train: 180
test: 90
Score:  95.55555555555556
Running test #15/25
train: 180
test: 90
Score:  100.0
Running test #16/25
train: 180
test: 90
Score:  98.8888888888

In [468]:
# both-both with neutral
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=f"{neutral}_{emotion}",
    test_set=f"{neutral}_{emotion}",
    feature=neutral,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# emotion-neutral with emotion
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=f"{neutral}_{emotion}",
    test_set=f"{neutral}_{emotion}",
    feature=emotion,
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

# emotion-neutral with both
result = run_tests(
    emotion=emotion,
    model_type=model_type,
    training_set=f"{neutral}_{emotion}",
    test_set=f"{neutral}_{emotion}",
    feature=f"{neutral}_{emotion}",
    no_of_tests=no_of_tests,
    seed=seed,
)
df_results.loc[len(df_results)] = result
print(df_results)
print()

happiness: neutral_happiness-neutral_happiness with neutral using lda
Running test #1/25
train: 360
test: 180
Score:  99.44444444444444
Running test #2/25
train: 360
test: 180
Score:  100.0
Running test #3/25
train: 360
test: 180
Score:  98.33333333333333
Running test #4/25
train: 360
test: 180
Score:  97.22222222222221
Running test #5/25
train: 360
test: 180
Score:  98.33333333333333
Running test #6/25
train: 360
test: 180
Score:  98.33333333333333
Running test #7/25
train: 360
test: 180
Score:  97.77777777777777
Running test #8/25
train: 360
test: 180
Score:  98.33333333333333
Running test #9/25
train: 360
test: 180
Score:  99.44444444444444
Running test #10/25
train: 360
test: 180
Score:  98.33333333333333
Running test #11/25
train: 360
test: 180
Score:  98.88888888888889
Running test #12/25
train: 360
test: 180
Score:  98.33333333333333
Running test #13/25
train: 360
test: 180
Score:  98.33333333333333
Running test #14/25
train: 360
test: 180
Score:  98.33333333333333
Running test 

In [469]:
df_results

Unnamed: 0,training_set,test_set,feature_set,feature_type,accuracy
0,neutral,neutral,neutral,distance,0.976
1,neutral,neutral,happiness,distance,0.969333
2,neutral,neutral,neutral_happiness,distance,0.996
3,neutral,happiness,neutral,distance,0.837333
4,neutral,happiness,happiness,distance,0.801333
5,neutral,happiness,neutral_happiness,distance,0.967111
6,happiness,neutral,neutral,distance,0.748444
7,happiness,neutral,happiness,distance,0.640444
8,happiness,neutral,neutral_happiness,distance,0.920889
9,happiness,happiness,neutral,distance,0.984444
