In [71]:
from pathlib import Path
import pandas as pd
from keras.layers import BatchNormalization, Dense, Input, Conv1D, Add, ELU, Flatten, MaxPooling1D
from keras.optimizers import SGD
from keras import Model

In [2]:
def load_concatenated_aud_vid(
    file_path: Path, feature_type: str, delimeter: str
) -> dict:
    """Load one feature CSV per participant from a directory of participant folders.

    Every sub-directory of ``file_path`` is expected to be named
    ``<participant_id>_<suffix>`` and to contain the file
    ``features/<participant_id><feature_type>``.

    Args:
        file_path: Directory holding one sub-directory per participant.
        feature_type: File-name suffix appended to the participant id,
            e.g. ``"_Concat_openface_mfcc.csv"``.
        delimeter: Column separator handed to ``pandas.read_csv``.

    Returns:
        Mapping of participant id to the loaded ``DataFrame`` with every space
        removed from the column names. Empty when nothing could be loaded; a
        message is printed in that case instead of raising.
    """
    if not file_path.exists():
        print("Directory does not exist. Check input feature directory")
    loaded_features = {}
    # Sorting makes the participant order identical across runs and platforms;
    # the raw glob order depends on the file system.
    entries = sorted(file_path.glob("*")) if file_path.is_dir() else []
    for path in entries:
        if not path.is_dir():
            # Stray files next to the participant folders are not samples.
            continue
        # Use the folder name rather than searching the path string for "\\",
        # which only works with Windows separators.
        participant_id = path.name.rsplit("_", 1)[0]
        full_path = path / "features" / (participant_id + feature_type)
        participant_id_df = pd.read_csv(full_path, sep=delimeter)
        participant_id_df.columns = participant_id_df.columns.str.replace(" ", "")
        loaded_features[participant_id] = participant_id_df
    if not loaded_features:
        print(
            "No samples loaded, check the samples are available in the input directory."
        )
    return loaded_features

In [3]:
# Read the previously saved, concatenated audio + video CSV files, one
# participant -> DataFrame mapping per split (train, dev, test in that order).
(daic_audio_video_train, daic_audio_video_dev, daic_audio_video_test) = (
    load_concatenated_aud_vid(Path(split_dir), "_Concat_openface_mfcc.csv", ",")
    for split_dir in (
        "daic_processed_1/concat_vid_aud/train/",
        "daic_processed_1/concat_vid_aud/dev/",
        "daic_processed_1/concat_vid_aud/test/",
    )
)

In [4]:
# Columns removed from every participant frame before any statistics are
# computed. Kept in one place so the three splits cannot drift apart.
DROPPED_COLUMNS = [
    "frame", "timestamp", "confidence", "success",
    "AU01_c", "AU02_c", "AU04_c", "AU05_c", "AU06_c", "AU07_c",
    "AU09_c", "AU10_c", "AU12_c", "AU14_c", "AU15_c", "AU17_c",
    "AU20_c", "AU23_c", "AU25_c", "AU26_c", "AU28_c", "AU45_c",
]

for subset in (daic_audio_video_train, daic_audio_video_dev, daic_audio_video_test):
    # Snapshot the keys so values can be replaced while looping.
    for subject_id in list(subset):
        # errors="ignore" keeps the cell re-runnable: on a second execution the
        # columns are already gone and the original in-place drop raised KeyError.
        subset[subject_id] = subset[subject_id].drop(
            columns=DROPPED_COLUMNS, errors="ignore"
        )

In [5]:
import pandas as pd
# Location of the per-split label CSV files.
label_path = Path("daic_dataset/daic_labels")
paths = {
    "dev": label_path / "dev_split.csv",
    "train": label_path / "train_split.csv",
    "test": label_path / "test_split.csv",
}

# loaded_labels[split][participant_id] -> {"PHQ_Binary": ..., "PHQ_Score": ...}
# Participant ids are stored as strings.
loaded_labels = {}
for subset, path in paths.items():
    subset_df = pd.read_csv(path)
    loaded_labels[subset] = {
        str(participant): {"PHQ_Binary": phq_binary, "PHQ_Score": phq_score}
        for participant, phq_binary, phq_score in zip(
            subset_df["Participant_ID"],
            subset_df["PHQ_Binary"],
            subset_df["PHQ_Score"],
        )
    }

In [6]:
# Splits scanned by find_min_max() below.
# NOTE(review): dev and test are included, so the normalisation statistics are
# not derived from the training split alone - confirm this is intended.
all_data = [daic_audio_video_train, daic_audio_video_dev, daic_audio_video_test]
def _default_feature_names():
    """Return the feature columns, in table order, that get min/max statistics."""
    pose = [f"pose_{kind}{axis}" for kind in "TR" for axis in "xyz"]
    gaze = [f"gaze_{eye}_{axis}" for eye in (0, 1) for axis in "xyz"]
    gaze += ["gaze_angle_x", "gaze_angle_y"]
    action_units = [
        f"AU{number:02d}_r"
        for number in (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 23, 25, 26, 45)
    ]
    mfcc = [
        f"pcm_fftMag_mfcc{suffix}[{index}]"
        for suffix in ("", "_de", "_de_de")
        for index in range(13)
    ]
    return pose + gaze + action_units + mfcc


def find_min_max(subsets=None, features=None):
    """Find the minimum and maximum of every feature for normalisation.

    Args:
        subsets: Iterable of ``{subject_id: DataFrame}`` mappings to scan.
            Defaults to the module-level ``all_data`` list.
        features: Column names to collect statistics for. Defaults to the
            pose, gaze, ``AU*_r`` and MFCC columns used by this notebook.

    Returns:
        ``{"min": {feature: value}, "max": {feature: value}}``. A feature that
        never receives a comparable value keeps ``inf`` / ``-inf``.

    The running extrema start at +/- infinity. The previous fixed sentinels
    (1000 for minima, 0 or -1000 for maxima) produced wrong results whenever
    the real data lay outside them, e.g. a feature whose values are all
    negative reported a maximum of 0.

    NOTE (from the original author): values on frames where ``success == 0``
    can be extreme and will probably need zeroing during preprocessing;
    until then they still dominate these extrema.
    """
    if subsets is None:
        subsets = all_data
    features = _default_feature_names() if features is None else list(features)
    min_max = {
        "min": dict.fromkeys(features, float("inf")),
        "max": dict.fromkeys(features, float("-inf")),
    }
    for subset in subsets:
        for subject_df in subset.values():
            for feature in features:
                column = subject_df[feature]
                min_value = column.min()
                max_value = column.max()
                # Comparisons with NaN are False, so an all-NaN column leaves
                # the running extrema untouched.
                if min_value < min_max["min"][feature]:
                    min_max["min"][feature] = min_value
                if max_value > min_max["max"][feature]:
                    min_max["max"][feature] = max_value
    return min_max
# Scan every split once and keep the per-feature extrema.
min_max_values = find_min_max()
# print(min_max_values)
# orient="index" turns the outer keys ("min", "max") into rows and the feature
# names into columns, so min_max_df.loc["min"] is a Series indexed by feature.
min_max_df = pd.DataFrame.from_dict(min_max_values, orient="index")

In [None]:
# Per-feature offset and scale for min-max normalisation.
feature_min = min_max_df.loc["min"].astype(float)
feature_range = min_max_df.loc["max"].astype(float) - feature_min


def normalise_subset(subset):
    """Return a new ``{subject_id: DataFrame}`` mapping with min-max scaled values.

    Each frame is scaled as ``(value - min) / (max - min)`` using the
    statistics in ``min_max_df``. The input frames are left untouched: the
    previous ``dict.copy()`` was shallow, so normalising "the copy" rewrote
    the original frames and re-running the cell normalised them twice.

    Columns without statistics come out as NaN.
    """
    normalised = {}
    for subject_id, subject_df in subset.items():
        # Align the statistics to this frame's columns so the result keeps
        # exactly the frame's own column set and order.
        offset = feature_min.reindex(subject_df.columns)
        scale = feature_range.reindex(subject_df.columns)
        # One vectorised expression replaces the per-row iloc loop.
        normalised[subject_id] = (subject_df - offset) / scale
    return normalised


normalised_dev = normalise_subset(daic_audio_video_dev)
normalised_test = normalise_subset(daic_audio_video_test)
normalised_train = normalise_subset(daic_audio_video_train)
all_data = [normalised_dev, normalised_test, normalised_train]

In [8]:
# Read the previously saved, normalised audio + video CSV files, one
# participant -> DataFrame mapping per split (train, dev, test in that order).
(normalised_train, normalised_dev, normalised_test) = (
    load_concatenated_aud_vid(Path(split_dir), "_normalised_openface_mfcc.csv", ",")
    for split_dir in (
        "daic_processed_1/normalised_concat_vid_aud/train/",
        "daic_processed_1/normalised_concat_vid_aud/dev/",
        "daic_processed_1/normalised_concat_vid_aud/test/",
    )
)

In [None]:
# Preview the six pose columns of dev participant "300" for index labels
# 0 through 4999 (the .loc slice is label-based and inclusive).
test1 = normalised_dev["300"].loc[
    0:4999,
    ["pose_Tx", "pose_Ty", "pose_Tz", "pose_Rx", "pose_Ry", "pose_Rz"],
].copy()
print(test1)

In [None]:
def _leading_pose_window(subjects):
    """Return, per subject, a copy of the pose columns for index labels 0..4999."""
    pose_columns = ["pose_Tx", "pose_Ty", "pose_Tz", "pose_Rx", "pose_Ry", "pose_Rz"]
    return {
        subject: frame.loc[0:4999, pose_columns].copy()
        for subject, frame in subjects.items()
    }


# The same truncation is applied to every split.
resampled_dev = _leading_pose_window(normalised_dev)
resampled_train = _leading_pose_window(normalised_train)
resampled_test = _leading_pose_window(normalised_test)
        

In [None]:
# Spot-check one truncated training participant.
print(resampled_train["302"])

In [37]:
# For every split: remember the participant order, then collect the feature
# matrices and the matching PHQ_Binary labels in that same order.
dev_samples_subject = list(resampled_dev)
X_dev = [resampled_dev[subject].to_numpy() for subject in dev_samples_subject]
Y_dev = [loaded_labels["dev"][subject]["PHQ_Binary"] for subject in dev_samples_subject]

train_samples_subject = list(resampled_train)
X_train = [resampled_train[subject].to_numpy() for subject in train_samples_subject]
Y_train = [loaded_labels["train"][subject]["PHQ_Binary"] for subject in train_samples_subject]

test_samples_subject = list(resampled_test)
X_test = [resampled_test[subject].to_numpy() for subject in test_samples_subject]
Y_test = [loaded_labels["test"][subject]["PHQ_Binary"] for subject in test_samples_subject]

In [96]:
# Shape of the first dev sample after transposing.
print(X_dev[0].T.shape)

(6, 5000)


In [38]:
import numpy as np
# Turn the per-split Python lists into numpy arrays for Keras.
X_dev_np, X_train_np, X_test_np = map(np.array, (X_dev, X_train, X_test))
Y_dev_np, Y_train_np, Y_test_np = map(np.array, (Y_dev, Y_train, Y_test))

In [52]:
# Inspect the binary dev labels.
print(Y_dev_np)

[0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0]


In [40]:
# Model input: 5000 rows by the 6 pose columns kept per participant.
input_pose = Input(shape=[5000, 6])

In [76]:
def dilated_conv_block(inputs, feature_dim):
    """Build one dilated convolution block (DCB) on top of ``inputs``.

    The block stacks three stages. Each stage runs two parallel ``Conv1D``
    layers with ``feature_dim`` filters over the same tensor, adds their
    outputs and applies ELU. Kernel size / dilation rate per stage are
    3 / 1, 5 / 2 and 9 / 4. A 1x1 convolution of the block input is then added
    to the last stage's output and the sum is batch-normalised.

    NOTE(review): the residual 1x1 convolution has a single filter, so its
    output is added to a ``feature_dim``-channel tensor - confirm this is
    intended rather than ``filters=feature_dim``.

    Args:
        inputs: Keras tensor to build the block on.
        feature_dim: Number of filters of every convolution in the three stages.

    Returns:
        The batch-normalised output tensor of the block.
    """
    # (kernel_size, dilation_rate) for the first, second and third stage.
    stage_specs = ((3, 1), (5, 2), (9, 4))

    stage_output = inputs
    for kernel_size, dilation_rate in stage_specs:
        branches = [
            Conv1D(
                filters=feature_dim,
                kernel_size=kernel_size,
                padding="same",
                dilation_rate=dilation_rate,
            )(stage_output)
            for _ in range(2)
        ]
        summed = Add()(branches)
        stage_output = ELU()(summed)

    shortcut = Conv1D(filters=1, kernel_size=1, padding="same", dilation_rate=1)(inputs)
    merged = Add()([stage_output, shortcut])
    return BatchNormalization()(merged)

In [101]:
# TDCN block for pose information
# Five dilated convolution blocks with 128, 64, 256, 128 and 64 filters; each of
# the first four is followed by max pooling with pool size 2 and stride 2.
dcb_1 = dilated_conv_block(input_pose, 128)
mp_1 = MaxPooling1D(pool_size=2, strides=2, padding='valid')(dcb_1)
dcb_2 = dilated_conv_block(mp_1, 64)
mp_2 = MaxPooling1D(pool_size=2, strides=2, padding='valid')(dcb_2)
dcb_3 = dilated_conv_block(mp_2, 256)
mp_3 = MaxPooling1D(pool_size=2, strides=2, padding='valid')(dcb_3)
dcb_4 = dilated_conv_block(mp_3, 128)
mp_4 = MaxPooling1D(pool_size=2, strides=2, padding='valid')(dcb_4)
dcb_5 = dilated_conv_block(mp_4, 64)

# FC layer
# The last block is flattened and passed through three 100-unit sigmoid layers;
# a single sigmoid unit produces the binary prediction.
flatten = Flatten()(dcb_5)
FC_l1 = Dense(100, activation = "sigmoid")(flatten)
FC_l2 = Dense(100, activation = "sigmoid")(FC_l1)
FC_l3 = Dense(100, activation = "sigmoid")(FC_l2)
output = Dense(1, activation = "sigmoid")(FC_l3)

In [102]:
# Wire the pose input to the sigmoid output and train with SGD + momentum.
model = Model(inputs=[input_pose], outputs=[output])
opt = SGD(learning_rate = 2e-5, momentum = 0.9)
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# reported alongside the loss.
model.compile(loss = "binary_crossentropy", optimizer=opt, metrics=['accuracy'])


In [103]:
# Train for four epochs, validating on the dev split after each one.
model.fit(
    X_train_np,
    Y_train_np,
    validation_data=(X_dev_np, Y_dev_np),
    epochs=4,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x20f936f1cc0>

In [104]:
# y_test_pred = model.predict(X_test_np)
# Prints the test loss followed by the accuracy metric set in compile().
print(model.evaluate(x=X_test_np, y=Y_test_np))

[0.6259543299674988, 0.6964285969734192]


In [105]:
# List every layer with its output shape, parameter count and inbound layers.
model.summary()

Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 5000, 6)]    0           []                               
                                                                                                  
 conv1d_177 (Conv1D)            (None, 5000, 128)    2432        ['input_1[0][0]']                
                                                                                                  
 conv1d_178 (Conv1D)            (None, 5000, 128)    2432        ['input_1[0][0]']                
                                                                                                  
 add_100 (Add)                  (None, 5000, 128)    0           ['conv1d_177[0][0]',             
                                                                  'conv1d_178[0][0]']      