In [None]:
import numpy as np
import tensorflow as tf
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Preprocess the facial action unit data for each video frame
# by aligning and cropping the faces, and possibly extracting
# relevant features such as Action Units (AUs) or Facial Action
# Coding System (FACS) codes.

# Assume that the preprocessed data is stored in a NumPy array
# called "X" with shape (n_samples, sequence_length, n_features),
# and the binary labels in a NumPy array called "y" with shape (n_samples,).
X = ...
y = ...

# Take the model's input dimensions from the data itself so that
# `sequence_length` and `n_features` are always defined and consistent with X.
sequence_length, n_features = X.shape[1:]

# Split the dataset into training and test sets. The sequences of facial
# action units must already be padded or truncated to the same length.
# A fixed random_state keeps the split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define a neural network model with an attention layer.
# tf.keras.layers.Attention expects a [query, value] list of 3-D sequence
# tensors, so it cannot be stacked inside a Sequential model, and the LSTM
# has to return the whole sequence (return_sequences=True) rather than only
# its final state. The functional API is used for that reason.
inputs = tf.keras.layers.Input(shape=(sequence_length, n_features))
encoded = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(64, return_sequences=True)
)(inputs)
# Self-attention: every frame attends over all frames of the same sequence.
attended = tf.keras.layers.Attention()([encoded, encoded])
# Collapse the time axis so the classifier head sees one vector per sample.
pooled = tf.keras.layers.GlobalAveragePooling1D()(attended)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(pooled)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile the model with an appropriate loss function
# (such as binary cross-entropy) and optimization algorithm
# (such as Adam).
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model on the training data
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

# Use the model to make predictions on new data
y_pred = model.predict(X_test)


In [None]:
# Get the attention layer of the model. It is looked up by type rather than
# by position so the lookup does not depend on how many layers precede it.
attention_layer = next(
    layer for layer in model.layers
    if isinstance(layer, tf.keras.layers.Attention)
)

# Get the attention weights for a given input.
# A default tf.keras.layers.Attention has no trainable parameters, so
# get_weights() returns an empty list. The attention distribution is computed
# from the inputs, so re-apply the layer to its own inputs and request the
# scores, then expose them through a helper model.
_, score_tensor = attention_layer(
    attention_layer.input, return_attention_scores=True
)
score_model = tf.keras.Model(inputs=model.input, outputs=score_tensor)

# Scores have one row per query frame and one column per attended frame for
# every sample; averaging over samples and query frames leaves one weight per
# attended frame.
attention_scores = score_model.predict(X_test)
attention_weights = np.mean(attention_scores, axis=(0, 1))

# Normalize the attention weights
attention_weights = attention_weights / np.max(attention_weights)

# Visualize the attention weights
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(attention_weights)
ax.set(
    title='Mean attention per frame (test set)',
    xlabel='Frame index',
    ylabel='Normalized attention weight',
)
plt.show()


In [None]:
import numpy as np

# Preprocess the facial action unit data for each video frame
# by aligning and cropping the faces, and possibly extracting
# relevant features such as Action Units (AUs) or Facial Action
# Coding System (FACS) codes.

# The preprocessed data "X" with shape (n_samples, sequence_length, n_features)
# defined earlier in the notebook is reused here; re-assigning the placeholder
# in this cell would discard the data the model was trained on.

# Number of top-attended frames to keep; tune as needed.
n_most_important = 5

# Get the attention weights for a given sample
sample_index = 0  # choose a sample index

# A default tf.keras.layers.Attention has no trainable parameters, so
# get_weights() cannot provide attention weights. They depend on the input and
# are obtained by re-applying the layer with return_attention_scores=True.
attention_layer = next(
    layer for layer in model.layers
    if isinstance(layer, tf.keras.layers.Attention)
)
_, score_tensor = attention_layer(
    attention_layer.input, return_attention_scores=True
)
score_model = tf.keras.Model(inputs=model.input, outputs=score_tensor)

# Slice (rather than index) so the batch axis is kept for predict().
sample_scores = score_model.predict(X[sample_index:sample_index + 1])[0]
# Average over query frames: one weight per attended frame.
attention_weights = np.mean(sample_scores, axis=0)

# Normalize the attention weights
attention_weights = attention_weights / np.max(attention_weights)

# Get the facial action unit data for the sample
facial_action_units = X[sample_index]

# Sort the attention weights in descending order
sorted_indices = np.argsort(attention_weights)[::-1]

# Extract the facial action units from the most important frames
most_important_facial_action_units = facial_action_units[sorted_indices[:n_most_important]]

# You can then analyze the most important facial action units to see if there are any patterns or trends that might be indicative of depression.


In [None]:
# Define a dictionary of rules that map action units to emotions.
# Keys are FACS action unit numbers; a value of 1 means the AU must be
# present and 0 means it must be absent.
rules = {
    # AU6 cheek raiser + AU12 lip corner puller, both present
    'happiness': {6: 1, 12: 1},
    # AU1 inner brow raiser, AU4 brow lowerer, AU15 lip corner depressor
    'sadness': {1: 1, 4: 1, 15: 1},
    # AU1 inner brow raiser, AU2 outer brow raiser, AU5 upper lid raiser
    'surprise': {1: 1, 2: 1, 5: 1},
    # AU1 inner brow raiser, AU2 outer brow raiser, AU4 brow lowerer,
    # AU5 upper lid raiser, AU7 lid tightener, AU20 lip stretcher
    'fear': {1: 1, 2: 1, 4: 1, 5: 1, 7: 1, 20: 1},
    # AU4 brow lowerer, AU5 upper lid raiser, AU7 lid tightener, AU23 lip tightener
    'anger': {4: 1, 5: 1, 7: 1, 23: 1},
    # AU9 nose wrinkler, AU15 lip corner depressor, AU16 lower lip depressor
    'disgust': {9: 1, 15: 1, 16: 1},
}


In [None]:
To find the proportion of an interview in which each action unit (AU) occurred, 
you can use the action unit occurrence data from the dataset and calculate the 
proportion of frames in which each AU occurred. 
Here is sample Python code that demonstrates how you could do this:

import pandas as pd

# Read the per-frame action unit occurrence table from the CSV file
occurrences = pd.read_csv('occurrences.csv')

# The number of rows is used as the total number of frames in the interview
n_frames = len(occurrences)

# Map every action unit column (name starting with 'AU') to the proportion
# of frames in which it occurred: column total divided by the frame count
au_proportions = {
    au_name: occurrences[au_name].sum() / n_frames
    for au_name in occurrences.columns
    if au_name.startswith('AU')
}

# Show the resulting proportions
print(au_proportions)


This code loads the action unit occurrences from the CSV file, 
calculates the total number of frames in the interview, initializes a 
dictionary to store the proportions of frames in which each AU occurred, 
and then iterates over the columns in the dataframe to calculate the proportion 
of frames in which each AU occurred. Finally, it prints the proportions of frames 
in which each AU occurred.



In [None]:
I have a dataset with three modalities: video, text, and audio. 
For video, I have OpenFace output, which consists of Action Units, 
head pose, and eye gaze. For audio, I have MFCC and eGeMAPS 
features, and for text I have transcripts. I also have deep 
representations from the videos, extracted with ResNet and VGG networks. 
These data come from interviews with 200 participants. 
The label of the dataset is the depression severity of each 
person, which can be a number from 1 to 50. Can you please tell
me how I can train a machine-learning model with all these modalities to detect depression severity?


import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the action unit intensities and occurrences from the CSV file
au_intensities = pd.read_csv('au_intensities.csv')
au_occurrences = pd.read_csv('au_occurrences.csv')

# Load the head pose and eye gaze features from the CSV file
head_pose = pd.read_csv('head_pose.csv')
eye_gaze = pd.read_csv('eye_gaze.csv')

# Load the MFCC and eGeMaps features from the CSV file
mfcc = pd.read_csv('mfcc.csv')
egemaps = pd.read_csv('egemaps.csv')

# Load the transcripts from the CSV file
# NOTE(review): the model below needs numeric features; presumably this file
# already holds vectorized text (e.g. embeddings) rather than raw strings - confirm.
transcripts = pd.read_csv('transcripts.csv')

# Load the deep representations from the CSV files
resnet = pd.read_csv('resnet.csv')
vgg = pd.read_csv('vgg.csv')

# Load the depression severity labels from the CSV file
labels = pd.read_csv('labels.csv')

# Split the data into training and test sets.
# Every table goes through one train_test_split call so that all modalities
# and the labels receive exactly the same participant split.
# NOTE(review): assumes each CSV has one row per participant, in the same
# order as labels.csv - confirm before relying on the results.
(
    au_intensities_train, au_intensities_test,
    au_occurrences_train, au_occurrences_test,
    head_pose_train, head_pose_test,
    eye_gaze_train, eye_gaze_test,
    mfcc_train, mfcc_test,
    egemaps_train, egemaps_test,
    transcripts_train, transcripts_test,
    resnet_train, resnet_test,
    vgg_train, vgg_test,
    labels_train, labels_test,
) = train_test_split(
    au_intensities, au_occurrences, head_pose, eye_gaze, mfcc, egemaps,
    transcripts, resnet, vgg, labels,
    test_size=0.2, random_state=42,
)

# Train separate models for each modality using the training data.
# Depression severity is a numeric score evaluated with mean squared error,
# so regressors are used instead of classifiers.
au_intensities_model = RandomForestRegressor(random_state=42).fit(au_intensities_train, labels_train)
au_occurrences_model = RandomForestRegressor(random_state=42).fit(au_occurrences_train, labels_train)
head_pose_model = RandomForestRegressor(random_state=42).fit(head_pose_train, labels_train)
eye_gaze_model = RandomForestRegressor(random_state=42).fit(eye_gaze_train, labels_train)
mfcc_model = RandomForestRegressor(random_state=42).fit(mfcc_train, labels_train)
egemaps_model = RandomForestRegressor(random_state=42).fit(egemaps_train, labels_train)
transcripts_model = RandomForestRegressor(random_state=42).fit(transcripts_train, labels_train)
resnet_model = RandomForestRegressor(random_state=42).fit(resnet_train, labels_train)
vgg_model = RandomForestRegressor(random_state=42).fit(vgg_train, labels_train)

# Make predictions on the test data using each of the models
au_intensities_predictions = au_intensities_model.predict(au_intensities_test)
au_occurrences_predictions = au_occurrences_model.predict(au_occurrences_test)
head_pose_predictions = head_pose_model.predict(head_pose_test)
eye_gaze_predictions = eye_gaze_model.predict(eye_gaze_test)
mfcc_predictions = mfcc_model.predict(mfcc_test)
egemaps_predictions = egemaps_model.predict(egemaps_test)
transcripts_predictions = transcripts_model.predict(transcripts_test)
resnet_predictions = resnet_model.predict(resnet_test)
vgg_predictions = vgg_model.predict(vgg_test)

# Combine the predictions using a weighted average.
# The weighted sum is divided by the total weight so the result stays on the
# same scale as the labels regardless of what the weights add up to.
weighted_predictions = [
    (au_intensities_predictions, 0.1),
    (au_occurrences_predictions, 0.1),
    (head_pose_predictions, 0.1),
    (eye_gaze_predictions, 0.1),
    (mfcc_predictions, 0.1),
    (egemaps_predictions, 0.1),
    (transcripts_predictions, 0.1),
    (resnet_predictions, 0.2),
    (vgg_predictions, 0.2),
]
total_weight = sum(weight for _, weight in weighted_predictions)
predictions = sum(
    modality_predictions * weight
    for modality_predictions, weight in weighted_predictions
) / total_weight

# Calculate the mean squared error between the predictions and the true labels
mse = mean_squared_error(labels_test, predictions)
print('Mean squared error:', mse)


In [None]:
# Make predictions on the test data using each of the models
au_intensities_predictions = au_intensities_model.predict(au_intensities_test)
au_occurrences_predictions = au_occurrences_model.predict(au_occurrences_test)

In [None]:
import pandas as pd
import numpy as np

# Load the eye gaze data from the CSV file
eye_gaze = pd.read_csv('eye_gaze.csv')

# Load the head location and rotation data from the CSV file
head_location = pd.read_csv('head_location.csv')
head_rotation = pd.read_csv('head_rotation.csv')

# Set the threshold for significant eye gaze changes
eye_gaze_threshold = 0.05

# Absolute frame-to-frame change in X and Y, summed per frame. This is
# computed for the whole table at once instead of fetching rows one by one
# with .iloc inside a Python loop.
gaze_change = eye_gaze[['X', 'Y']].diff().abs().sum(axis=1).to_numpy()

# Frame 0 has no previous frame to compare against, so it is skipped and the
# positions found in the remaining frames are shifted back by one.
significant = gaze_change[1:] > eye_gaze_threshold

# Frame numbers whose change exceeds the threshold, and the eye gaze
# direction vector (X, Y, Z) at each of those frames
eye_gaze_frames = (np.flatnonzero(significant) + 1).tolist()
gaze_xyz = eye_gaze[['X', 'Y', 'Z']].to_numpy()
eye_gaze_vectors = [gaze_xyz[frame] for frame in eye_gaze_frames]

# Convert the head rotation data to a rotation matrix
Rx, Ry, Rz = head_rotation[['Rx', 'Ry', 'Rz']].values.T
Rx = np.array([[1, 0, 0], [0, np.cos(Rx), -np.sin(Rx)], [0, np.sin(Rx), np.cos(Rx)]])
Ry = np.array([[np.cos(Ry), 0, np.sin(Ry)], [0, 1, 0], [-np.sin(Ry), 0, np.cos(Ry)]]
