In [14]:
from pathlib import Path
import pandas as pd

# from keras.layers import BatchNormalization, Dense, Input, Conv1D, Add, ELU, Flatten, MaxPooling1D
# from keras.layers import GlobalAveragePooling1D, Softmax, Concatenate, Reshape, Multiply, ReLU
# from keras.optimizers import SGD
# from keras import activations
# from keras import Model
# from keras.initializers import HeNormal
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# from keras.utils.vis_utils import plot_model
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torcheval.metrics import BinaryAccuracy, BinaryPrecision, BinaryRecall, BinaryF1Score, BinaryConfusionMatrix
from torchvision import datasets
from torchvision.transforms import Compose, ToTensor
from pytorch_symbolic import Input, SymbolicModel, useful_layers

In [15]:
import os
# dataset
class DAICDataset(Dataset):
    def __init__(
        self,
        annotations_file,
        sample_dir,
        feature_type,
        # transform=None,
        target_transform=None,
    ):
        self.depression_labels = pd.read_csv(annotations_file)
        self.sample_dir = sample_dir
        self.feature_type = feature_type
        self.transform = Compose([ToTensor()])
        self.target_transform = target_transform

    def __len__(self):
        return len(self.depression_labels)

    def __getitem__(self, idx):
        participant_id = str(self.depression_labels.iloc[idx, 0])
        individual = participant_id + "/" + participant_id + "_CLNF_" + self.feature_type + ".txt"
        participant_path = os.path.join(
            self.sample_dir,
            individual,
        )
        data = pd.read_csv(participant_path, sep=",")  # read_image(img_path)
        data.columns = data.columns.str.replace(" ", "")
        data.drop(columns=["frame", "timestamp", "confidence", "success"], inplace=True)
        label = self.depression_labels.iloc[idx, 1]
        if self.transform:
            data = self.transform(data.copy().loc[1000:5999].to_numpy(dtype="float32").transpose())
            # print(type(label))
            # label = self.transform(label)
        if self.target_transform:
            label = self.target_transform(label)
        return data[0], label


label_path = Path("original_daic/labels")
pose_train = DAICDataset(
    annotations_file = label_path / "train_split_Depression_AVEC2017.csv",
    sample_dir = "original_daic/train",
    feature_type = "pose",
    # transform=None,
    target_transform=None,
)
pose_dev = DAICDataset(
    annotations_file = label_path / "dev_split_Depression_AVEC2017.csv",
    sample_dir = "original_daic/dev",
    feature_type = "pose",
    # transform=None,
    target_transform=None,
)
# pose_test = DAICDataset(
#     annotations_file = label_path / "full_test_split.csv",
#     sample_dir = "original_daic/test",
#     feature_type = "pose",
#     # transform=None,
#     target_transform=None,
# )

In [16]:
# dataloader
BATCH_SIZE = 1
train_dataloader = DataLoader(pose_train, batch_size=BATCH_SIZE, shuffle=True)
dev_dataloader = DataLoader(pose_dev, batch_size=BATCH_SIZE, shuffle=True)
# test_dataloader = DataLoader(pose_test, batch_size=BATCH_SIZE, shuffle=True)

In [17]:
input_pose = Input(shape=[6, 5000])
tdcn_dim_pose = [input_pose.shape[1],128,64,256,128,64] # used in Guo's paper
# tdcn_dim_pose = [input_pose[0],128,128,128,128,128]

In [18]:
def diluted_conv_block(inputs, input_dim, feature_dim):
    # with K.name_scope(block_name)
    l1_p1 = nn.Conv1d(input_dim, feature_dim, kernel_size=3, padding="same", dilation=1, groups=1, bias=True)(inputs)
    l1_p2 = nn.Conv1d(input_dim, feature_dim, kernel_size=3, padding="same", dilation=1, groups=1, bias=True)(inputs)
    # l1_add = Add()([l1_p1, l1_p2])
    l1_ELU = nn.ELU()(l1_p1 + l1_p2)
    # second layer of the DCB
    l2_p1 = nn.Conv1d(feature_dim, feature_dim, kernel_size=3, padding="same", dilation=2, groups=1, bias=True)(l1_ELU)
    l2_p2 = nn.Conv1d(feature_dim, feature_dim, kernel_size=3, padding="same", dilation=2, groups=1, bias=True)(l1_ELU)
    # l2_add = Add()([l2_p1, l2_p2])
    l2_ELU = nn.ELU()(l2_p1 + l2_p2)
    # third layer of the DCB
    l3_p1 = nn.Conv1d(feature_dim, feature_dim, kernel_size=3, padding="same", dilation=4, groups=1, bias=True)(l2_ELU)
    l3_p2 = nn.Conv1d(feature_dim, feature_dim, kernel_size=3, padding="same", dilation=4, groups=1, bias=True)(l2_ELU)
    # l3_add = Add()([l3_p1, l3_p2])
    l3_ELU = nn.ELU()(l3_p1 + l3_p2)

    residual = nn.Conv1d(input_dim, feature_dim, kernel_size=1, padding="same")(inputs)
    # res_add = Add()([l3_ELU, residual])
    # res_add = Add()([l1_ELU, residual])
    # res_add = ELU()(res_add)
    bn = nn.BatchNorm1d(num_features=feature_dim)(l3_ELU + residual)
    return bn

In [19]:
def time_diluted_conv_net(feature_dim, input_layer, pool_size, pool_stride):
    dcb_1 = diluted_conv_block(input_layer, feature_dim[0], feature_dim[1])
    mp_1 = nn.MaxPool1d(pool_size, stride=pool_stride)(dcb_1)
    dcb_2 = diluted_conv_block(mp_1, feature_dim[1], feature_dim[2])
    mp_2 = nn.MaxPool1d(pool_size, stride=pool_stride)(dcb_2)
    dcb_3 = diluted_conv_block(mp_2, feature_dim[2], feature_dim[3])
    mp_3 = nn.MaxPool1d(pool_size, stride=pool_stride)(dcb_3)
    dcb_4 = diluted_conv_block(mp_3, feature_dim[3], feature_dim[4])
    mp_4 = nn.MaxPool1d(pool_size, stride=pool_stride)(dcb_4)
    dcb_5 = diluted_conv_block(mp_4, feature_dim[4], feature_dim[5])
    return dcb_5

tdcn_pose = time_diluted_conv_net(
    feature_dim = tdcn_dim_pose, 
    input_layer = input_pose, 
    pool_size = 2, 
    pool_stride = 2,
    )

# concat = useful_layers.ConcatLayer([tdcn_pose])
gap_layer = nn.AdaptiveMaxPool1d(1)(tdcn_pose)
# print(gap_layer.shape)
# linear_layer_1 = nn.Linear(gap_layer.shape[1], gap_layer.shape[1])(gap_layer)
linear_layer_1 = nn.Linear(1, 1)(gap_layer)
relu_layer = nn.ReLU()(linear_layer_1)
# linear_layer_2 = nn.Linear(gap_layer.shape[1], gap_layer.shape[1])(relu_layer)
linear_layer_2 = nn.Linear(1, 1)(relu_layer)
sigmoid_layer = nn.Sigmoid()(linear_layer_2)
reshape = sigmoid_layer
for _ in range(0, tdcn_pose.shape[2]-1):
    reshape = useful_layers.ConcatLayer(dim=2)(reshape, sigmoid_layer)
# print(reshape.shape)
# print(tdcn_pose.shape)
print((tdcn_pose*reshape).shape)
flatten = nn.Flatten()(tdcn_pose*reshape)
print(flatten.shape)
FC_l1 = nn.Linear(flatten.shape[1], 16)(flatten)(nn.ReLU())
FC_l2 = nn.Linear(FC_l1.shape[1], 12)(FC_l1)(nn.ReLU())
FC_l3 = nn.Linear(FC_l2.shape[1], 8)(FC_l2)(nn.ReLU())
last_layer = nn.Linear(FC_l3.shape[1], 2)(FC_l3)(nn.Sigmoid())
output = nn.Softmax(1)(last_layer)
# output = nn.Linear(FC_l3.shape[1], 1)(FC_l3)

torch.Size([1, 64, 312])
torch.Size([1, 19968])


In [20]:
device = "cpu"

model = SymbolicModel(inputs=input_pose, outputs=output).to(device)

loss_fn = nn.CrossEntropyLoss()
# loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=2e-5, momentum=0.9)
# model.summary()

In [21]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        # pred = model(X)[0]
        loss = loss_fn(pred, y.long())
        # loss = loss_fn(pred, y.float())

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [22]:
def dev(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred=model(X)
            # pred = model(X)[0]
            test_loss += loss_fn(pred, y.long()).item()
            # test_loss += loss_fn(pred, y.float()).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            # correct += ((pred > 0.5) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [38]:
def test(dataloader, model):
    # size = len(dataloader.dataset)
    model.eval()
    preds = []
    threshold = 0.5
    accuracy = BinaryAccuracy(threshold=threshold)
    precision = BinaryPrecision(threshold=threshold)
    recall = BinaryRecall(threshold=threshold)
    f1score = BinaryF1Score(threshold=threshold)
    cm = BinaryConfusionMatrix(threshold=threshold)
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            preds.append(pred)
            pred = torch.unsqueeze(torch.argmax(pred), 0)
            # pred = model(X)[0]
            accuracy.update(pred, y)
            precision.update(pred, y)
            recall.update(pred, y)
            f1score.update(pred, y)
            cm.update(pred, y)
    f_acc = accuracy.compute()
    f_pre = precision.compute()
    f_rec = recall.compute()
    f_f1s = f1score.compute()
    f_cm = cm.compute()
    return [f_acc,f_pre,f_rec,f_f1s,f_cm,preds]
            

In [24]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    dev(dev_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.715810  [    1/  107]
loss: 0.668932  [  101/  107]
Test Error: 
 Accuracy: 34.3%, Avg loss: 0.700787 

Epoch 2
-------------------------------
loss: 0.716018  [    1/  107]
loss: 0.716078  [  101/  107]
Test Error: 
 Accuracy: 34.3%, Avg loss: 0.700586 

Epoch 3
-------------------------------
loss: 0.669173  [    1/  107]
loss: 0.671814  [  101/  107]
Test Error: 
 Accuracy: 34.3%, Avg loss: 0.700582 

Epoch 4
-------------------------------
loss: 0.715042  [    1/  107]
loss: 0.715297  [  101/  107]
Test Error: 
 Accuracy: 34.3%, Avg loss: 0.700399 

Epoch 5
-------------------------------
loss: 0.716617  [    1/  107]
loss: 0.714244  [  101/  107]
Test Error: 
 Accuracy: 34.3%, Avg loss: 0.700295 

Done!


In [39]:
metrics = test(dev_dataloader, model)
print(metrics)

[tensor(0.2979), tensor(0.2979), tensor(1.), tensor(0.4590), tensor([[ 0., 33.],
        [ 0., 14.]]), [tensor([[0.4787, 0.5213]]), tensor([[0.4764, 0.5236]]), tensor([[0.4769, 0.5231]]), tensor([[0.4788, 0.5212]]), tensor([[0.4752, 0.5248]]), tensor([[0.4802, 0.5198]]), tensor([[0.4739, 0.5261]]), tensor([[0.4822, 0.5178]]), tensor([[0.4766, 0.5234]]), tensor([[0.4713, 0.5287]]), tensor([[0.4759, 0.5241]]), tensor([[0.4786, 0.5214]]), tensor([[0.4748, 0.5252]]), tensor([[0.4761, 0.5239]]), tensor([[0.4706, 0.5294]]), tensor([[0.4788, 0.5212]]), tensor([[0.4740, 0.5260]]), tensor([[0.4731, 0.5269]]), tensor([[0.4779, 0.5221]]), tensor([[0.4705, 0.5295]]), tensor([[0.4788, 0.5212]]), tensor([[0.4749, 0.5251]]), tensor([[0.4751, 0.5249]]), tensor([[0.4760, 0.5240]]), tensor([[0.4750, 0.5250]]), tensor([[0.4789, 0.5211]]), tensor([[0.4788, 0.5212]]), tensor([[0.4770, 0.5230]]), tensor([[0.4733, 0.5267]]), tensor([[0.4728, 0.5272]]), tensor([[0.4761, 0.5239]]), tensor([[0.4775, 0.5225]]), 