# Imports & get data

In [1]:
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [3]:
from comet_ml import Experiment
from comet_ml.integration.pytorch import log_model

In [4]:
import os
import pickle
from tqdm import tqdm
from copy import deepcopy

import numpy as np
import pandas as pd

from sklearn.metrics import f1_score

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset

In [9]:
with open('fea_notebooks/features_newvf2.pickle', 'rb') as handle:
    data=pickle.load(handle)
print(len(data))

2941546


In [14]:
print(f"Torch: {torch.__version__}")
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

Torch: 2.0.1+cu118
cuda:0


# Action Unit Detection
## Get target data

In [15]:
idx_to_class_2={0: 'Neutral', 1:'Anger', 2:'Disgust', 3:'Fear', 
                4:'Happiness', 5:'Sadness', 6:'Surprise', 7:'Other'} #ABAW
classes = ['Neutral', 'Anger', 'Disgust', 'Fear', 
           'Happiness', 'Sadness', 'Surprise', 'Other']

In [16]:
DATA_DIR = '/home/HDD6TB/datasets/emotions/ABAW/ABAW_6/6th_ABAW_Annotations'

def get_image2Expr(dirname):
    dirpath=os.path.join(DATA_DIR,'AU_Detection_Challenge/',dirname)
    num_missed=[]
    targets = {}
    folders = []
    for filename in os.listdir(dirpath):
        fn, ext = os.path.splitext(os.path.basename(filename))
        fn_short=fn
        if fn.endswith('_left'):
            fn_short=fn[:-5]
        elif fn.endswith('_right'):
            fn_short=fn[:-6]
    
        if ext.lower()=='.txt':
            folders.append(fn)
            with open(os.path.join(dirpath,filename)) as f:
                lines = f.read().splitlines()
                for i,line in enumerate(lines):
                    if i>0:
                        splitted_line=line.split(',')
                        aus=list(map(int,splitted_line))
                        if min(aus)>=0:
                            imagename_short=fn_short+'/'+str(i).zfill(5)+'.jpg'
                            imagename=fn+'/'+str(i).zfill(5)+'.jpg'
                            has_image=imagename_short in data
                            
                            if dirname=='Validation_Set':
                                has_frame=os.path.exists(os.path.join(DATA_DIR,
                                                                      'cropped_aligned/cropped_aligned',
                                                                      imagename))
                                has_frame=has_frame or os.path.exists(os.path.join(DATA_DIR,
                                                                    'cropped_aligned/cropped_aligned_new_50_vids',
                                                                                   imagename))
                                if has_image:
                                    has_image=has_frame
                                elif has_frame:
                                    imagename=fn_short+'/'+get_names(i-1)+'.jpg'
                                    has_image=imagename in data
                                    
                            if has_image:
                                targets[imagename] = aus
                            else:
                                num_missed.append(imagename)
                            
                        
    print(len(targets), len(num_missed))
    return targets, num_missed, folders

y_train, num_missed_train, train_f = get_image2Expr('Train_Set')
y_val, num_missed_test, val_f =get_image2Expr('Validation_Set')

1336508 23015
445845 0


## Get train samples

In [18]:
experiment = Experiment(
  api_key="XhQqrLR91F7zW3AZ7LgVT3zp2",
  project_name="abaw6",
  workspace="annanet"
)

experiment.set_name('au-tcn_attention+only_vid')
experiment.add_tags(['au_classif_tcn - tcn_attention+only_vid', 'v2_tcn+only_vid'])

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/home/hse_student/apsidorova' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/annanet/abaw6/ae2ed106d039429fa7201980d5a6d1b3



In [19]:
def train_val_split(y_train, y_val):
    w2v2large_t, openl3_t, w2v2hub_t, y_t = \
    [], [], [], []
    X_t, X_v = [], []
    for key in y_train.keys():
        X_t.append(data[key]['frame'][0])
        y_t.append(y_train[key])

    w2v2large_v, openl3_v, w2v2hub_v, y_v = \
    [], [], [], []
    
    for key in y_val.keys():
        X_v.append(data[key]['frame'][0])
        y_v.append(y_val[key])

    print(len(X_t), len(w2v2large_t), len(openl3_t), len(w2v2hub_t), len(y_t),
          len(X_v), len(w2v2large_v), len(openl3_v), len(w2v2hub_v), len(y_v))
    
    return (np.array(X_t),np.array(w2v2large_t), 
            np.array(openl3_t), np.array(w2v2hub_t), 
            np.array(y_t),
            np.array(X_v), np.array(w2v2large_v), 
            np.array(openl3_v), np.array(w2v2hub_v), 
            np.array(y_v))

X_t, w2v2large_t, openl3_t, w2v2hub_t, y_t, \
X_v, w2v2large_v, openl3_v, w2v2hub_v, y_v = train_val_split(y_train, y_val)

1336508 0 0 0 1336508 445845 0 0 0 445845


In [20]:
from math import ceil


class audioDataset(Dataset):
    def __init__(self, names, values, y, window=300, step=200):
        self.data = values
        self.lenghts_of_seq = len(self.data[0])
        self.y = y
        
        self.names = names
        self.window = window
        self.step = step
        
        self.len = ceil((self.lenghts_of_seq - self.window) / self.step) + 1
        
    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        if idx == self.len-1:
            stride = [torch.from_numpy(i[-self.window::]) for i in self.data]
            y = self.y[-self.window::]
        
        else:
            stride = [torch.from_numpy(i[idx*self.step:idx*self.step + self.window]) for i in self.data]
            y = self.y[idx*self.step:idx*self.step + self.window]
            
        r_dict = dict([(i, j.to(device)) for i, j in zip(self.names, stride)])
        
        return r_dict, y

In [21]:
taudio = audioDataset(['frames'],
                     [X_t], y_t)
training_loader = DataLoader(taudio, batch_size=64, shuffle=False)

In [24]:
vaudio = audioDataset(['frames'],
                     [X_v], y_v,
                     window=300, step=300)
validation_loader = DataLoader(vaudio, batch_size=64, shuffle=False)
len(vaudio), len(validation_loader)

(1487, 24)

In [26]:
num_labels=y_t.shape[1]
print(num_labels)
from sklearn.utils.class_weight import compute_class_weight
class_weights = np.empty([num_labels, 2])
for i in range(num_labels):
    neg, pos = np.bincount(y_t[:, i])
    total = neg + pos
    weight_for_0 = (1 / neg) * (total / 2.0)
    weight_for_1 = (1 / pos) * (total / 2.0)

    class_weights[i][0]=weight_for_0
    class_weights[i][1]=weight_for_1
    #class_weights[i] = compute_class_weight('balanced', [0,1], y_train[:, i])
print(class_weights)

12
[[ 0.5676408   4.19599397]
 [ 0.52621781 10.03550136]
 [ 0.58741717  3.35984997]
 [ 0.67448898  1.93275527]
 [ 0.82961596  1.25845841]
 [ 0.76377734  1.44776905]
 [ 0.66014022  2.0611319 ]
 [ 0.51432323 17.95416443]
 [ 0.51636598 15.77559018]
 [ 0.51306002 19.64239734]
 [ 1.33713913  0.79863614]
 [ 0.54020709  6.71780849]]


## TCN

In [27]:
from tcn import TemporalConvNet
from trans_encoder import TransEncoder


class Model(nn.Module):
    def __init__(self, modality=['frames', 'w2v2large', 'openl3', 'w2v2hub'],
                 embedding_dim={'frames': 1280, 'w2v2large': 1024, 'openl3': 512,
                                'w2v2hub': 256},
                 tcn_channel={
                     'frames': [1280, 512, 256, 128],
                     'w2v2large': [1024, 512, 256, 128],
                     'openl3': [512, 256, 128],
                     'w2v2hub': [256, 128]
    }):
        super(Model, self).__init__()
        self.modality = modality

        self.temporal, self.fusion = nn.ModuleDict(), None

        for modal in self.modality:
            self.temporal[modal] = TemporalConvNet(num_inputs=embedding_dim[modal],
                                                   num_channels=tcn_channel[modal], dropout=0.3, attention=False)

        conv_dim = 0
        for m in self.modality:
            conv_dim += tcn_channel[m][-1]
            
        self.encoder = TransEncoder(
            inc=conv_dim, outc=256, dropout=0.3, nheads=4, 
            nlayer=8)
            
        self.head = nn.Sequential(
            nn.Linear(256, 256//2),
            nn.BatchNorm1d(256//2),
            nn.Linear(256//2, 12),
        )

    def forward(self, x):
        
        bs, seq_len, _ = x[self.modality[0]].shape
#         print(bs, seq_len)
        for m in self.modality:
            x[m] = x[m].transpose(1, 2)
            x[m] = self.temporal[m](x[m])

        feat_list = []
        for m in self.modality:
            feat_list.append(x[m])
        out = torch.cat(feat_list, dim=1)
        out = self.encoder(out)

        out = torch.transpose(out, 1, 0)
        out = torch.reshape(out, (bs*seq_len, -1))
#         print(out.shape)

        out = self.head(out)
        return F.sigmoid(out)

model = Model(modality=['frames'], embedding_dim={'frames': 1280},
              tcn_channel={
                     'frames': [1280, 512, 256, 128]})
model.to(device);

In [28]:
hyperparams = {
    'epochs': 100,
    'loss': 'categorical_crossentropy',
    'lr': 1e-4,
    'batch': 64
}

## Train

In [29]:
class_frequencies = y_t.sum(axis=0)  # Example class frequencies

# Compute class weights
total_samples = np.sum(class_frequencies)
class_weights = total_samples / (len(class_frequencies) * class_frequencies)

# Normalize the weights
normalized_weights = class_weights / np.sum(class_weights)
print("Class Weights:", normalized_weights)


Class Weights: [0.04926029 0.11781515 0.03944409 0.02269023 0.0147741  0.01699657
 0.02419735 0.21077897 0.18520286 0.23059855 0.00937586 0.07886598]


In [30]:
experiment.log_parameters(hyperparams)

loss_fn = nn.BCELoss(weight=torch.Tensor(normalized_weights)).to(device)
optimizer = optim.Adam(model.parameters(), lr=hyperparams['lr'])

best_model_params = deepcopy(model.state_dict())

best_vloss = 0

In [31]:
for epoch in range(hyperparams['epochs']):
    print('EPOCH {}:'.format(epoch + 1))
    pred_labels_train, pred_labels_val = [], []
    labels_train, labels_val = [], []

    model.train(True)
    
    running_loss = 0.
    last_loss = 0.

    for i, tdata in enumerate(training_loader):
        inputs, labels = tdata
#         print(labels.shape)
        labels = labels.reshape(labels.shape[0]*labels.shape[1], 12).to(device).to(torch.float32)
        optimizer.zero_grad()

        outputs = model(inputs).to(torch.float32)

        loss = loss_fn(outputs, labels)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        
        predicted = outputs >= 0.5
        pred_labels_train += predicted.tolist()
        labels_train += labels.tolist()
        
        del tdata
        del inputs
        del labels
        torch.cuda.empty_cache()
    
    avg_loss = running_loss / i + 1
    experiment.log_metric('loss_train', avg_loss, 
                          epoch=epoch)
    
    running_vloss = 0.0
    model.eval()
    with torch.no_grad():
        for i, vdata in enumerate(validation_loader):
            vinputs, vlabels = vdata
            vlabels = vlabels.reshape(vlabels.shape[0]*vlabels.shape[1], 12).to(device).to(torch.float32)
            voutputs = model(vinputs).to(torch.float32)
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss
            
            predicted = voutputs >= 0.5
            pred_labels_val += predicted.tolist()
            labels_val += vlabels.tolist()
            
            del vinputs
            del vlabels
            del vdata
            torch.cuda.empty_cache()

    avg_vloss = running_vloss / (i + 1)
    val_f1 = f1_score(labels_val,
                      pred_labels_val,
                      average='macro')
    print('LOSS train {} valid {}, f1_valid {}'.format(avg_loss, avg_vloss, val_f1))
    experiment.log_metric('loss_val', avg_vloss, 
                          epoch=epoch)
    experiment.log_metric('f1_val', f1_score(labels_val,
                                             pred_labels_val,
                                             average='macro'), 
                          epoch=epoch)
    experiment.log_metric('f1_train', f1_score(labels_train,
                                               pred_labels_train,
                                               average='macro'), 
                          epoch=epoch)
    
    if val_f1 > best_vloss:
        best_vloss = val_f1
        best_model_params = deepcopy(model.state_dict())

EPOCH 1:
LOSS train 1.0581288025356257 valid 0.054938580840826035, f1_valid 0.4203890406070508
EPOCH 2:
LOSS train 1.0555026049797351 valid 0.052813004702329636, f1_valid 0.45734813269999064
EPOCH 3:
LOSS train 1.0532442908734083 valid 0.052626147866249084, f1_valid 0.45378812383079886
EPOCH 4:
LOSS train 1.0507661618578892 valid 0.05199640616774559, f1_valid 0.46491715382829835
EPOCH 5:
LOSS train 1.0475232141952102 valid 0.04940766096115112, f1_valid 0.47945981018257156
EPOCH 6:
LOSS train 1.0437838695585155 valid 0.04869481548666954, f1_valid 0.38035818078821565
EPOCH 7:
LOSS train 1.0399187770672143 valid 0.04167936369776726, f1_valid 0.47671726715438895
EPOCH 8:
LOSS train 1.0351738707663922 valid 0.03583599999547005, f1_valid 0.4762320873938961
EPOCH 9:
LOSS train 1.03093066596641 valid 0.031756747514009476, f1_valid 0.45533031652625433
EPOCH 10:
LOSS train 1.0273200672370597 valid 0.027871619910001755, f1_valid 0.4642095540545541
EPOCH 11:
LOSS train 1.0241816272565092 valid 0.0

EPOCH 87:
LOSS train 1.0056376429364229 valid 0.020360615104436874, f1_valid 0.5004861708402742
EPOCH 88:
LOSS train 1.0067766384163406 valid 0.021262196823954582, f1_valid 0.5170883036005421
EPOCH 89:
LOSS train 1.0062005240917922 valid 0.01980806514620781, f1_valid 0.5049342348412177
EPOCH 90:
LOSS train 1.0063066433918162 valid 0.021143442019820213, f1_valid 0.49932607324861183
EPOCH 91:
LOSS train 1.005685668483448 valid 0.021048245951533318, f1_valid 0.5163708483738879
EPOCH 92:
LOSS train 1.0053922819385592 valid 0.020866943523287773, f1_valid 0.5209084849642078
EPOCH 93:
LOSS train 1.0051987042780428 valid 0.02161431312561035, f1_valid 0.5104129010186609
EPOCH 94:
LOSS train 1.0051621683568766 valid 0.020858921110630035, f1_valid 0.5127444748104263
EPOCH 95:
LOSS train 1.0050634474430877 valid 0.021566763520240784, f1_valid 0.4947461244526925
EPOCH 96:
LOSS train 1.0049856732948683 valid 0.021650217473506927, f1_valid 0.4964087180875594
EPOCH 97:
LOSS train 1.0050155714673634 va

In [32]:
model.load_state_dict(best_model_params)
pred_labels_val, labels_val = [], []

model.eval()
with torch.no_grad():
    for i, vdata in enumerate(tqdm(validation_loader)):
        vinputs, vlabels = vdata
        vlabels = vlabels.reshape(vlabels.shape[0]*vlabels.shape[1], 12).to(device).to(torch.float32)
        voutputs = model(vinputs).to(torch.float32)

        predicted = voutputs >= 0.5
        pred_labels_val += predicted.tolist()
        labels_val += vlabels.tolist()

        del vinputs
        del vlabels
        del vdata
        torch.cuda.empty_cache()

100%|██████████| 24/24 [00:02<00:00,  8.72it/s]


In [33]:
f1 = f1_score(labels_val,
        pred_labels_val,
        average='macro')

In [35]:
(np.array(labels_val)==np.array(pred_labels_val)).mean()

0.8827109018904581

In [36]:
f1

0.5249778203411767

In [37]:
torch.save(model.state_dict(), 'simplemodel.pt')
experiment.log_model("simplemodel.pt", file_or_folder="simplemodel.pt")

{'web': 'https://www.comet.com/api/asset/download?assetId=cb423bac2950493eb150968b644a2d88&experimentKey=ae2ed106d039429fa7201980d5a6d1b3',
 'api': 'https://www.comet.com/api/rest/v2/experiment/asset/get-asset?assetId=cb423bac2950493eb150968b644a2d88&experimentKey=ae2ed106d039429fa7201980d5a6d1b3',
 'assetId': 'cb423bac2950493eb150968b644a2d88'}

In [38]:
experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/annanet/abaw6/ae2ed106d039429fa7201980d5a6d1b3
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     f1_train [100]   : (0.3215614208145707, 0.8368070894883383)
[1;38;5;39mCOMET INFO:[0m     f1_val [100]     : (0.38035818078821565, 0.5249778203411767)
[1;38;5;39mCOMET INFO:[0m     loss [1050]      : (0.001998973311856389, 0.061403483152389526)
[1;38;5;39mCOMET INFO:[0m     loss_train [100] : (1.004955767785969, 1.0581288025356257)
[1;38;5;39mCOMET INFO:[0m     loss_val [100]   : (0.016075514