In [34]:
from __future__ import absolute_import, division, print_function

import csv
import os
import random
import pickle
import sys
import numpy as np
from typing import *

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score, f1_score

import wandb
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

from torch.nn import CrossEntropyLoss, L1Loss, MSELoss
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import matthews_corrcoef
from transformers import BertTokenizer, XLNetTokenizer, BertForSequenceClassification, get_linear_schedule_with_warmup
from transformers.models.bert.configuration_bert import BertConfig
from transformers.optimization import AdamW
from bert import MAG_BertForSequenceClassification
from xlnet import MAG_XLNetForSequenceClassification

from argparse_utils import str2bool, seed
from global_configs import ACOUSTIC_DIM, VISUAL_DIM, DEVICE

In [35]:
import easydict

args = easydict.EasyDict({
    "dataset": "mosi",
    "max_seq_length": 50,
    "train_batch_size": 48,
    "dev_batch_size" : 128,
    "test_batch_size": 128,
    "n_epochs": 40,
    "beta_shift": 1.0,
    "dropout_prob": 0.5,
    "model": "bert-base-uncased",
    "learning_rate": 1e-5,
    "gradient_accumulation_step": 1,
    "warmup_proportion": 0.1,
    "seed": seed("random")
})

In [36]:
class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, visual, acoustic, input_mask, segment_ids, label_id):
        self.input_ids = input_ids
        self.visual = visual
        self.acoustic = acoustic
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.label_id = label_id

In [37]:
class MultimodalConfig(object):
    def __init__(self, beta_shift, dropout_prob):
        self.beta_shift = beta_shift
        self.dropout_prob = dropout_prob

In [38]:
def convert_to_features(examples, max_seq_length, tokenizer):
    features = []

    for (ex_index, example) in enumerate(examples):

        (words, visual, acoustic), label_id, segment = example

        tokens, inversions = [], []
        for idx, word in enumerate(words):
            tokenized = tokenizer.tokenize(word)
            tokens.extend(tokenized)
            inversions.extend([idx] * len(tokenized))

        # Check inversion
        assert len(tokens) == len(inversions)

        aligned_visual = []
        aligned_audio = []

        for inv_idx in inversions:
            aligned_visual.append(visual[inv_idx, :])
            aligned_audio.append(acoustic[inv_idx, :])

        visual = np.array(aligned_visual)
        acoustic = np.array(aligned_audio)

        # Truncate input if necessary
        if len(tokens) > max_seq_length - 2:
            tokens = tokens[: max_seq_length - 2]
            acoustic = acoustic[: max_seq_length - 2]
            visual = visual[: max_seq_length - 2]

        if args.model == "bert-base-uncased":
            prepare_input = prepare_bert_input
        elif args.model == "xlnet-base-cased":
            prepare_input = prepare_xlnet_input

        input_ids, visual, acoustic, input_mask, segment_ids = prepare_input(
            tokens, visual, acoustic, tokenizer
        )

        # Check input length
        assert len(input_ids) == args.max_seq_length
        assert len(input_mask) == args.max_seq_length
        assert len(segment_ids) == args.max_seq_length
        assert acoustic.shape[0] == args.max_seq_length
        assert visual.shape[0] == args.max_seq_length

        features.append(
            InputFeatures(
                input_ids=input_ids,
                input_mask=input_mask,
                segment_ids=segment_ids,
                visual=visual,
                acoustic=acoustic,
                label_id=label_id,
            )
        )
    return features


def prepare_bert_input(tokens, visual, acoustic, tokenizer):
    CLS = tokenizer.cls_token
    SEP = tokenizer.sep_token
    tokens = [CLS] + tokens + [SEP]

    # Pad zero vectors for acoustic / visual vectors to account for [CLS] / [SEP] tokens
    acoustic_zero = np.zeros((1, ACOUSTIC_DIM))
    acoustic = np.concatenate((acoustic_zero, acoustic, acoustic_zero))
    visual_zero = np.zeros((1, VISUAL_DIM))
    visual = np.concatenate((visual_zero, visual, visual_zero))

    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    segment_ids = [0] * len(input_ids)
    input_mask = [1] * len(input_ids)

    pad_length = args.max_seq_length - len(input_ids)

    acoustic_padding = np.zeros((pad_length, ACOUSTIC_DIM))
    acoustic = np.concatenate((acoustic, acoustic_padding))

    visual_padding = np.zeros((pad_length, VISUAL_DIM))
    visual = np.concatenate((visual, visual_padding))

    padding = [0] * pad_length

    # Pad inputs
    input_ids += padding
    input_mask += padding
    segment_ids += padding

    return input_ids, visual, acoustic, input_mask, segment_ids


def prepare_xlnet_input(tokens, visual, acoustic, tokenizer):
    CLS = tokenizer.cls_token
    SEP = tokenizer.sep_token
    PAD_ID = tokenizer.pad_token_id

    # PAD special tokens
    tokens = tokens + [SEP] + [CLS]
    audio_zero = np.zeros((1, ACOUSTIC_DIM))
    acoustic = np.concatenate((acoustic, audio_zero, audio_zero))
    visual_zero = np.zeros((1, VISUAL_DIM))
    visual = np.concatenate((visual, visual_zero, visual_zero))

    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    segment_ids = [0] * (len(tokens) - 1) + [2]

    pad_length = (args.max_seq_length - len(segment_ids))

    # then zero pad the visual and acoustic
    audio_padding = np.zeros((pad_length, ACOUSTIC_DIM))
    acoustic = np.concatenate((audio_padding, acoustic))

    video_padding = np.zeros((pad_length, VISUAL_DIM))
    visual = np.concatenate((video_padding, visual))

    input_ids = [PAD_ID] * pad_length + input_ids
    input_mask = [0] * pad_length + input_mask
    segment_ids = [3] * pad_length + segment_ids

    return input_ids, visual, acoustic, input_mask, segment_ids


def get_tokenizer(model):
    if model == "bert-base-uncased":
        return BertTokenizer.from_pretrained(model)
    elif model == "xlnet-base-cased":
        return XLNetTokenizer.from_pretrained(model)
    else:
        raise ValueError(
            "Expected 'bert-base-uncased' or 'xlnet-base-cased, but received {}".format(
                model
            )
        )


def get_appropriate_dataset(data):

    tokenizer = get_tokenizer(args.model)

    features = convert_to_features(data, args.max_seq_length, tokenizer)
    all_input_ids = torch.tensor(
        [f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor(
        [f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor(
        [f.segment_ids for f in features], dtype=torch.long)
    all_visual = torch.tensor([f.visual for f in features], dtype=torch.float)
    all_acoustic = torch.tensor(
        [f.acoustic for f in features], dtype=torch.float)
    all_label_ids = torch.tensor(
        [f.label_id for f in features], dtype=torch.float)

    dataset = TensorDataset(
        all_input_ids,
        all_visual,
        all_acoustic,
        all_input_mask,
        all_segment_ids,
        all_label_ids,
    )
    return dataset, tokenizer


def set_up_data_loader():
    with open(f"../datasets/MOSI/{args.dataset}.pkl", "rb") as handle:
        data = pickle.load(handle)

    train_data = data["train"]
    dev_data = data["dev"]
    test_data = data["test"]

    train_dataset, train_tokenizer = get_appropriate_dataset(train_data)
    dev_dataset, dev_tokenizer = get_appropriate_dataset(dev_data)
    test_dataset, test_tokenizer = get_appropriate_dataset(test_data)

    num_train_optimization_steps = (
        int(
            len(train_dataset) / args.train_batch_size /
            args.gradient_accumulation_step
        )
        * args.n_epochs
    )

    train_dataloader = DataLoader(
        train_dataset, batch_size=args.train_batch_size, shuffle=True
    )

    dev_dataloader = DataLoader(
        dev_dataset, batch_size=args.dev_batch_size, shuffle=True
    )

    test_dataloader = DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=True,
    )

    return (
        train_dataloader,
        dev_dataloader,
        test_dataloader,
        num_train_optimization_steps,
        train_tokenizer,
        dev_tokenizer,
        test_tokenizer
    )


def set_random_seed(seed: int):
    """
    Helper function to seed experiment for reproducibility.
    If -1 is provided as seed, experiment uses random seed from 0~9999

    Args:
        seed (int): integer to be used as seed, use -1 to randomly seed experiment
    """
    print("Seed: {}".format(seed))

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True

    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def prep_for_training(num_train_optimization_steps: int):
    multimodal_config = MultimodalConfig(
        beta_shift=args.beta_shift, dropout_prob=args.dropout_prob
    )
    bert_config = BertConfig(
        hidden_dropout_prob=args.dropout_prob
    )

    if args.model == "bert-base-uncased":
        ## Fusion all modalities
        # model = MAG_BertForSequenceClassification.from_pretrained(
        #     args.model, multimodal_config=multimodal_config, num_labels=1,
        # )

        ## Ablation Study for visual, acoustic exception
        model = BertForSequenceClassification.from_pretrained(
            args.model, num_labels=1
        )
    elif args.model == "xlnet-base-cased":
        model = MAG_XLNetForSequenceClassification.from_pretrained(
            args.model, multimodal_config=multimodal_config, num_labels=1
        )

    model.to(DEVICE)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.01,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_train_optimization_steps,
        num_training_steps=args.warmup_proportion * num_train_optimization_steps,
    )
    return model, optimizer, scheduler


def train_epoch(model: nn.Module, train_dataloader: DataLoader, optimizer, scheduler):
    model.train()
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
        batch = tuple(t.to(DEVICE) for t in batch)
        input_ids, visual, acoustic, input_mask, segment_ids, label_ids = batch
        visual = torch.squeeze(visual, 1)
        acoustic = torch.squeeze(acoustic, 1)
        model.zero_grad()
        # outputs = model(
        #     input_ids,
        #     visual,
        #     acoustic,
        #     token_type_ids=segment_ids,
        #     attention_mask=input_mask,
        #     labels=None
        # )
        outputs = model(
            input_ids,
            token_type_ids=segment_ids,
            attention_mask=input_mask,
            labels=label_ids
        )

        # logits = outputs[0]
        # loss_fct = MSELoss()
        # loss = loss_fct(logits.view(-1), label_ids.view(-1))

        loss = outputs.loss

        if args.gradient_accumulation_step > 1:
            loss = loss / args.gradient_accumulation_step

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        tr_loss += loss.item()
        nb_tr_steps += 1

        if (step + 1) % args.gradient_accumulation_step == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

    return tr_loss / nb_tr_steps


def eval_epoch(model: nn.Module, dev_dataloader: DataLoader, optimizer):
    model.eval()
    dev_loss = 0
    nb_dev_examples, nb_dev_steps = 0, 0
    with torch.no_grad():
        for step, batch in enumerate(tqdm(dev_dataloader, desc="Iteration")):
            batch = tuple(t.to(DEVICE) for t in batch)

            input_ids, visual, acoustic, input_mask, segment_ids, label_ids = batch
            visual = torch.squeeze(visual, 1)
            acoustic = torch.squeeze(acoustic, 1)
            # outputs = model(
            #     input_ids,
            #     visual,
            #     acoustic,
            #     token_type_ids=segment_ids,
            #     attention_mask=input_mask,
            #     labels=None
            # )
            # logits = outputs[0]

            # loss_fct = MSELoss()
            # loss = loss_fct(logits.view(-1), label_ids.view(-1))

            outputs = model(
            input_ids,
            token_type_ids=segment_ids,
            attention_mask=input_mask,
            labels=label_ids
            )
            loss = outputs.loss

            if args.gradient_accumulation_step > 1:
                loss = loss / args.gradient_accumulation_step

            dev_loss += loss.item()
            nb_dev_steps += 1

    return dev_loss / nb_dev_steps


def test_epoch(model: nn.Module, test_dataloader: DataLoader, tokenizer):
    model.eval()
    preds = []
    labels = []
    
    with torch.no_grad():
        for i, batch in enumerate(tqdm(test_dataloader)):
            batch = tuple(t.to(DEVICE) for t in batch)

            input_ids, visual, acoustic, input_mask, segment_ids, label_ids = batch
            visual = torch.squeeze(visual, 1)
            acoustic = torch.squeeze(acoustic, 1)
            # outputs = model(
            #     input_ids,
            #     visual,
            #     acoustic,
            #     token_type_ids=segment_ids,
            #     attention_mask=input_mask,
            #     labels=None
            # )

            outputs = model(
            input_ids,
            token_type_ids=segment_ids,
            attention_mask=input_mask,
            labels=None
            )

            logits = outputs[0]

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.detach().cpu().numpy()

            logits = np.squeeze(logits).tolist()
            label_ids = np.squeeze(label_ids).tolist()

            preds.extend(logits)
            labels.extend(label_ids)

            # print(i, " th batch")
            # for i, s in enumerate(input_ids):
            #     tokens = tokenizer.convert_ids_to_tokens(s, skip_special_tokens = True)
            #     print(tokens, logits[i], label_ids[i])

        preds = np.array(preds)
        labels = np.array(labels)

        # for s in input_ids:
        #     tokens = tokenizer.convert_ids_to_tokens(s)
        #     print(tokens)
        # print(preds)
        # print(labels)
        # print("--------------------------------------------------------------------")

    return preds, labels


def test_score_model(model: nn.Module, test_dataloader: DataLoader, tokenizer, use_zero=False):

    preds, y_test = test_epoch(model, test_dataloader, tokenizer)
    non_zeros = np.array(
        [i for i, e in enumerate(y_test) if e != 0 or use_zero])

    preds = preds[non_zeros]
    y_test = y_test[non_zeros]

    mae = np.mean(np.absolute(preds - y_test))
    corr = np.corrcoef(preds, y_test)[0][1]

    preds = preds >= 0
    y_test = y_test >= 0

    f_score = f1_score(y_test, preds, average="weighted")
    acc = accuracy_score(y_test, preds)

    return acc, mae, corr, f_score


def test_instance(model: nn.Module, test_tokenizer):
    model.eval()
    segment_list = []
    words_list = []
    preds = []
    preds_2 = []
    preds_7 = []
    labels = []
    labels_2 = []
    labels_7 = []

    with open(f"../datasets/MOSI/{args.dataset}.pkl", "rb") as handle:
        data = pickle.load(handle)

    # test_data[idx] = (words, visual, acoustic), label, segment
    test_data = data["test"]
    test_dataset, test_tokenizer = get_appropriate_dataset(test_data)
    test_dataloader = DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=False,
    )

    video = set()
    count = 0

    for idx in range(len(test_data)):
        (words, visual, acoustic), label, segment = test_data[idx]
        if args.dataset == 'mosi':
            segment_list.append(segment)
        else:
            video_name = segment[0]
            if video_name in video:
                count += 1
            else:
                video.add(video_name)
                count = 0
            segment_list.append(video_name + '[' + str(count) + ']')

        words_list.append(words)
        labels.append(label[0][0])

        # label_2 appending
        if label > 0:
            labels_2.append('positive')
        else:
            labels_2.append('negative')
        
        # label_7 appending
        if label < -15/7:
            labels_7.append('very negative')
        elif label < -9/7:
            labels_7.append('negative')
        elif label < -3/7:
            labels_7.append('slightly negative')
        elif label < 3/7:
            labels_7.append('Neutral')
        elif label < 9/7:
            labels_7.append('slightly positive')
        elif label < 15/7:
            labels_7.append('positive')
        else:
            labels_7.append('very positive')
            
    # prediction
    with torch.no_grad():
        for i, batch in enumerate(tqdm(test_dataloader)):
            batch = tuple(t.to(DEVICE) for t in batch)

            input_ids, visual, acoustic, input_mask, segment_ids, label_ids = batch
            visual = torch.squeeze(visual, 1)
            acoustic = torch.squeeze(acoustic, 1)
            outputs = model(
                input_ids,
                visual,
                acoustic,
                token_type_ids=segment_ids,
                attention_mask=input_mask,
                labels=None
            )
            logits = outputs[0]

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.detach().cpu().numpy()

            logits = np.squeeze(logits).tolist()
            label_ids = np.squeeze(label_ids).tolist()

            preds.extend(logits)

            for logit in logits:
                # preds_2 appending
                if logit > 0:
                    preds_2.append('positive')
                else:
                    preds_2.append('negative')

                # label_7 appending
                if logit < -15/7:
                    preds_7.append('very negative')
                elif logit < -9/7:
                    preds_7.append('negative')
                elif logit < -3/7:
                    preds_7.append('slightly negative')
                elif logit < 3/7:
                    preds_7.append('Neutral')
                elif logit < 9/7:
                    preds_7.append('slightly positive')
                elif logit < 15/7:
                    preds_7.append('positive')
                else:
                    preds_7.append('very positive')

            

    count = 0
    for i in range(len(segment_list)):
        print(i, "th data")
        print(segment_list[i])
        print(words_list[i])
        print(labels[i])
        print(labels_2[i])
        print(labels_7[i])
        print(preds[i])
        print(preds_2[i])
        print(preds_7[i])


def train(
    model,
    train_dataloader,
    validation_dataloader,
    test_data_loader,
    optimizer,
    scheduler,
    tokenizer
):
    valid_losses = []
    test_accuracies = []

    for epoch_i in range(int(args.n_epochs)):
        train_loss = train_epoch(model, train_dataloader, optimizer, scheduler)
        valid_loss = eval_epoch(model, validation_dataloader, optimizer)
        test_acc, test_mae, test_corr, test_f_score = test_score_model(
            model, test_data_loader, tokenizer
        )

        print(
            "epoch:{}, train_loss:{}, valid_loss:{}, test_acc:{}".format(
                epoch_i, train_loss, valid_loss, test_acc
            )
        )

        valid_losses.append(valid_loss)
        test_accuracies.append(test_acc)
    
    print("Total Result:")
    print("best_accuracy: ", sorted(test_accuracies)[-1])
    print("best loss: ", sorted(valid_losses)[0])
    
    return model


In [39]:
set_random_seed(args.seed)

Seed: 1690


In [18]:
(
    train_data_loader,
    dev_data_loader,
    test_data_loader,
    num_train_optimization_steps,
    train_tokenizer,
    dev_tokenizer,
    test_tokenizer
) = set_up_data_loader()

model, optimizer, scheduler = prep_for_training(
    num_train_optimization_steps)

model = train(
    model,
    train_data_loader,
    dev_data_loader,
    test_data_loader,
    optimizer,
    scheduler,
    test_tokenizer
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

epoch:0, train_loss:2.329716187936288, valid_loss:2.8119139671325684, test_acc:0.5679389312977099


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.16it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 21.00it/s]
100%|██████████| 6/6 [00:00<00:00, 20.48it/s]


epoch:1, train_loss:2.32402644334016, valid_loss:2.7578238248825073, test_acc:0.5603053435114503


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.16it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 21.19it/s]
100%|██████████| 6/6 [00:00<00:00, 20.42it/s]


epoch:2, train_loss:2.314677432731346, valid_loss:2.742027521133423, test_acc:0.549618320610687


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.24it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.89it/s]
100%|██████████| 6/6 [00:00<00:00, 20.49it/s]


epoch:3, train_loss:2.2994151027114302, valid_loss:2.6746902465820312, test_acc:0.5557251908396946


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.22it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 21.01it/s]
100%|██████████| 6/6 [00:00<00:00, 20.43it/s]


epoch:4, train_loss:2.2562954469963357, valid_loss:2.690970301628113, test_acc:0.5557251908396946


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.07it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 21.16it/s]
100%|██████████| 6/6 [00:00<00:00, 20.35it/s]


epoch:5, train_loss:2.221479566008956, valid_loss:2.646364212036133, test_acc:0.5679389312977099


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.15it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.94it/s]
100%|██████████| 6/6 [00:00<00:00, 20.40it/s]


epoch:6, train_loss:2.178174133653994, valid_loss:2.605894446372986, test_acc:0.5770992366412214


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.12it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 21.04it/s]
100%|██████████| 6/6 [00:00<00:00, 20.33it/s]


epoch:7, train_loss:2.115795139913206, valid_loss:2.5435848236083984, test_acc:0.5740458015267176


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.07it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.81it/s]
100%|██████████| 6/6 [00:00<00:00, 20.09it/s]


epoch:8, train_loss:2.0419098889386214, valid_loss:2.4213231801986694, test_acc:0.5755725190839694


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.07it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.83it/s]
100%|██████████| 6/6 [00:00<00:00, 20.22it/s]


epoch:9, train_loss:1.8892787120960377, valid_loss:2.2728978395462036, test_acc:0.5877862595419847


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.08it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.82it/s]
100%|██████████| 6/6 [00:00<00:00, 20.01it/s]


epoch:10, train_loss:1.7118228762238115, valid_loss:1.9809101819992065, test_acc:0.7053435114503817


Iteration: 100%|██████████| 27/27 [00:02<00:00, 12.00it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.88it/s]
100%|██████████| 6/6 [00:00<00:00, 20.24it/s]


epoch:11, train_loss:1.4276138610310025, valid_loss:1.6669862270355225, test_acc:0.7862595419847328


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.94it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.70it/s]
100%|██████████| 6/6 [00:00<00:00, 20.12it/s]


epoch:12, train_loss:1.08340882813489, valid_loss:1.4148651957511902, test_acc:0.8


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.93it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.90it/s]
100%|██████████| 6/6 [00:00<00:00, 20.16it/s]


epoch:13, train_loss:0.8684028011781199, valid_loss:1.3697199821472168, test_acc:0.8213740458015267


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.90it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.82it/s]
100%|██████████| 6/6 [00:00<00:00, 20.12it/s]


epoch:14, train_loss:0.742679441416705, valid_loss:1.3278641104698181, test_acc:0.8122137404580153


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.96it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.75it/s]
100%|██████████| 6/6 [00:00<00:00, 20.09it/s]


epoch:15, train_loss:0.626339974226775, valid_loss:1.2393858432769775, test_acc:0.8183206106870229


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.95it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.73it/s]
100%|██████████| 6/6 [00:00<00:00, 20.05it/s]


epoch:16, train_loss:0.5253170949441416, valid_loss:1.2376623749732971, test_acc:0.8366412213740458


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.91it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.63it/s]
100%|██████████| 6/6 [00:00<00:00, 19.98it/s]


epoch:17, train_loss:0.45564576983451843, valid_loss:1.1919048130512238, test_acc:0.8381679389312977


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.90it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.65it/s]
100%|██████████| 6/6 [00:00<00:00, 20.03it/s]


epoch:18, train_loss:0.39525222612751854, valid_loss:1.1992638111114502, test_acc:0.8244274809160306


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.93it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.54it/s]
100%|██████████| 6/6 [00:00<00:00, 20.02it/s]


epoch:19, train_loss:0.33568577468395233, valid_loss:1.294867753982544, test_acc:0.8412213740458016


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.88it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.91it/s]
100%|██████████| 6/6 [00:00<00:00, 19.90it/s]


epoch:20, train_loss:0.2637648041601534, valid_loss:1.2500019073486328, test_acc:0.8305343511450382


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.85it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.28it/s]
100%|██████████| 6/6 [00:00<00:00, 19.93it/s]


epoch:21, train_loss:0.2489098713353828, valid_loss:1.1934056282043457, test_acc:0.8274809160305343


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.86it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.55it/s]
100%|██████████| 6/6 [00:00<00:00, 19.93it/s]


epoch:22, train_loss:0.2098961196563862, valid_loss:1.1805788278579712, test_acc:0.8366412213740458


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.82it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.49it/s]
100%|██████████| 6/6 [00:00<00:00, 19.80it/s]


epoch:23, train_loss:0.1793701601801095, valid_loss:1.1781408190727234, test_acc:0.8305343511450382


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.82it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.49it/s]
100%|██████████| 6/6 [00:00<00:00, 19.84it/s]


epoch:24, train_loss:0.16602684336679954, valid_loss:1.1586021780967712, test_acc:0.8290076335877863


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.77it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.36it/s]
100%|██████████| 6/6 [00:00<00:00, 19.83it/s]


epoch:25, train_loss:0.14238767160309684, valid_loss:1.1096620559692383, test_acc:0.8244274809160306


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.82it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.35it/s]
100%|██████████| 6/6 [00:00<00:00, 19.80it/s]


epoch:26, train_loss:0.13217036094930437, valid_loss:1.1418929398059845, test_acc:0.8351145038167939


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.79it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.56it/s]
100%|██████████| 6/6 [00:00<00:00, 19.79it/s]


epoch:27, train_loss:0.12622509648402533, valid_loss:1.1337698698043823, test_acc:0.8320610687022901


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.78it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.31it/s]
100%|██████████| 6/6 [00:00<00:00, 19.81it/s]


epoch:28, train_loss:0.11591946402633632, valid_loss:1.1381986141204834, test_acc:0.8381679389312977


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.73it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.45it/s]
100%|██████████| 6/6 [00:00<00:00, 19.87it/s]


epoch:29, train_loss:0.10701232624274713, valid_loss:1.1694161295890808, test_acc:0.8396946564885496


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.78it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.47it/s]
100%|██████████| 6/6 [00:00<00:00, 19.80it/s]


epoch:30, train_loss:0.09374705978013852, valid_loss:1.1366562247276306, test_acc:0.8366412213740458


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.80it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.60it/s]
100%|██████████| 6/6 [00:00<00:00, 19.81it/s]


epoch:31, train_loss:0.09839864765052442, valid_loss:1.0873255729675293, test_acc:0.8412213740458016


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.80it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.42it/s]
100%|██████████| 6/6 [00:00<00:00, 19.79it/s]


epoch:32, train_loss:0.0922147906212895, valid_loss:1.088740050792694, test_acc:0.8442748091603054


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.83it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.26it/s]
100%|██████████| 6/6 [00:00<00:00, 19.80it/s]


epoch:33, train_loss:0.07967405131569615, valid_loss:1.0968186259269714, test_acc:0.8366412213740458


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.76it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.51it/s]
100%|██████████| 6/6 [00:00<00:00, 19.71it/s]


epoch:34, train_loss:0.07729263120779285, valid_loss:1.0885278582572937, test_acc:0.8366412213740458


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.77it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.35it/s]
100%|██████████| 6/6 [00:00<00:00, 19.81it/s]


epoch:35, train_loss:0.07779825106263161, valid_loss:1.111832320690155, test_acc:0.8427480916030534


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.79it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.60it/s]
100%|██████████| 6/6 [00:00<00:00, 19.82it/s]


epoch:36, train_loss:0.07400428655522841, valid_loss:1.132395476102829, test_acc:0.8473282442748091


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.83it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.56it/s]
100%|██████████| 6/6 [00:00<00:00, 19.82it/s]


epoch:37, train_loss:0.07070976481945426, valid_loss:1.1113368570804596, test_acc:0.8396946564885496


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.83it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.51it/s]
100%|██████████| 6/6 [00:00<00:00, 19.83it/s]


epoch:38, train_loss:0.07155398031075795, valid_loss:1.1246662139892578, test_acc:0.8259541984732824


Iteration: 100%|██████████| 27/27 [00:02<00:00, 11.77it/s]
Iteration: 100%|██████████| 2/2 [00:00<00:00, 20.32it/s]
100%|██████████| 6/6 [00:00<00:00, 19.80it/s]

epoch:39, train_loss:0.0746192414727476, valid_loss:1.070777177810669, test_acc:0.8259541984732824
Total Result:
best_accuracy:  0.8473282442748091
best loss:  1.070777177810669





### Model Save and Load

In [19]:
torch.save(model.state_dict(), "./saved_models_MAG_mosi_onlybert.pt")

In [40]:
# multimodal_config = MultimodalConfig(beta_shift=args.beta_shift, dropout_prob=args.dropout_prob)
# bert_config = BertConfig(hidden_dropout_prob=args.dropout_prob)
# model = MAG_BertForSequenceClassification.from_pretrained(args.model, multimodal_config=multimodal_config, num_labels=1)
model.load_state_dict(torch.load("./saved_models_MAG_mosi_onlybert.pt"))
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [48]:
multimodal_config = MultimodalConfig(beta_shift=args.beta_shift, dropout_prob=args.dropout_prob)
bert_config = BertConfig(hidden_dropout_prob=args.dropout_prob)
model_fusion = MAG_BertForSequenceClassification.from_pretrained(args.model, multimodal_config=multimodal_config, num_labels=1)
model_fusion.load_state_dict(torch.load("./saved_models_MAG_mosi.pt"))
model_fusion.eval()

Initializing MAG with beta_shift:1.0 hidden_prob:0.5


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MAG_BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing MAG_BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MAG_BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MAG_BertForSequenceClassification were not initialized from the mod

MAG_BertForSequenceClassification(
  (bert): MAG_BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

### Sentiment Intensity Reflection of Fustion Embedding Space

In [None]:
with open(f"../datasets/{args.dataset}.pkl", "rb") as handle:
    data = pickle.load(handle)

train_data = data["train"]
dev_data = data["dev"]
test_data = data["test"]

train_dataset, train_tokenizer = get_appropriate_dataset(train_data)
dev_dataset, dev_tokenizer = get_appropriate_dataset(dev_data)
test_dataset, test_tokenizer = get_appropriate_dataset(test_data)

test_data_loader = DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=False)

model.eval()

test_embeddings = torch.zeros((0, 100), dtype=torch.float32)
preds = []
labels = []
classes = []
pred_classes = []

# Gold 7-Class
for idx in range(len(test_data)):
    (word, visual, acoustic), label, segment = test_data[idx]
    if label < -15/7:
        classes.append(-3)
    elif label < -9/7:
        classes.append(-2)
    elif label < -3/7:
        classes.append(-1)
    elif label < 3/7:
        classes.append(0)
    elif label < 9/7:
        classes.append(1)
    elif label < 15/7:
        classes.append(2)
    else:
        classes.append(3)
classes = np.array(classes)

# MAG-BERT Model output
with torch.no_grad():
    for i, batch in enumerate(tqdm(test_data_loader)):
        batch = tuple(t.to(DEVICE) for t in batch)

        input_ids, visual, acoustic, input_mask, segment_ids, label_ids = batch
        visual = torch.squeeze(visual, 1)
        acoustic = torch.squeeze(acoustic, 1)
        outputs = model(
            input_ids,
            visual,
            acoustic,
            token_type_ids=segment_ids,
            attention_mask=input_mask,
            labels=None
        )

        logits = outputs[0]
        embeddings = outputs[1:]

        test_embeddings = torch.cat((test_embeddings, embeddings.detach().cpu()), 0)

        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.detach().cpu().numpy()

        preds.extend(np.squeeze(logits).tolist())
        labels.extend(np.squeeze(label_ids).tolist())

        preds = np.array(preds)
        labels = np.array(labels)

        # 7-class prediction
        for logit in logits:
            if logit < -15/7:
                pred_classes.append(-3)
            elif logit < -9/7:
                pred_classes.append(-2)
            elif logit < -3/7:
                pred_classes.append(-1)
            elif logit < 3/7:
                pred_classes.append(0)
            elif logit < 9/7:
                pred_classes.append(1)
            elif logit < 15/7:
                pred_classes.append(2)
            else:
                pred_classes.append(3)
        pred_classes = np.array(pred_classes)
    
    

In [None]:
# Create a two dimensional t-SNE projection of the embeddings
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from sklearn.manifold import TSNE

tsne = TSNE(2, verbose=1)
tsne_proj = tsne.fit_transform(test_embeddings)
cmap = cm.get_cmap('tab20')
fig, ax = plt.subplot(figsize=(8,8))
# num_categories = 7
for lab in range(-3, 3):
    indices = pred_classes==lab
    

### Test Instance Prediction Result

In [49]:
def test_instance(model: nn.Module):
    model_fusion.eval()
    segment_list = []
    words_list = []
    preds = []
    preds_2 = []
    preds_7 = []
    labels = []
    labels_2 = []
    labels_7 = []

    with open(f"../datasets/MOSI/{args.dataset}.pkl", "rb") as handle:
        data = pickle.load(handle)

    # test_data[idx] = (words, visual, acoustic), label, segment
    test_data = data["test"]
    test_dataset, test_tokenizer = get_appropriate_dataset(test_data)
    test_dataloader = DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=False,
    )

    video = set()
    count = 0

    for idx in range(len(test_data)):
        (words, visual, acoustic), label, segment = test_data[idx]
        if args.dataset == 'mosi':
            segment_list.append(segment)
        else:
            video_name = segment[0]
            if video_name in video:
                count += 1
            else:
                video.add(video_name)
                count = 0
            segment_list.append(video_name + '[' + str(count) + ']')

        words_list.append(words)
        labels.append(label[0][0])

        # label_2 appending
        if label > 0:
            labels_2.append('positive')
        else:
            labels_2.append('negative')
        
        # label_7 appending
        if label < -15/7:
            labels_7.append('very negative')
        elif label < -9/7:
            labels_7.append('negative')
        elif label < -3/7:
            labels_7.append('slightly negative')
        elif label < 3/7:
            labels_7.append('Neutral')
        elif label < 9/7:
            labels_7.append('slightly positive')
        elif label < 15/7:
            labels_7.append('positive')
        else:
            labels_7.append('very positive')
            
    # prediction
    with torch.no_grad():
        for i, batch in enumerate(tqdm(test_dataloader)):
            batch = tuple(t.to(DEVICE) for t in batch)

            input_ids, visual, acoustic, input_mask, segment_ids, label_ids = batch
            visual = torch.squeeze(visual, 1)
            acoustic = torch.squeeze(acoustic, 1)
            outputs = model_fusion(
                input_ids,
                visual,
                acoustic,
                token_type_ids=segment_ids,
                attention_mask=input_mask,
                labels=None
            )
            # outputs = model(
            # input_ids,
            # token_type_ids=segment_ids,
            # attention_mask=input_mask,
            # labels=None
            # )
            logits = outputs[0]

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.detach().cpu().numpy()

            logits = np.squeeze(logits).tolist()
            label_ids = np.squeeze(label_ids).tolist()

            preds.extend(logits)

            for logit in logits:
                # preds_2 appending
                if logit > 0:
                    preds_2.append('positive')
                else:
                    preds_2.append('negative')

                # label_7 appending
                if logit < -15/7:
                    preds_7.append('very negative')
                elif logit < -9/7:
                    preds_7.append('negative')
                elif logit < -3/7:
                    preds_7.append('slightly negative')
                elif logit < 3/7:
                    preds_7.append('Neutral')
                elif logit < 9/7:
                    preds_7.append('slightly positive')
                elif logit < 15/7:
                    preds_7.append('positive')
                else:
                    preds_7.append('very positive')

                # if logit < -15/7:
                #     preds_7.append('-3')
                # elif logit < -9/7:
                #     preds_7.append('-2')
                # elif logit < -3/7:
                #     preds_7.append('-1')
                # elif logit < 3/7:
                #     preds_7.append('0')
                # elif logit < 9/7:
                #     preds_7.append('1')
                # elif logit < 15/7:
                #     preds_7.append('2')
                # else:
                #     preds_7.append('3')

            

    count = 0
    # for i in range(len(segment_list)):
    #     print(i, "th data")
    #     print(segment_list[i])
    #     print(words_list[i])
    #     print(labels[i])
    #     print(labels_2[i])
    #     print(labels_7[i])
    #     print(preds[i])
    #     print(preds_2[i])
    #     print(preds_7[i])

    return segment_list, words_list, labels, labels_2, labels_7, preds, preds_2, preds_7


In [50]:
model_fusion = model_fusion.to(DEVICE)
segment_list, words_list, labels, labels_2, labels_7, preds, preds_2, preds_7 = test_instance(model=model)

100%|██████████| 6/6 [00:00<00:00, 19.90it/s]


In [24]:
from ipywidgets import interact

@interact
def get_predict_result(idx = range(len(segment_list))):
    print("SEGMENT:", segment_list[idx])
    print("WORDS:", words_list[idx])
    print("GOLD_VALUE:", labels[idx])
    print("GOLD_BINARY:", labels_2[idx])
    print("GOLD_7_CLASS:", labels_7[idx])
    print("PREDICTED_VALUE:", preds[idx])
    print("PREDICTED_BINARY:", preds_2[idx])
    print("PREDICTED _7_CLASS:", preds_7[idx])

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

In [43]:
import plotly.express as px
import plotly.subplots as sp
import pandas as pd

d = {'segmentID': segment_list, 'words': words_list, 'labels': labels, 'labels_2': labels_2, 'labels_7': labels_7, 'preds': preds, 'preds_2': preds_2, 'preds_7': preds_7}
df = pd.DataFrame(data=d)
order = ['very negative', 'negative', 'slightly negative', 'Neutral', 'slightly positive', 'positive', 'very positive']

fig1 = px.bar(df, x="labels_7")
fig2 = px.bar(df, x="preds_7")

fig1_traces = []
fig2_traces = []

for trace in range(len(fig1["data"])):
    fig1_traces.append(fig1["data"][trace])
for trace in range(len(fig2["data"])):
    fig2_traces.append(fig2["data"][trace])

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Gold", "MAGBERT"))
for traces in fig1_traces:
    this_figure.append_trace(traces, row=1, col=1)
for traces in fig2_traces:
    this_figure.append_trace(traces, row=1, col=2)

# this_figure.update_layout(height=600, width=1500, title_text="CMU-MOSI 7 Class Sentiment Intensity")
this_figure.update_layout(title_text="CMU-MOSI 7 Class Sentiment Intensity")
this_figure.update_xaxes(categoryorder='array', categoryarray= order)
this_figure.update_yaxes(range=[0,200])
this_figure.show()

## Prediction Value Box Plot

In [51]:
label_VN, label_N, label_SN, label_Neu, label_SP, label_P, label_VP = [], [], [], [], [], [], []

for idx in range(len(segment_list)):
    if labels_7[idx] == 'very negative': label_VN.append(preds[idx])
    if labels_7[idx] == 'negative': label_N.append(preds[idx])
    if labels_7[idx] == 'slightly negative': label_SN.append(preds[idx])
    if labels_7[idx] == 'Neutral': label_Neu.append(preds[idx])
    if labels_7[idx] == 'slightly positive': label_SP.append(preds[idx])
    if labels_7[idx] == 'positive': label_P.append(preds[idx])
    if labels_7[idx] == 'very positive': label_VP.append(preds[idx])

In [27]:
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
import pandas as pd

d = {'segmentID': segment_list, 'words': words_list, 'labels': labels, 'labels_2': labels_2, 'labels_7': labels_7, 'preds': preds, 'preds_2': preds_2, 'preds_7': preds_7}
df = pd.DataFrame(data=d)
order = ['very negative', 'negative', 'slightly negative', 'Neutral', 'slightly positive', 'positive', 'very positive']

fig = px.box(df, x="labels_7", y="preds", points="all")
fig.add_trace(
    go.Scatter(x=order, y=[np.mean(label_VN), np.mean(label_N), np.mean(label_SN), np.mean(label_Neu), np.mean(label_SP), np.mean(label_P), np.mean(label_VP)], \
         mode='markers', name="mean"))
fig.add_trace(go.Scatter(x=order, y=[-3, -2, -1, 0, 1, 2, 3], mode='markers', name="gold"))

fig.update_xaxes(categoryorder='array', categoryarray= order)
fig.update_layout(title_text="CMU-MOSI MAGBERT prediction value distribution")
fig.show()

## Sentiment Intensity Regression Error Distribution

In [52]:
error_VN, error_N, error_SN, error_Neu, error_SP, error_P, error_VP = [], [], [], [], [], [], []

for idx in range(len(segment_list)):
    if labels_7[idx] == 'very negative': error_VN.append(abs(preds[idx] - (-3.0)))
    if labels_7[idx] == 'negative': error_N.append(abs(preds[idx] - (-2.0)))
    if labels_7[idx] == 'slightly negative': error_SN.append(abs(preds[idx] - (-1.0)))
    if labels_7[idx] == 'Neutral': error_Neu.append(abs(preds[idx]))
    if labels_7[idx] == 'slightly positive': error_SP.append(abs(preds[idx] - 1.0))
    if labels_7[idx] == 'positive': error_P.append(abs(preds[idx] - 2.0))
    if labels_7[idx] == 'very positive': error_VP.append(abs(preds[idx] - 3.0))

In [32]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Box(y=error_VN, name="very negative", marker_color="#1984c5"))
fig.add_trace(go.Box(y=error_N, name="negative", marker_color="#22a7f0"))
fig.add_trace(go.Box(y=error_SN, name="slightly negative", marker_color="#63bff0"))
fig.add_trace(go.Box(y=error_Neu, name="Neutral", marker_color="#63bff0"))
fig.add_trace(go.Box(y=error_SP, name="slightly positive", marker_color="#63bff0"))
fig.add_trace(go.Box(y=error_P, name="positive", marker_color="#22a7f0"))
fig.add_trace(go.Box(y=error_VP, name="very positive", marker_color="#1984c5"))
fig.update_traces(boxpoints='all', jitter=0)
fig.add_trace(
    go.Scatter(x=order, y=[np.mean(error_VN), np.mean(error_N), np.mean(error_SN), np.mean(error_Neu), np.mean(error_SP), np.mean(error_P), np.mean(error_VP)], \
         mode='markers', name="Mean Absolute Error", marker_color="red"))
fig.update_layout(yaxis_title="Absolute Error" ,title_text="MAGBERT Sentiment Intensity Prediction Error Distribution - [onlyBERT]")
fig.show()

In [31]:
print("<CMU-MOSI MAGBERT [onlyBERT] MAE>")
print("VERY NEGATIVE", np.mean(error_VN))
print("NEGATIVE", np.mean(error_N))
print("SLIGHTLY NEGATIVE", np.mean(error_SN))
print("NEUTRAL", np.mean(error_Neu))
print("SLIGHTLY POSITIVE", np.mean(error_SP))
print("POSITIVE", np.mean(error_P))
print("VERY POSITIVE", np.mean(error_VP))

<CMU-MOSI MAGBERT [onlyBERT] MAE>
VERY NEGATIVE 1.5137611697427928
NEGATIVE 0.9559251483662852
SLIGHTLY NEGATIVE 0.7201637340363412
NEUTRAL 0.7559183636136974
SLIGHTLY POSITIVE 0.7959774812607436
POSITIVE 0.6029950755826653
VERY POSITIVE 1.2766265792500537


In [47]:
# Text
means = [np.mean(error_VN), np.mean(error_N), np.mean(error_SN), np.mean(error_Neu), np.mean(error_SP), np.mean(error_P), np.mean(error_VP)]
print(np.var(means))


0.09415104486028307


In [53]:
# Fusion
means = [np.mean(error_VN), np.mean(error_N), np.mean(error_SN), np.mean(error_Neu), np.mean(error_SP), np.mean(error_P), np.mean(error_VP)]
print(np.var(means))

0.1551135231270337
