In [1]:
import argparse
import os
import random

import numpy as np
import torch

from utils import *
from torch.utils.data import DataLoader
from solver import Solver
from config import get_args, get_config, output_dim_dict, criterion_dict
from data_loader import get_loader
from test_instance import TestMOSI, TestMOSEI

loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /home/soyeon/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
loading file https://huggingface.co/bert-base-uncased/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json from cache at /home/soyeon/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/soyeon/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f

In [2]:
def set_seed(seed):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch
    (CPU, plus all CUDA devices when CUDA is available). When CUDA is
    available, cuDNN is additionally switched to deterministic mode with the
    benchmark auto-tuner disabled.

    Args:
        seed: Integer seed value.
    """
    random.seed(seed)
    # np.random.seed only accepts values in [0, 2**32 - 1]; fold larger or
    # negative seeds into that range instead of raising.
    np.random.seed(seed % 2**32)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [3]:
# Path to the pretrained word-embedding (GloVe) file.
# The default is the original workstation location; set the MSIR_WORD_EMB_PATH
# environment variable to point elsewhere without editing the notebook.
word_emb_path = os.environ.get('MSIR_WORD_EMB_PATH', '/mnt/soyeon/workspace/glove.840B.300d.txt')
assert word_emb_path is not None

In [4]:
from datetime import datetime
from pathlib import Path
import pprint
from torch import optim
import torch.nn as nn

# Root directories of the CMU-MultimodalSDK checkout and of the datasets.
# Defaults are the original workstation paths; set the MSIR_SDK_DIR /
# MSIR_DATA_DIR environment variables to relocate them without editing the
# notebook.
sdk_dir = Path(os.environ.get('MSIR_SDK_DIR', '/mnt/soyeon/workspace/multimodal/CMU-MultimodalSDK'))
data_dir = Path(os.environ.get('MSIR_DATA_DIR', '/mnt/soyeon/workspace/multimodal/MSIR/datasets'))

# Dataset name -> directory holding that dataset.
data_dict = {
    'mosi': data_dir.joinpath('MOSI'),
    'mosei': data_dir.joinpath('MOSEI'),
    'ur_funny': data_dir.joinpath('UR_FUNNY'),
}

# Name -> class lookup tables for optimizers and activation layers.
optimizer_dict = {'RMSprop': optim.RMSprop, 'Adam': optim.Adam}
activation_dict = {
    'elu': nn.ELU,
    "hardshrink": nn.Hardshrink,
    "hardtanh": nn.Hardtanh,
    "leakyrelu": nn.LeakyReLU,
    "prelu": nn.PReLU,
    "relu": nn.ReLU,
    "rrelu": nn.RReLU,
    "tanh": nn.Tanh,
}

# NOTE: the two tables below rebind `output_dim_dict` and `criterion_dict`,
# which the first cell also imports from `config`; from here on these local
# definitions are the ones in effect.
# NOTE(review): keys here use 'mosei_senti' while data_dict uses 'mosei', so a
# lookup with dataset='mosei' falls back to the caller's default - confirm
# that this is intended.
output_dim_dict = {
    'mosi': 1,
    'mosei_senti': 1,
}

criterion_dict = {
    'mosi': 'L1Loss',
    'iemocap': 'CrossEntropyLoss',
    'ur_funny': 'CrossEntropyLoss'
}

In [5]:
import easydict

# Run configuration, exposed with attribute access (args.batch_size, ...).
# Built from keyword arguments so every key is checked to be a valid
# attribute name; insertion order matches the grouped sections below.
args = easydict.EasyDict(dict(
    # Tasks
    dataset="mosi",
    data_path="datasets",

    # Dropouts
    dropout_a=0.1,
    dropout_v=0.1,
    dropout_prj=0.1,

    # Architecture
    multiseed=True,
    contrast=True,
    add_va=True,
    n_layer=1,
    cpc_layers=1,
    d_vh=16,
    d_ah=16,
    d_vout=16,
    d_aout=16,
    bidirectional=True,
    d_prjh=128,
    pretrain_emb=768,

    # Activations
    mmilb_mid_activation="ReLU",
    mmilb_last_activation="Tanh",
    cpc_activation="Tanh",

    # Training Setting
    batch_size=32,
    clip=1.0,
    lr_main=1e-3,
    lr_bert=5e-5,
    lr_mmilb=1e-3,
    alpha=0.1,
    beta=0.1,
    weight_decay_main=1e-4,
    weight_decay_bert=1e-4,
    weight_decay_club=1e-4,
    optim="Adam",
    num_epochs=40,
    when=20,
    patience=10,
    update_batch=1,

    # Logistics
    log_interval=100,
    seed=1111,
))

In [6]:
def str2bool(v):
    """Convert a command-line style truthy/falsy value to a bool.

    Args:
        v: A bool (returned unchanged) or a value whose string form is one of
            'yes', 'true', 't', 'y', '1' / 'no', 'false', 'f', 'n', '0'.
            Matching is case-insensitive and ignores surrounding whitespace.

    Returns:
        The corresponding boolean.

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognised spelling.
    """
    # argparse passes the default through `type` untouched when it is already
    # a bool; calling .lower() on it would raise AttributeError.
    if isinstance(v, bool):
        return v

    text = str(v).strip().lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    if text in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

In [7]:
class Config(object):
    """Data configuration for one dataset split."""

    def __init__(self, data, mode='train'):
        """Resolve dataset paths for `data` and remember the split `mode`.

        Args:
            data: Dataset name; looked up case-insensitively in `data_dict`
                (an unknown name raises KeyError).
            mode: Data split, e.g. 'train', 'valid' or 'test'.
        """
        dataset_root = data_dict[data.lower()]

        self.dataset_dir = dataset_root
        self.sdk_dir = sdk_dir
        self.mode = mode
        # GloVe embedding file path
        self.word_emb_path = word_emb_path
        # The data directory is the dataset directory itself.
        self.data_dir = dataset_root

    def __str__(self):
        """Return a pretty-printed dump of all attributes under a header line."""
        return 'Configurations\n' + pprint.pformat(vars(self))


def get_config(dataset='mosi', mode='train', batch_size=32):
    """Build a Config for `dataset`/`mode` and attach the dataset name and batch size.

    Args:
        dataset: Dataset name passed to Config and stored as `config.dataset`.
        mode: Data split passed to Config.
        batch_size: Stored as `config.batch_size`.

    Returns:
        The populated Config instance.
    """
    cfg = Config(data=dataset, mode=mode)
    cfg.dataset, cfg.batch_size = dataset, batch_size
    return cfg

In [8]:
# Normalised dataset name used for every lookup below.
dataset = args.dataset.strip().lower()

set_seed(args.seed)
print("Start loading the data....")
train_config = get_config(dataset, mode='train', batch_size=args.batch_size)
valid_config = get_config(dataset, mode='valid', batch_size=args.batch_size)
test_config = get_config(dataset, mode='test',  batch_size=args.batch_size)

# Original note: pretrained_emb is saved in train_config by this call.
train_loader = get_loader(args, train_config, shuffle=True)
print('Training data loaded!')
valid_loader = get_loader(args, valid_config, shuffle=False)
print('Validation data loaded!')
test_loader = get_loader(args, test_config, shuffle=False)
print('Test data loaded!')
print('Finish loading the data....')

# NOTE(review): anomaly detection is a debugging aid that slows down every
# backward pass; consider turning it off for regular training runs.
torch.autograd.set_detect_anomaly(True)

# Additional attributes taken from the training config.
# NOTE(review): word2id / tva_dim are presumably attached to train_config by
# get_loader - confirm in data_loader.py.
args.word2id = train_config.word2id

# Architecture parameters
args.d_tin, args.d_vin, args.d_ain = train_config.tva_dim
args.dataset = args.data = dataset
args.n_class = output_dim_dict.get(dataset, 1)
args.criterion = criterion_dict.get(dataset, 'MSELoss')

Start loading the data....
train
Training data loaded!
valid
Validation data loaded!
test
Test data loaded!
Finish loading the data....


In [9]:
# Build the solver in training mode around the three data loaders.
solver = Solver(
    args,
    train_loader=train_loader,
    dev_loader=valid_loader,
    test_loader=test_loader,
    is_train=True,
)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/soyeon/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file https://huggingface.co/bert-base-uncased/res

In [10]:
# Keep a direct handle on the model held by the solver.
model = solver.model

In [12]:
# Run training with per-epoch validation/test evaluation and rebind `model`
# to whatever the call returns.
# NOTE(review): presumably train_and_eval() returns the trained model (later
# cells call model.state_dict()) - confirm in solver.py.
model = solver.train_and_eval()

100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:15<00:00,  2.69it/s]


--------------------------------------------------
Epoch  1 | Time 29.5137 sec | Valid Loss 1.0112 | Test Loss 0.9232
--------------------------------------------------
MAE:  0.92319214
Correlation Coefficient:  0.7049294243292297
mult_acc_7:  0.29300291545189505
mult_acc_5:  0.3206997084548105
F1 score all/non0: 0.7791/0.8099 over 686/656
Accuracy all/non0: 0.7886/0.8171
--------------------------------------------------
Saved model at pre_trained_models/MM.pt!


100%|██████████| 41/41 [00:13<00:00,  3.07it/s]
100%|██████████| 41/41 [00:15<00:00,  2.66it/s]


--------------------------------------------------
Epoch  2 | Time 29.8839 sec | Valid Loss 0.8006 | Test Loss 0.9007
--------------------------------------------------
MAE:  0.9006584
Correlation Coefficient:  0.7722844640725065
mult_acc_7:  0.34839650145772594
mult_acc_5:  0.39504373177842567
F1 score all/non0: 0.8052/0.8121 over 686/656
Accuracy all/non0: 0.8047/0.811
--------------------------------------------------
Saved model at pre_trained_models/MM.pt!


100%|██████████| 41/41 [00:14<00:00,  2.91it/s]
100%|██████████| 41/41 [00:14<00:00,  2.74it/s]


--------------------------------------------------
Epoch  3 | Time 30.1106 sec | Valid Loss 0.7514 | Test Loss 0.8360
--------------------------------------------------
MAE:  0.83601546
Correlation Coefficient:  0.7606723708997617
mult_acc_7:  0.3979591836734694
mult_acc_5:  0.46938775510204084
F1 score all/non0: 0.8037/0.8167 over 686/656
Accuracy all/non0: 0.8032/0.8155
--------------------------------------------------
Saved model at pre_trained_models/MM.pt!


100%|██████████| 41/41 [00:13<00:00,  3.11it/s]
100%|██████████| 41/41 [00:14<00:00,  2.82it/s]


--------------------------------------------------
Epoch  4 | Time 28.8141 sec | Valid Loss 0.7034 | Test Loss 0.7679
--------------------------------------------------
MAE:  0.76791286
Correlation Coefficient:  0.7699020453843733
mult_acc_7:  0.4314868804664723
mult_acc_5:  0.5058309037900874
F1 score all/non0: 0.8095/0.8181 over 686/656
Accuracy all/non0: 0.809/0.8171
--------------------------------------------------
Saved model at pre_trained_models/MM.pt!


100%|██████████| 41/41 [00:13<00:00,  3.03it/s]
100%|██████████| 41/41 [00:14<00:00,  2.84it/s]


--------------------------------------------------
Epoch  5 | Time 29.0574 sec | Valid Loss 0.7781 | Test Loss 0.7922
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:14<00:00,  2.76it/s]


--------------------------------------------------
Epoch  6 | Time 29.1698 sec | Valid Loss 0.7404 | Test Loss 0.7657
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.11it/s]
100%|██████████| 41/41 [00:14<00:00,  2.80it/s]


--------------------------------------------------
Epoch  7 | Time 28.9853 sec | Valid Loss 0.7124 | Test Loss 0.7803
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.09it/s]
100%|██████████| 41/41 [00:14<00:00,  2.78it/s]


--------------------------------------------------
Epoch  8 | Time 29.1328 sec | Valid Loss 0.7333 | Test Loss 0.7834
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  2.95it/s]
100%|██████████| 41/41 [00:14<00:00,  2.76it/s]


--------------------------------------------------
Epoch  9 | Time 29.8324 sec | Valid Loss 0.7093 | Test Loss 0.8292
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.11it/s]
100%|██████████| 41/41 [00:14<00:00,  2.81it/s]


--------------------------------------------------
Epoch 10 | Time 28.8174 sec | Valid Loss 0.7438 | Test Loss 0.7813
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.02it/s]
100%|██████████| 41/41 [00:14<00:00,  2.79it/s]


--------------------------------------------------
Epoch 11 | Time 29.4146 sec | Valid Loss 0.6903 | Test Loss 0.7968
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.09it/s]
100%|██████████| 41/41 [00:14<00:00,  2.82it/s]


--------------------------------------------------
Epoch 12 | Time 28.8740 sec | Valid Loss 0.7238 | Test Loss 0.8273
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:14<00:00,  2.82it/s]


--------------------------------------------------
Epoch 13 | Time 28.7194 sec | Valid Loss 0.7168 | Test Loss 0.7585
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.14it/s]
100%|██████████| 41/41 [00:14<00:00,  2.78it/s]


--------------------------------------------------
Epoch 14 | Time 28.8718 sec | Valid Loss 0.7099 | Test Loss 0.7712
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.06it/s]
100%|██████████| 41/41 [00:14<00:00,  2.79it/s]


--------------------------------------------------
Epoch 15 | Time 29.1796 sec | Valid Loss 0.7390 | Test Loss 0.7543
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.10it/s]
100%|██████████| 41/41 [00:14<00:00,  2.78it/s]


--------------------------------------------------
Epoch 16 | Time 29.0492 sec | Valid Loss 0.6889 | Test Loss 0.7633
--------------------------------------------------
MAE:  0.76330894
Correlation Coefficient:  0.7632117147219627
mult_acc_7:  0.44169096209912534
mult_acc_5:  0.5058309037900874
F1 score all/non0: 0.8042/0.8187 over 686/656
Accuracy all/non0: 0.8047/0.8186
--------------------------------------------------
Saved model at pre_trained_models/MM.pt!


100%|██████████| 41/41 [00:13<00:00,  3.09it/s]
100%|██████████| 41/41 [00:14<00:00,  2.77it/s]


--------------------------------------------------
Epoch 17 | Time 29.1115 sec | Valid Loss 0.6963 | Test Loss 0.7905
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.10it/s]
100%|██████████| 41/41 [00:14<00:00,  2.80it/s]


--------------------------------------------------
Epoch 18 | Time 28.9434 sec | Valid Loss 0.6939 | Test Loss 0.7551
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:14<00:00,  2.81it/s]


--------------------------------------------------
Epoch 19 | Time 28.7966 sec | Valid Loss 0.6965 | Test Loss 0.7637
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.14it/s]
100%|██████████| 41/41 [00:14<00:00,  2.81it/s]


--------------------------------------------------
Epoch 20 | Time 28.7272 sec | Valid Loss 0.6969 | Test Loss 0.7638
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:14<00:00,  2.80it/s]


--------------------------------------------------
Epoch 21 | Time 28.8748 sec | Valid Loss 0.7047 | Test Loss 0.7489
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.13it/s]
100%|██████████| 41/41 [00:14<00:00,  2.82it/s]


--------------------------------------------------
Epoch 22 | Time 28.6730 sec | Valid Loss 0.6970 | Test Loss 0.7681
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.11it/s]
100%|██████████| 41/41 [00:14<00:00,  2.80it/s]


--------------------------------------------------
Epoch 23 | Time 28.8483 sec | Valid Loss 0.6816 | Test Loss 0.7937
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.10it/s]
100%|██████████| 41/41 [00:14<00:00,  2.83it/s]


--------------------------------------------------
Epoch 24 | Time 28.8005 sec | Valid Loss 0.6961 | Test Loss 0.7709
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.13it/s]
100%|██████████| 41/41 [00:14<00:00,  2.84it/s]


--------------------------------------------------
Epoch 25 | Time 28.5931 sec | Valid Loss 0.7001 | Test Loss 0.7832
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.01it/s]
100%|██████████| 41/41 [00:14<00:00,  2.79it/s]


--------------------------------------------------
Epoch 26 | Time 29.3907 sec | Valid Loss 0.7525 | Test Loss 0.9096
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.09it/s]
100%|██████████| 41/41 [00:14<00:00,  2.79it/s]


--------------------------------------------------
Epoch 27 | Time 28.9865 sec | Valid Loss 0.8651 | Test Loss 0.8780
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.13it/s]
100%|██████████| 41/41 [00:14<00:00,  2.81it/s]


--------------------------------------------------
Epoch 28 | Time 28.7608 sec | Valid Loss 0.7598 | Test Loss 0.8687
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.11it/s]
100%|██████████| 41/41 [00:14<00:00,  2.83it/s]


--------------------------------------------------
Epoch 29 | Time 28.7443 sec | Valid Loss 0.7350 | Test Loss 0.8852
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.10it/s]
100%|██████████| 41/41 [00:14<00:00,  2.79it/s]


--------------------------------------------------
Epoch 30 | Time 28.9556 sec | Valid Loss 0.8216 | Test Loss 0.8607
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:14<00:00,  2.78it/s]


--------------------------------------------------
Epoch 31 | Time 28.9729 sec | Valid Loss 1.0651 | Test Loss 1.2261
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:14<00:00,  2.78it/s]


--------------------------------------------------
Epoch 32 | Time 28.9966 sec | Valid Loss 1.2240 | Test Loss 0.9041
--------------------------------------------------


100%|██████████| 41/41 [00:13<00:00,  3.12it/s]
100%|██████████| 41/41 [00:14<00:00,  2.78it/s]


--------------------------------------------------
Epoch 33 | Time 28.9449 sec | Valid Loss 1.3060 | Test Loss 1.7335
--------------------------------------------------
Best epoch: 16
MAE:  0.76330894
Correlation Coefficient:  0.7632117147219627
mult_acc_7:  0.44169096209912534
mult_acc_5:  0.5058309037900874
F1 score all/non0: 0.8042/0.8187 over 686/656
Accuracy all/non0: 0.8047/0.8186
--------------------------------------------------


In [30]:
# Persist only the model weights (state_dict) to a file in the working directory.
torch.save(model.state_dict(), "./saved_models_MMIM_mosi.pt")

### Model Load

In [31]:
# Load the weights from the checkpoint file into the existing model object,
# then switch it to evaluation mode (the cell displays the model repr).
model.load_state_dict(torch.load("./saved_models_MMIM_mosi.pt"))
model.eval()

MMIM(
  (text_enc): LanguageEmbeddingLayer(
    (bertmodel): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
     

In [32]:
# Run the MOSI test harness on the loaded model and unpack its four results:
# segment ids, raw predictions, binary predictions and 7-class predictions.
tester = TestMOSI(model)
segment_list, preds, preds_2, preds_7 = tester.start()

100%|██████████| 69/69 [00:01<00:00, 52.62it/s]


In [33]:
# Number of segments returned by the tester.
len(segment_list)

685

In [34]:
import pickle
# Ground-truth labels of the test split, aligned index-by-index with
# `segment_list`: raw score, binary class and 7-class name.

# (exclusive upper bound, class name) pairs; scores at or above the last
# bound are 'very positive'.
_SEVEN_CLASS_BINS = (
    (-15/7, 'very negative'),
    (-9/7, 'negative'),
    (-3/7, 'slightly negative'),
    (3/7, 'Neutral'),
    (9/7, 'slightly positive'),
    (15/7, 'positive'),
)


def _binary_label(score):
    """Return 'positive' for scores strictly above 0, otherwise 'negative'."""
    return 'positive' if score > 0 else 'negative'


def _seven_class_label(score):
    """Map a sentiment score to its 7-class name using `_SEVEN_CLASS_BINS`."""
    for upper_bound, class_name in _SEVEN_CLASS_BINS:
        if score < upper_bound:
            return class_name
    return 'very positive'


labels = []
labels_2 = []
labels_7 = []

# NOTE(security): pickle.load can execute arbitrary code - only open dataset
# files from a trusted source.
# NOTE(review): the directory is fixed to MOSI while the file name follows
# args.dataset - confirm the layout before using another dataset.
with open(f"../datasets/MOSI/{args.dataset}.pkl", "rb") as handle:
    data = pickle.load(handle)

test_data = data["test"]

for idx in range(len(test_data)):
    _features, label, segment = test_data[idx]
    # The tester must have produced its outputs in dataset order.
    assert segment_list[idx] == segment

    # Classify on the scalar score rather than on the array-valued label.
    score = label[0][0]
    labels.append(score)
    labels_2.append(_binary_label(score))
    labels_7.append(_seven_class_label(score))

In [35]:
from ipywidgets import interact

@interact
def get_predict_result(idx = range(len(segment_list))):
    """Print the gold and predicted values for the test segment at `idx`.

    Args:
        idx: Position in `segment_list`; `interact` turns the default range
            into a dropdown of valid indices.
    """
    report_rows = (
        ("SEGMENT:", segment_list),
        ("GOLD_VALUE:", labels),
        ("GOLD_BINARY:", labels_2),
        ("GOLD_7_CLASS:", labels_7),
        ("PREDICTED_VALUE:", preds),
        ("PREDICTED_BINARY:", preds_2),
        ("PREDICTED _7_CLASS:", preds_7),
    )
    for caption, values in report_rows:
        print(caption, values[idx])

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

In [36]:
# How many test segments the model assigned to the 'very negative' class.
count = sum(1 for predicted in preds_7 if predicted == 'very negative')

print(count)

0


In [37]:
# Segment ids, gold labels and predictions should all have the same length.
for sequence in (segment_list, labels, preds):
    print(len(sequence))

685
685
685


In [38]:
import plotly.express as px
import plotly.subplots as sp
import pandas as pd

# One row per test segment: gold labels next to the model predictions.
d = {'segmentID': segment_list, 'labels': labels, 'labels_2': labels_2, 'labels_7': labels_7, 'preds': preds, 'preds_2': preds_2, 'preds_7': preds_7}
df = pd.DataFrame(data=d)
# Display order of the seven classes on every categorical x axis.
order = ['very negative', 'negative', 'slightly negative', 'Neutral', 'slightly positive', 'positive', 'very positive']

fig1 = px.bar(df, x="labels_7")
fig2 = px.bar(df, x="preds_7")

fig1_traces = list(fig1["data"])
fig2_traces = list(fig2["data"])

# Side-by-side panels: gold 7-class distribution (left) vs. predicted (right).
this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Gold", "MIM"))
for col, panel_traces in enumerate((fig1_traces, fig2_traces), start=1):
    for panel_trace in panel_traces:
        # add_trace(row=, col=) replaces the deprecated append_trace.
        this_figure.add_trace(panel_trace, row=1, col=col)

this_figure.update_xaxes(categoryorder='array', categoryarray=order)
this_figure.show()

In [22]:
# Distribution of the raw predicted value within each gold 7-class bucket.
fig = (
    px.box(df, x="labels_7", y="preds")
    .update_xaxes(categoryorder='array', categoryarray=order)
    .update_layout(title_text="CMU-MOSI MIM prediction value distribution")
)
fig.show()

In [19]:
# For each gold 7-class label, collect the classes the model predicted for
# the segments carrying that gold label.
label_VN, label_N, label_SN, label_Neu, label_SP, label_P, label_VP = ([] for _ in range(7))

buckets_by_gold = {
    'very negative': label_VN,
    'negative': label_N,
    'slightly negative': label_SN,
    'Neutral': label_Neu,
    'slightly positive': label_SP,
    'positive': label_P,
    'very positive': label_VP,
}

for idx in range(len(segment_list)):
    bucket = buckets_by_gold.get(labels_7[idx])
    if bucket is not None:
        bucket.append(preds_7[idx])



In [20]:
# Number of test segments per gold class, from 'very negative' to 'very positive'.
for bucket in (label_VN, label_N, label_SN, label_Neu, label_SP, label_P, label_VP):
    print(len(bucket))

96
140
113
96
98
94
48


In [32]:
# One single-column frame of predicted classes per gold class, ordered from
# 'very negative' (df1) to 'very positive' (df7).
df1, df2, df3, df4, df5, df6, df7 = (
    pd.DataFrame(data=bucket)
    for bucket in (label_VN, label_N, label_SN, label_Neu, label_SP, label_P, label_VP)
)
# Display order of the seven classes on every categorical x axis.
order = ['very negative', 'negative', 'slightly negative', 'Neutral', 'slightly positive', 'positive', 'very positive']

fig1 = px.bar(df1)
fig7 = px.bar(df7)

fig1_traces = list(fig1["data"])
fig7_traces = list(fig7["data"])

# Predicted classes for the two extreme gold classes, side by side.
this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("very negative", "very positive"))
for col, panel_traces in enumerate((fig1_traces, fig7_traces), start=1):
    for panel_trace in panel_traces:
        # add_trace(row=, col=) replaces the deprecated append_trace.
        this_figure.add_trace(panel_trace, row=1, col=col)

this_figure.update_layout(title_text='MIM', title_x=0.5)
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
this_figure.show()

In [33]:
fig2 = px.bar(df2)
fig6 = px.bar(df6)

fig2_traces = list(fig2["data"])
fig6_traces = list(fig6["data"])

# Predicted classes for the gold 'negative' and 'positive' segments, side by side.
this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("negative", "positive"))
for col, panel_traces in enumerate((fig2_traces, fig6_traces), start=1):
    for panel_trace in panel_traces:
        # add_trace(row=, col=) replaces the deprecated append_trace.
        this_figure.add_trace(panel_trace, row=1, col=col)

this_figure.update_layout(title_text='MIM', title_x=0.5)
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
this_figure.show()

In [34]:
fig3 = px.bar(df3)
fig4 = px.bar(df4)
fig5 = px.bar(df5)

fig3_traces = list(fig3["data"])
fig4_traces = list(fig4["data"])
fig5_traces = list(fig5["data"])

# Predicted classes for the three weak-sentiment gold classes, side by side.
this_figure = sp.make_subplots(rows=1, cols=3, subplot_titles=("slightly negative", "Neutral", "slightly positive"))
for col, panel_traces in enumerate((fig3_traces, fig4_traces, fig5_traces), start=1):
    for panel_trace in panel_traces:
        # add_trace(row=, col=) replaces the deprecated append_trace.
        this_figure.add_trace(panel_trace, row=1, col=col)

this_figure.update_layout(title_text='MIM', title_x=0.5)
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
this_figure.show()

In [19]:
# CMU-MOSI MIM model prediction proportion:
# for each gold class, the percentage of its segments that the model assigned
# to that same class.
for class_name, bucket in (
    ('very negative', label_VN),
    ('negative', label_N),
    ('slightly negative', label_SN),
    ('Neutral', label_Neu),
    ('slightly positive', label_SP),
    ('positive', label_P),
    ('very positive', label_VP),
):
    print(class_name + " answer proportion: ", 100 * bucket.count(class_name)/len(bucket))

very negative answer proportion:  28.125
negative answer proportion:  45.714285714285715
slightly negative answer proportion:  39.823008849557525
Neutral answer proportion:  56.25
slightly positive answer proportion:  26.53061224489796
positive answer proportion:  50.0
very positive answer proportion:  33.333333333333336


In [20]:
# Exact-class hit rate pooled over the two extreme gold classes.
strong_hits = label_VN.count('very negative') + label_VP.count('very positive')
strong_total = len(label_VN) + len(label_VP)
print("strong sentiment answer proportion: ", 100 * strong_hits / strong_total)

# Exact-class hit rate pooled over the three weak-sentiment gold classes.
weak_hits = label_SN.count('slightly negative') + label_Neu.count('Neutral') + label_SP.count('slightly positive')
weak_total = len(label_SN) + len(label_Neu) + len(label_SP)
print("weak sentiment answer proportion: ", 100 * weak_hits / weak_total)

strong sentiment answer proportion:  29.86111111111111
weak sentiment answer proportion:  40.71661237785016
