# args

In [1]:
import argparse

# Experiment configuration, expressed as command-line style options so the
# notebook stays in sync with the script version of this code.
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", type=str,
                    choices=["mosi", "mosei"], default="mosi")
parser.add_argument("--max_seq_length", type=int, default=50)
parser.add_argument("--train_batch_size", type=int, default=48)
parser.add_argument("--dev_batch_size", type=int, default=128)
parser.add_argument("--test_batch_size", type=int, default=128)
parser.add_argument("--n_epochs", type=int, default=50)
parser.add_argument("--beta_shift", type=float, default=1.0)
parser.add_argument("--dropout_prob", type=float, default=0.5)
parser.add_argument(
    "--model",
    type=str,
    choices=["bert-base-uncased"],
    default="bert-base-uncased",
)
parser.add_argument("--learning_rate", type=float, default=1e-5)
parser.add_argument("--gradient_accumulation_step", type=int, default=1)
parser.add_argument("--warmup_proportion", type=float, default=0.1)
parser.add_argument("--seed", type=int, default=5576)
# Name of the module the MIB class is imported from. Restricting the choices
# makes a typo fail here, at parse time, instead of much later when the model
# class turns out to be undefined.
parser.add_argument("--mib", type=str,
                    choices=["cmib", "emib", "lmib"], default='cmib')
# Script equivalent:
# python main_mib.py --mib emib --dataset mosi --train_batch_size 32 --n_epochs 50
# Inside the notebook the argument list is given explicitly (a tiny
# configuration: batch size 2, 2 epochs) instead of being read from sys.argv.
args = parser.parse_args(args=['--mib','emib','--train_batch_size','2','--n_epochs','2'])


# add path

In [2]:
import sys
import os
# Because notebooks are placed in subdirectories, add the path of the root directory.
# The entry is relative to the kernel's working directory. The membership check
# keeps re-running this cell from appending duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

# import MIB

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
# Import the MIB model class from the module selected by --mib. Each of the
# three modules is expected to expose a class named MIB.
if args.mib == 'cmib':
    from cmib import MIB
    print('selecting cmib!')
elif args.mib == 'emib':
    from emib import MIB
    print('selecting emib!')
elif args.mib == 'lmib':
    from lmib import MIB
    print('selecting lmib!')
else:
    # Fail fast: carrying on would leave MIB undefined and only surface later
    # as a NameError when the model is built.
    raise ValueError(
        'error! you should choose from {{cmib,emib,lmib}}! (received {!r})'.format(
            args.mib
        )
    )

selecting emib!


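Note on `transformers` versions: the import path of `BertPreTrainedModel` depends on the installed version.

With transformers 4.14.1 (the version used here):

from transformers.models.bert.modeling_bert import BertPreTrainedModel

With transformers 3.0.2 (`transformers=3.0.2=pypi_0`):

from transformers.modeling_bert import BertPreTrainedModel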

# def set_random_seed()

In [5]:
import torch
import random
import numpy as np
def set_random_seed(seed: int):
    """
    Helper function to seed experiment for reproducibility.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and sets the cuDNN flags (benchmark off, cuDNN disabled,
    deterministic on). The seed that is actually used is printed.

    Args:
        seed (int): integer to be used as seed, use -1 to randomly seed the
            experiment with a value drawn from 0~9999
    """
    # -1 is the documented "pick one for me" sentinel. Without this branch the
    # sentinel would reach np.random.seed(), which rejects negative values.
    if seed == -1:
        seed = random.randint(0, 9999)

    print("Seed: {}".format(seed))

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True

    random.seed(seed)
    # Hash randomisation is fixed at interpreter start-up, so this only
    # influences processes launched after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# global_strs

In [6]:
# Directory holding the pretrained BERT files that are handed to
# from_pretrained(). Set the BERT_PATH environment variable to point somewhere
# else; the fallback is the original hard-coded location, so existing setups
# keep working unchanged.
bert_path = os.environ.get('BERT_PATH', '/Users/mac/Desktop/tools/bert-base-uncased')

# def get_tokenizer(model):

In [7]:
from transformers import BertTokenizer, XLNetTokenizer, get_linear_schedule_with_warmup

def get_tokenizer(model):
    """Return the tokenizer for the given model name.

    Args:
        model (str): model identifier; only "bert-base-uncased" is supported.

    Returns:
        A ``BertTokenizer`` loaded from the module-level ``bert_path``.

    Raises:
        ValueError: if ``model`` is not a supported identifier.
    """
    if model == "bert-base-uncased":
        return BertTokenizer.from_pretrained(bert_path)

    # The message lists only what this function can actually provide.
    raise ValueError(
        "Expected 'bert-base-uncased', but received {}".format(model)
    )

# def prepare_bert_input(tokens, visual, acoustic, tokenizer)

In [8]:
from global_configs import TEXT_DIM, ACOUSTIC_DIM, VISUAL_DIM, DEVICE

def prepare_bert_input(tokens, visual, acoustic, tokenizer):
    """Build fixed-length BERT inputs with matching visual / acoustic features.

    The tokens are wrapped in the tokenizer's CLS / SEP markers and every
    output is right-padded with zeros up to ``args.max_seq_length`` (read from
    the module-level ``args``, not passed in).

    Args:
        tokens: list of word-piece tokens, without special tokens.
        visual: per-token visual features, concatenated with zero blocks of
            width ``VISUAL_DIM``.
        acoustic: per-token acoustic features, concatenated with zero blocks
            of width ``ACOUSTIC_DIM``.
        tokenizer: provides ``cls_token``, ``sep_token`` and
            ``convert_tokens_to_ids``.

    Returns:
        Tuple ``(input_ids, visual, acoustic, input_mask, segment_ids)``.
        ``input_mask`` is 1 on real positions and 0 on padding; ``segment_ids``
        is all zeros.
    """
    wrapped_tokens = [tokenizer.cls_token] + tokens + [tokenizer.sep_token]

    # A single all-zero feature row stands in for each of the two special tokens.
    acoustic_edge = np.zeros((1, ACOUSTIC_DIM))
    acoustic = np.concatenate((acoustic_edge, acoustic, acoustic_edge))
    visual_edge = np.zeros((1, VISUAL_DIM))
    visual = np.concatenate((visual_edge, visual, visual_edge))

    token_ids = tokenizer.convert_tokens_to_ids(wrapped_tokens)
    n_real = len(token_ids)
    n_pad = args.max_seq_length - n_real

    # Right-pad the feature matrices with zero rows.
    acoustic = np.concatenate((acoustic, np.zeros((n_pad, ACOUSTIC_DIM))))
    visual = np.concatenate((visual, np.zeros((n_pad, VISUAL_DIM))))

    # Right-pad the three integer sequences with zeros.
    zero_tail = [0] * n_pad
    input_ids = token_ids + zero_tail
    input_mask = [1] * n_real + zero_tail
    segment_ids = [0] * n_real + zero_tail

    return input_ids, visual, acoustic, input_mask, segment_ids


# class InputFeatures(object)

In [9]:
class InputFeatures:
    """Plain record holding the model-ready features of one example."""

    def __init__(self, input_ids, visual, acoustic, input_mask, segment_ids, label_id):
        # Store every constructor argument under an attribute of the same name.
        (
            self.input_ids,
            self.visual,
            self.acoustic,
            self.input_mask,
            self.segment_ids,
            self.label_id,
        ) = (input_ids, visual, acoustic, input_mask, segment_ids, label_id)


# def convert_to_features(examples, max_seq_length, tokenizer):

In [10]:
def convert_to_features(examples, max_seq_length, tokenizer):
    """Turn raw examples into a list of fixed-length ``InputFeatures``.

    Each example is unpacked as ``((words, visual, acoustic), label_id, segment)``.
    Every word is split into word pieces, the word's visual / acoustic feature
    row is repeated once per piece, the sequence is truncated to
    ``max_seq_length - 2`` pieces and then handed to ``prepare_bert_input`` for
    special tokens and padding.

    Args:
        examples: iterable of examples in the layout described above.
        max_seq_length (int): maximum sequence length including the two
            special tokens. Padding is done to ``args.max_seq_length`` by
            ``prepare_bert_input``, so the two are expected to be equal.
        tokenizer: provides ``tokenize`` plus what ``prepare_bert_input`` needs.

    Returns:
        list of ``InputFeatures``, one per example, in input order.

    Raises:
        ValueError: if ``args.model`` has no input builder in this file.
    """
    # The builder depends only on the global configuration, so resolve it once.
    # Unsupported models get an explicit error instead of an unbound or
    # undefined name.
    if args.model == "bert-base-uncased":
        prepare_input = prepare_bert_input
    else:
        raise ValueError(
            "Expected 'bert-base-uncased', but received {}".format(args.model)
        )

    features = []

    for example in examples:
        (words, visual, acoustic), label_id, _segment = example

        # tokens[i] is a word piece; inversions[i] is the index of its word.
        tokens, inversions = [], []
        for idx, word in enumerate(words):
            tokenized = tokenizer.tokenize(word)
            tokens.extend(tokenized)
            inversions.extend([idx] * len(tokenized))

        # Check inversion
        assert len(tokens) == len(inversions)

        # Repeat each word's feature row once per word piece. Integer-array
        # indexing keeps the feature dimension even when there are no tokens,
        # so the later concatenation with zero rows still works.
        token_to_word = np.asarray(inversions, dtype=np.intp)
        aligned_visual = np.asarray(visual)[token_to_word]
        aligned_acoustic = np.asarray(acoustic)[token_to_word]

        # Truncate input if necessary (two slots are reserved for special tokens)
        if len(tokens) > max_seq_length - 2:
            tokens = tokens[: max_seq_length - 2]
            aligned_acoustic = aligned_acoustic[: max_seq_length - 2]
            aligned_visual = aligned_visual[: max_seq_length - 2]

        input_ids, padded_visual, padded_acoustic, input_mask, segment_ids = prepare_input(
            tokens, aligned_visual, aligned_acoustic, tokenizer
        )

        # Check input length
        assert len(input_ids) == args.max_seq_length
        assert len(input_mask) == args.max_seq_length
        assert len(segment_ids) == args.max_seq_length
        assert padded_acoustic.shape[0] == args.max_seq_length
        assert padded_visual.shape[0] == args.max_seq_length

        features.append(
            InputFeatures(
                input_ids=input_ids,
                input_mask=input_mask,
                segment_ids=segment_ids,
                visual=padded_visual,
                acoustic=padded_acoustic,
                label_id=label_id,
            )
        )
    return features


# get_appropriate_dataset(data)

In [11]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset

# When True, every dataset is cut down to its first DEBUG_SUBSET_SIZE examples
# so the whole pipeline can be smoke-tested quickly. Set to False for a real run.
debug = True
DEBUG_SUBSET_SIZE = 4


def get_appropriate_dataset(data):
    """Convert raw examples into a ``TensorDataset`` of model inputs.

    Args:
        data: examples in the format accepted by ``convert_to_features``.

    Returns:
        ``TensorDataset`` whose items are
        ``(input_ids, visual, acoustic, input_mask, segment_ids, label_ids)``.
        The id / mask / segment tensors are ``torch.long``, the rest
        ``torch.float``. Only the first ``DEBUG_SUBSET_SIZE`` examples are kept
        while the module-level ``debug`` flag is on.
    """
    tokenizer = get_tokenizer(args.model)
    features = convert_to_features(data, args.max_seq_length, tokenizer)

    all_input_ids = torch.tensor(
        [f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor(
        [f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor(
        [f.segment_ids for f in features], dtype=torch.long)
    # Stack the per-example NumPy arrays into one array first: building a
    # tensor straight from a list of ndarrays is the slow path in PyTorch.
    all_visual = torch.tensor(
        np.array([f.visual for f in features]), dtype=torch.float)
    all_acoustic = torch.tensor(
        np.array([f.acoustic for f in features]), dtype=torch.float)
    all_label_ids = torch.tensor(
        [f.label_id for f in features], dtype=torch.float)

    tensors = (
        all_input_ids,
        all_visual,
        all_acoustic,
        all_input_mask,
        all_segment_ids,
        all_label_ids,
    )
    if debug:
        tensors = tuple(t[:DEBUG_SUBSET_SIZE] for t in tensors)

    return TensorDataset(*tensors)

# def set_up_data_loader()

In [12]:
import pickle 
def set_up_data_loader():
    """Load the pickled dataset and build the train / dev / test data loaders.

    The file ``<data dir>/<args.dataset>.pkl`` must unpickle to a mapping with
    the keys ``"train"``, ``"dev"`` and ``"test"``. The data directory is taken
    from the ``MIB_DATA_DIR`` environment variable and falls back to the
    original hard-coded location.

    Returns:
        Tuple ``(train_dataloader, dev_dataloader, test_dataloader,
        num_train_optimization_steps)``, where the last item is
        ``int(len(train) / train_batch_size / gradient_accumulation_step) * n_epochs``.
    """
    data_dir = os.environ.get("MIB_DATA_DIR", "/Users/mac/Desktop/big_file")
    data_path = os.path.join(data_dir, f"{args.dataset}.pkl")

    # SECURITY: unpickling executes code stored in the file; only load
    # dataset files from a trusted source.
    with open(data_path, "rb") as handle:
        data = pickle.load(handle)

    train_data = data["train"]
    dev_data = data["dev"]
    test_data = data["test"]

    train_dataset = get_appropriate_dataset(train_data)
    dev_dataset = get_appropriate_dataset(dev_data)
    test_dataset = get_appropriate_dataset(test_data)

    num_train_optimization_steps = (
        int(
            len(train_dataset) / args.train_batch_size /
            args.gradient_accumulation_step
        )
        * args.n_epochs
    )

    train_dataloader = DataLoader(
        train_dataset, batch_size=args.train_batch_size, shuffle=True
    )

    # NOTE(review): dev and test loaders are shuffled as well. Kept as-is so the
    # random-number stream (and therefore results) match earlier runs; consider
    # shuffle=False for evaluation.
    dev_dataloader = DataLoader(
        dev_dataset, batch_size=args.dev_batch_size, shuffle=True
    )

    test_dataloader = DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=True,
    )

    return (
        train_dataloader,
        dev_dataloader,
        test_dataloader,
        num_train_optimization_steps,
    )



# class MultimodalConfig(object) 

In [13]:
class MultimodalConfig:
    """Holds the two multimodal hyper-parameters that are passed to the model."""

    def __init__(self, beta_shift, dropout_prob):
        self.beta_shift, self.dropout_prob = beta_shift, dropout_prob

# def prep_for_training(num_train_optimization_steps: int)

In [14]:
def prep_for_training(num_train_optimization_steps: int):
    """Instantiate the selected MIB model and move it to ``DEVICE``.

    The model is created with ``MIB.from_pretrained`` from ``bert_path`` with
    ``num_labels=1`` and a ``MultimodalConfig`` built from ``args.beta_shift``
    and ``args.dropout_prob``. Its total parameter count is printed.

    Args:
        num_train_optimization_steps (int): not used in this function; kept so
            existing callers keep working.

    Returns:
        The model, already moved to ``DEVICE``.

    Raises:
        ValueError: if ``args.model`` is not "bert-base-uncased".
    """
    # Reject unsupported models up front rather than failing later on an
    # unbound ``model`` variable.
    if args.model != "bert-base-uncased":
        raise ValueError(
            "Expected 'bert-base-uncased', but received {}".format(args.model)
        )

    multimodal_config = MultimodalConfig(
        beta_shift=args.beta_shift, dropout_prob=args.dropout_prob
    )
    model = MIB.from_pretrained(
        bert_path, multimodal_config=multimodal_config, num_labels=1,
    )

    # numel() is an exact integer count for every parameter, including 0-dim ones.
    total_para = sum(param.numel() for param in model.parameters())
    print('total parameter for the model: ', total_para)

    model.to(DEVICE)

    return model

# def train_epoch(model, train_dataloader)

In [15]:
import torch.nn as nn
from tqdm import tqdm, trange
from torch.nn import CrossEntropyLoss, L1Loss, MSELoss

def train_epoch(model: nn.Module, train_dataloader: DataLoader):
    """Run the model over the training loader once and return the mean MSE.

    The model is switched to train mode and called with the labels as its
    fourth positional argument. The per-batch MSE between the flattened output
    and the flattened labels is averaged over the number of batches.

    NOTE(review): no backward pass or optimizer step is issued in this
    function; presumably the model updates itself inside its forward call,
    since it receives ``label_ids`` - confirm against the MIB implementation.

    Returns:
        float: sum of per-batch losses divided by the number of batches.
    """
    model.train()
    criterion = MSELoss()
    running_loss = 0
    n_steps = 0

    for batch in tqdm(train_dataloader, desc="Iteration"):
        on_device = tuple(tensor.to(DEVICE) for tensor in batch)
        input_ids, visual, acoustic, input_mask, segment_ids, label_ids = on_device

        # Drop a singleton axis at position 1, if the features carry one.
        visual = torch.squeeze(visual, 1)
        acoustic = torch.squeeze(acoustic, 1)

        logits = model(
            input_ids,
            visual,
            acoustic,
            label_ids,
            token_type_ids=segment_ids,
            attention_mask=input_mask,
            labels=None,
        )

        loss = criterion(logits.view(-1), label_ids.view(-1))
        if args.gradient_accumulation_step > 1:
            loss = loss / args.gradient_accumulation_step

        running_loss += loss.item()
        n_steps += 1

    return running_loss / n_steps


# def eval_epoch(model, validation_dataloader)

In [16]:
def eval_epoch(model: nn.Module, dev_dataloader: DataLoader):
    """Compute the mean per-batch MSE of the model on the validation loader.

    The model is put in eval mode and queried through ``model.test`` (no
    labels are passed) with gradient tracking disabled. As in training, each
    batch loss is divided by ``args.gradient_accumulation_step`` when that
    value is greater than 1.

    Returns:
        float: sum of per-batch losses divided by the number of batches.
    """
    model.eval()
    criterion = MSELoss()
    total_loss = 0
    n_batches = 0

    with torch.no_grad():
        for batch in tqdm(dev_dataloader, desc="Iteration"):
            on_device = tuple(tensor.to(DEVICE) for tensor in batch)
            input_ids, visual, acoustic, input_mask, segment_ids, label_ids = on_device

            visual = torch.squeeze(visual, 1)
            acoustic = torch.squeeze(acoustic, 1)

            logits = model.test(
                input_ids,
                visual,
                acoustic,
                token_type_ids=segment_ids,
                attention_mask=input_mask,
            )

            batch_loss = criterion(logits.view(-1), label_ids.view(-1))
            if args.gradient_accumulation_step > 1:
                batch_loss = batch_loss / args.gradient_accumulation_step

            total_loss += batch_loss.item()
            n_batches += 1

    return total_loss / n_batches


# def test_epoch(model, test_dataloader)

In [17]:
def test_epoch(model: nn.Module, test_dataloader: DataLoader):
    """Collect model predictions and ground-truth labels over the test loader.

    The model is put in eval mode and queried through ``model.test`` with
    gradient tracking disabled.

    Args:
        model: model exposing a ``test`` method.
        test_dataloader: loader yielding
            ``(input_ids, visual, acoustic, input_mask, segment_ids, label_ids)``.

    Returns:
        Tuple ``(preds, labels)`` of NumPy arrays built from all batches in
        iteration order.
    """
    model.eval()
    preds = []
    labels = []

    with torch.no_grad():
        for batch in tqdm(test_dataloader):
            batch = tuple(t.to(DEVICE) for t in batch)

            input_ids, visual, acoustic, input_mask, segment_ids, label_ids = batch
            visual = torch.squeeze(visual, 1)
            acoustic = torch.squeeze(acoustic, 1)
            outputs = model.test(
                input_ids,
                visual,
                acoustic,
                token_type_ids=segment_ids,
                attention_mask=input_mask,
                labels=None,
            )

            logits = outputs.detach().cpu().numpy()
            label_ids = label_ids.detach().cpu().numpy()

            # squeeze() collapses a batch of size 1 to a 0-d array, whose
            # tolist() is a bare float that list.extend() cannot consume.
            # atleast_1d keeps such a batch as a one-element list.
            logits = np.atleast_1d(np.squeeze(logits)).tolist()
            label_ids = np.atleast_1d(np.squeeze(label_ids)).tolist()

            preds.extend(logits)
            labels.extend(label_ids)

        preds = np.array(preds)
        labels = np.array(labels)

    return preds, labels


# def multiclass_acc(test_preds_a7, test_truth_a7)

In [18]:
def multiclass_acc(preds, truths):
    """
    Fraction of predictions that fall in the same class as the ground truth,
    where both values are rounded to the nearest integer before comparison.

    :param preds: Float array representing the predictions, dimension (N,)
    :param truths: Float/int array representing the groundtruth classes, dimension (N,)
    :return: Classification accuracy
    """
    same_class = np.round(preds) == np.round(truths)
    n_total = float(len(truths))
    return np.sum(same_class) / n_total

# def test_score_model(model, test_data_loader)

In [19]:
from sklearn.metrics import accuracy_score, f1_score

def test_score_model(model: nn.Module, test_dataloader: DataLoader, use_zero=False):
    """Evaluate the model on the test loader and return summary metrics.

    The 7-class accuracy is computed on all examples after clipping
    predictions and labels to [-3, 3]. The remaining metrics use only the
    examples whose label is non-zero, unless ``use_zero`` is true, in which
    case every example is used. Binary accuracy and weighted F1 treat values
    ``>= 0`` as the positive class.

    Returns:
        Tuple ``(acc, mae, corr, f_score, mult_a7)``.
    """
    predictions, targets = test_epoch(model, test_dataloader)

    # Indices of the examples that take part in MAE / corr / F1 / accuracy.
    selected = np.array(
        [pos for pos, target in enumerate(targets) if target != 0 or use_zero])

    # 7-class accuracy over the full, unfiltered test set.
    clipped_predictions = np.clip(predictions, a_min=-3., a_max=3.)
    clipped_targets = np.clip(targets, a_min=-3., a_max=3.)
    mult_a7 = multiclass_acc(clipped_predictions, clipped_targets)

    predictions = predictions[selected]
    targets = targets[selected]

    mae = np.mean(np.absolute(predictions - targets))
    corr = np.corrcoef(predictions, targets)[0][1]

    # Binarise at zero for the two-class metrics.
    predicted_positive = predictions >= 0
    target_positive = targets >= 0

    f_score = f1_score(target_positive, predicted_positive, average="weighted")
    acc = accuracy_score(target_positive, predicted_positive)

    return acc, mae, corr, f_score, mult_a7

# def train(...)

In [20]:
def train(
    model,
    train_dataloader,
    validation_dataloader,
    test_data_loader
):
    """Run ``args.n_epochs`` epochs of training, validation and testing.

    After every epoch the current metrics are printed, the test metrics of the
    epoch with the lowest validation loss so far are printed as "best", and
    everything is logged to wandb.

    Args:
        model: model passed on to ``train_epoch`` / ``eval_epoch`` /
            ``test_score_model``.
        train_dataloader: loader for the training split.
        validation_dataloader: loader for the validation split.
        test_data_loader: loader for the test split.

    Returns:
        None.
    """
    valid_losses = []
    test_accuracies = []
    # Start from +inf so that the first finite validation loss always becomes
    # the best one. The best_* metrics start as NaN so the summary line can be
    # printed even if no epoch has qualified yet (e.g. a NaN validation loss).
    best_loss = float("inf")
    best_acc = best_mae = best_corr = best_f_score = best_acc_7 = float("nan")
    for epoch_i in range(int(args.n_epochs)):
        train_loss = train_epoch(model, train_dataloader)
        valid_loss = eval_epoch(model, validation_dataloader)
        test_acc, test_mae, test_corr, test_f_score, test_acc7 = test_score_model(
            model, test_data_loader
        )

        print(
            "epoch:{}, train_loss:{:.4f}, valid_loss:{:.4f}, test_acc:{:.4f}".format(
                epoch_i, train_loss, valid_loss, test_acc
            )
        )

        print(
            "current mae:{:.4f}, current acc:{:.4f}, acc7:{:.4f}, f1:{:.4f}, corr:{:.4f}".format(
                test_mae, test_acc, test_acc7, test_f_score, test_corr
            )
        )

        valid_losses.append(valid_loss)
        test_accuracies.append(test_acc)

        # Model selection: remember the test metrics of the epoch with the
        # lowest validation loss.
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_acc = test_acc
            best_mae = test_mae
            best_corr = test_corr
            best_f_score = test_f_score
            best_acc_7 = test_acc7
        print(
            "best mae:{:.4f}, acc:{:.4f}, acc7:{:.4f}, f1:{:.4f}, corr:{:.4f}".format(
                best_mae, best_acc, best_acc_7, best_f_score, best_corr
            )
        )

        wandb.log(
            {
                "train_loss": train_loss,
                "valid_loss": valid_loss,
                "test_acc": test_acc,
                "test_mae": test_mae,
                "test_corr": test_corr,
                "test_f_score": test_f_score,
                "test_acc7": test_acc7,
                "best_valid_loss": min(valid_losses),
                "best_test_acc": max(test_accuracies),
            }
        )


# def main()

In [21]:
import wandb

def main():
    """Run the whole experiment: log the config, seed, load data, build the model, train."""
    print(args)

    # Register the run and its configuration with Weights & Biases.
    wandb.init(project="prj4")
    wandb.config.update(args)

    set_random_seed(args.seed)

    train_loader, dev_loader, test_loader, total_steps = set_up_data_loader()
    model = prep_for_training(total_steps)

    train(model, train_loader, dev_loader, test_loader)
    

# run main

In [22]:
# Entry point: start the full experiment. The guard is kept from the script
# version of this code; the output recorded below shows that it also fires
# when this cell is executed in the notebook.
if __name__ == "__main__":
    main()

Namespace(beta_shift=1.0, dataset='mosi', dev_batch_size=128, dropout_prob=0.5, gradient_accumulation_step=1, learning_rate=1e-05, max_seq_length=50, mib='emib', model='bert-base-uncased', n_epochs=2, seed=5576, test_batch_size=128, train_batch_size=2, warmup_proportion=0.1)


[34m[1mwandb[0m: Currently logged in as: [33mdingning[0m (use `wandb login --relogin` to force relogin)


Seed: 5576


Some weights of the model checkpoint at /Users/mac/Desktop/tools/bert-base-uncased were not used when initializing MIB: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing MIB from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MIB from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MIB were not initialized from the model checkpoint at /Users/mac/Desktop/tools

total parameter for the model:  109950948


  q *= self.scaling
Iteration: 100%|███████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00,  2.19s/it]
Iteration: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s]


epoch:0, train_loss:2.6149, valid_loss:3.1882, test_acc:0.5000
current mae:1.9492, current acc:0.5000, acc7:0.0000, f1:0.5000, corr:0.5293
best mae:1.9492, acc:0.5000, acc7:0.0000, f1:0.5000, corr:0.5293


  q *= self.scaling
Iteration: 100%|███████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00,  2.07s/it]
Iteration: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s]


epoch:1, train_loss:2.6139, valid_loss:3.1888, test_acc:0.5000
current mae:1.9497, current acc:0.5000, acc7:0.0000, f1:0.5000, corr:0.0540
best mae:1.9492, acc:0.5000, acc7:0.0000, f1:0.5000, corr:0.5293


# end

In [23]:
wandb.run

Exception in thread NetStatThr:
Traceback (most recent call last):
  File "/Users/mac/anaconda3/envs/t18/lib/python3.7/threading.py", line 917, in _bootstrap_inner
    self.run()
  File "/Users/mac/anaconda3/envs/t18/lib/python3.7/threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/mac/anaconda3/envs/t18/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 149, in check_network_status
    status_response = self._interface.communicate_network_status()
  File "/Users/mac/anaconda3/envs/t18/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 125, in communicate_network_status
    resp = self._communicate_network_status(status)
  File "/Users/mac/anaconda3/envs/t18/lib/python3.7/site-packages/wandb/sdk/interface/interface_shared.py", line 397, in _communicate_network_status
    resp = self._communicate(req, local=True)
  File "/Users/mac/anaconda3/envs/t18/lib/python3.7/site-packages/wandb/sdk/interface/interface_shared.py", lin