# Dataset Exploration

In [6]:
import pickle as pkl
import os
from src.dataset import Multimodal_Datasets

In [2]:
# Root directory that holds the dataset pickle files.
# Set the MULT_DATA_DIR environment variable to point the notebook at another
# location without editing this cell; the original hard-coded directory is
# still the default (an unset or empty variable falls back to it).
# os.path.join(..., '') guarantees a trailing separator, which the cells that
# build file names as `dataset_path + '<name>.pkl'` depend on.
dataset_path = os.path.join(
    os.environ.get('MULT_DATA_DIR') or '/data1/multimodal/MulT/data/',
    '',
)

## Unaligned set

In [21]:
# Unaligned
# Instantiate one dataset object per (corpus, split) pair with if_align=False.
# The nested loops yield mosi/train, mosi/test, mosei_senti/train,
# mosei_senti/test - exactly the order of the names on the left-hand side.
(mosi_train_dataset,
 mosi_test_dataset,
 mosei_train_dataset,
 mosei_test_dataset) = (
    Multimodal_Datasets(dataset_path, data=corpus, split_type=split, if_align=False)
    for corpus in ('mosi', 'mosei_senti')
    for split in ('train', 'test')
)

In [4]:
# Unaligned datasets gathered in one list; the position in this list is the
# `idx` value offered by the summary dropdown.
datasets = [
    mosi_train_dataset,   # 0
    mosi_test_dataset,    # 1
    mosei_train_dataset,  # 2
    mosei_test_dataset,   # 3
]

In [5]:
from tqdm import tqdm
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

@interact
def get_explore(idx=range(len(datasets))):
    """Print a short summary of the unaligned dataset selected by ``idx``.

    idx: position in ``datasets``. The default range is what ``interact``
    renders as a dropdown of the valid indices.
    """
    chosen = datasets[idx]
    name = chosen.data + '_' + chosen.split_type
    print("!! %s !!" % name.upper())

    # Caption / accessor pairs, printed in this order.
    summary_rows = (
        ("number of modalities               :", chosen.get_n_modalities),
        ("seq_len of (text, audio, vision)   :", chosen.get_seq_len),
        ("get_dim of (text, audio, vision)   :", chosen.get_dim),
        ("(number of labels, label_dim)      :", chosen.get_lbl_info),
    )
    for caption, accessor in summary_rows:
        print(caption, accessor())

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3), value=0), Output()), _dom_classes=('wi…

In [32]:
# Load the unaligned MOSI and MOSEI pickles directly so individual samples can
# be inspected below.
# os.path.join is used instead of string concatenation so the file names
# resolve correctly whether or not dataset_path ends with a path separator.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only point dataset_path at files from a trusted source.
with open(os.path.join(dataset_path, 'mosi_data_noalign.pkl'), 'rb') as f:
    mosi = pkl.load(f)
with open(os.path.join(dataset_path, 'mosei_senti_data_noalign.pkl'), 'rb') as f:
    mosei = pkl.load(f)

In [35]:
# Pull the 'train' and 'test' entries out of each loaded pickle.
train_mosi, test_mosi = mosi['train'], mosi['test']
train_mosei, test_mosei = mosei['train'], mosei['test']

In [31]:
@interact
def get_mosi_sample(idx = range(len(test_mosi['text']))):
    """Show one entry of ``test_mosi``: per-modality shapes, label and id.

    idx: sample position; the dropdown covers every index of
    ``test_mosi['text']``.
    """
    # The three modalities are reported by shape only.
    for tag, field in (("TEXT", 'text'), ("VISION", 'vision'), ("AUDIO", 'audio')):
        print(tag, test_mosi[field][idx].shape)
    # Label and id are printed as stored.
    print("LABEL", test_mosi['labels'][idx])
    print("ID", test_mosi['id'][idx])

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

In [34]:
@interact
def get_mosi_sample(idx = range(len(test_mosei['text']))):
    """Show one entry of ``test_mosei``: per-modality shapes, label and id.

    Note: the name ``get_mosi_sample`` is reused by several cells in this
    notebook; this definition reads from ``test_mosei``.

    idx: sample position; the dropdown covers every index of
    ``test_mosei['text']``.
    """
    # The three modalities are reported by shape only.
    for tag, field in (("TEXT", 'text'), ("VISION", 'vision'), ("AUDIO", 'audio')):
        print(tag, test_mosei[field][idx].shape)
    # Label and id are printed as stored.
    print("LABEL", test_mosei['labels'][idx])
    print("ID", test_mosei['id'][idx])

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

In [36]:
@interact
def get_mosi_sample(idx = range(len(train_mosei['text']))):
    """Show one entry of ``train_mosei``: per-modality shapes, label and id.

    Note: the name ``get_mosi_sample`` is reused by several cells in this
    notebook; this definition reads from ``train_mosei``.

    idx: sample position; the dropdown covers every index of
    ``train_mosei['text']``.
    """
    # The three modalities are reported by shape only.
    for tag, field in (("TEXT", 'text'), ("VISION", 'vision'), ("AUDIO", 'audio')):
        print(tag, train_mosei[field][idx].shape)
    # Label and id are printed as stored.
    print("LABEL", train_mosei['labels'][idx])
    print("ID", train_mosei['id'][idx])

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

## Aligned set

In [22]:
# Aligned
# Instantiate one dataset object per (corpus, split) pair with if_align=True.
# The nested loops yield mosi/train, mosi/test, mosei_senti/train,
# mosei_senti/test - exactly the order of the names on the left-hand side.
(mosi_train_aligned,
 mosi_test_aligned,
 mosei_train_aligned,
 mosei_test_aligned) = (
    Multimodal_Datasets(dataset_path, data=corpus, split_type=split, if_align=True)
    for corpus in ('mosi', 'mosei_senti')
    for split in ('train', 'test')
)

In [23]:
# Aligned datasets gathered in one list; the position in this list is the
# `idx` value offered by the aligned summary dropdown.
aligned_datasets = [
    mosi_train_aligned,   # 0
    mosi_test_aligned,    # 1
    mosei_train_aligned,  # 2
    mosei_test_aligned,   # 3
]

In [25]:
@interact
def get_explore(idx=range(len(aligned_datasets))):
    """Print a short summary of the aligned dataset selected by ``idx``.

    Note: this redefines ``get_explore`` from the unaligned section of the
    notebook; this version reads from ``aligned_datasets``.

    idx: position in ``aligned_datasets``. The default range is what
    ``interact`` renders as a dropdown of the valid indices.
    """
    chosen = aligned_datasets[idx]
    name = chosen.data + '_' + chosen.split_type + '_aligned'
    print("!! %s !!" % name.upper())

    # Caption / accessor pairs, printed in this order.
    summary_rows = (
        ("number of modalities               :", chosen.get_n_modalities),
        ("seq_len of (text, audio, vision)   :", chosen.get_seq_len),
        ("get_dim of (text, audio, vision)   :", chosen.get_dim),
        ("(number of labels, label_dim)      :", chosen.get_lbl_info),
    )
    for caption, accessor in summary_rows:
        print(caption, accessor())

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3), value=0), Output()), _dom_classes=('wi…

In [None]:
# Load the aligned MOSI pickle.
# os.path.join is used instead of string concatenation so the file name
# resolves correctly whether or not dataset_path ends with a path separator.
# CAUTION: this rebinds `mosi`, which an earlier cell loaded from
# 'mosi_data_noalign.pkl'. Re-running the cell that derives train_mosi /
# test_mosi after this one will silently pick up this file's data instead.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only point dataset_path at files from a trusted source.
with open(os.path.join(dataset_path, 'mosi_data.pkl'), 'rb') as f:
    mosi = pkl.load(f)