# Dataset Explorer

In [34]:
import pickle as pkl
import numpy as np
from tqdm import tqdm
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

# Load the pickled MOSI and MOSEI datasets. Both objects are indexed by
# partition name further down ('train' / 'dev' / 'test').
with open("./datasets/MOSI/mosi.pkl", 'rb') as mosi_file:
    mosi = pkl.load(mosi_file)

with open("./datasets/MOSEI/mosei.pkl", 'rb') as mosei_file:
    mosei = pkl.load(mosei_file)

In [35]:
# Test partitions of both datasets; their lengths size the index dropdowns.
test_mosi = mosi['test']
test_mosei = mosei['test']

@interact
def get_mosi_sample(idx = range(len(test_mosi))):
    """Print one MOSI test sample selected through the ``idx`` widget.

    Each sample is unpacked as ``((words, visual, acoustic), label, segment)``.
    The words, label and segment are printed as-is; only the shapes of the
    visual and acoustic features are printed.
    """
    modalities, label, segment = mosi['test'][idx]
    words, visual, acoustic = modalities
    print("WORDS", words)
    print("VISUAL", visual.shape)
    print("ACOUSTIC", acoustic.shape)
    print("LABEL", label)
    print("SEGMENT", segment)

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

In [36]:
# NOTE(review): this function browses MOSEI but reuses the name
# `get_mosi_sample` from the MOSI browser cell; consider renaming it.
@interact
def get_mosi_sample(idx = range(len(test_mosei))):
    """Print one MOSEI test sample selected through the ``idx`` widget.

    Each sample is unpacked as ``((words, visual, acoustic), label, segment)``.
    The words, label and segment are printed as-is; only the shapes of the
    visual and acoustic features are printed.
    """
    modalities, label, segment = mosei['test'][idx]
    words, visual, acoustic = modalities
    print("WORDS", words)
    print("VISUAL", visual.shape)
    print("ACOUSTIC", acoustic.shape)
    print("LABEL", label)
    print("SEGMENT", segment)

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

In [37]:
def validate(data, VISUAL_DIM:int, ACOUSTIC_DIM:int):
    """Assert that every sample in every partition is aligned and well-shaped.

    Args:
        data: mapping with "train", "dev" and "test" entries, each an iterable
            of ``((words, visual, acoustic), label, segment)`` samples.
        VISUAL_DIM: expected size of the second axis of ``visual``.
        ACOUSTIC_DIM: expected size of the second axis of ``acoustic``.

    Raises:
        AssertionError: on the first sample whose three modalities differ in
            length, or whose visual/acoustic shape is not
            ``(len(words), <expected dim>)``. The message names the segment.
    """
    for split_name in ("train", "dev", "test"):
        for sample in tqdm(data[split_name]):
            modalities, _label, segment = sample
            words, visual, acoustic = modalities
            assert len(words) == len(visual) == len(acoustic), f"Modality Alignment failure at {segment}"
            expected_visual_shape = (len(words), VISUAL_DIM)
            assert visual.shape == expected_visual_shape, f"Visual dimension mismatch at {segment}"
            expected_acoustic_shape = (len(words), ACOUSTIC_DIM)
            assert acoustic.shape == expected_acoustic_shape, f"Acoustic dimension mismatch at {segment}"

# Feature widths checked per dataset: (visual, acoustic).
validate(mosi, 47, 74)
validate(mosei, 35, 74)

100%|██████████| 1281/1281 [00:00<00:00, 472717.18it/s]
100%|██████████| 229/229 [00:00<00:00, 502088.66it/s]
100%|██████████| 685/685 [00:00<00:00, 524958.57it/s]
100%|██████████| 16265/16265 [00:00<00:00, 597412.75it/s]
100%|██████████| 1869/1869 [00:00<00:00, 542164.34it/s]
100%|██████████| 4643/4643 [00:00<00:00, 580538.19it/s]


In [38]:
# Rebind `test_mosi` to the standalone MOSI test pickle (it previously
# pointed at mosi['test']).
with open("./datasets/MOSI/test.pkl", 'rb') as test_file:
    test_mosi = pkl.load(test_file)

In [39]:
@interact
def get_mosi_sample(idx = range(len(test_mosi))):
    """Print one sample of the standalone MOSI test pickle.

    Samples here are unpacked as
    ``((words, visual, acoustic, actual_words), label, segment)``. The shapes
    of ``words``, ``visual`` and ``acoustic`` are printed; ``actual_words``,
    the label and the segment are printed as-is.
    """
    features, label, segment = test_mosi[idx]
    words, visual, acoustic, actual_words = features
    print("WORDS", words.shape)
    print("VISUAL", visual.shape)
    print("ACOUSTIC", acoustic.shape)
    print("ACTUAL WORDS", actual_words)
    print("LABEL", label)
    print("SEGMENT", segment)

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

In [40]:
# Rebind `test_mosei` to the standalone MOSEI test pickle (it previously
# pointed at mosei['test']).
with open("./datasets/MOSEI/test.pkl", 'rb') as test_file:
    test_mosei = pkl.load(test_file)

In [41]:
@interact
def get_mosei_sample(idx = range(len(test_mosei))):
    """Print one sample of the standalone MOSEI test pickle.

    Samples here are unpacked as
    ``((words, visual, acoustic, actual_words, vlen, alen), label, segment)``.
    The last two feature entries are unpacked but not displayed.
    """
    features, label, segment = test_mosei[idx]
    words, visual, acoustic, actual_words, _vlen, _alen = features
    print("WORDS", words.shape)
    print("VISUAL", visual.shape)
    print("ACOUSTIC", acoustic.shape)
    print("ACTUAL WORDS", actual_words)
    print("LABEL", label)
    print("SEGMENT", segment)

interactive(children=(Dropdown(description='idx', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…

# MOSI

## TFN

### 1. Ground-truth Distribution

In [42]:
# Load the TFN result dictionary for MOSI.
# Keys listed by the original author:
#   segment, labels, labels_2, labels_7, preds, preds_2, preds_7
# NOTE(review): the unimodal-distribution cell also reads text_7 / audio_7 /
# video_7 from this dict — confirm the file carries them.
with open("results/TFN.pkl", "rb") as handle:
    tfn = pkl.load(handle)

In [43]:
# Sanity check: number of entries stored under the 'segment' key of the TFN results.
print(len(tfn['segment']))

686


In [44]:
import plotly.express as px
import plotly.subplots as sp
import pandas as pd

# Ground-truth label distribution of the TFN MOSI results: binary labels on
# the left, 7-class labels on the right.
d = {'binary': tfn['labels_2'], '7_class': tfn['labels_7']}
df = pd.DataFrame(data=d)
order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

fig1 = px.bar(df, x='binary')
fig2 = px.bar(df, x='7_class')

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Binary", "7-class"))
# Copy the traces of each standalone figure into its subplot column.
# add_trace(row=, col=) replaces the deprecated append_trace.
for col, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=1, col=col)

# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
this_figure.update_layout(height=500, width=1200, title_text="Ground-truth distribution")
this_figure.show()

### 2. Fusion result distribution

In [45]:
# Compare the 7-class prediction distribution of the default TFN run
# (titled "TFN-40d") with the run stored in TFN_16d.pkl.
with open("results/TFN_16d.pkl", "rb") as handle:
    tfn_16d = pkl.load(handle)

df1 = pd.DataFrame(data=tfn)
df2 = pd.DataFrame(data=tfn_16d)
order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

fig1 = px.bar(df1, x="preds_7")
fig2 = px.bar(df2, x="preds_7")

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("TFN-40d", "TFN-16d"))
# Copy the traces of each standalone figure into its subplot column.
# add_trace(row=, col=) replaces the deprecated append_trace.
for col, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=1, col=col)

this_figure.update_layout(height=600, width=1200, title_text="TFN frozen dimension")
# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
# Shared y range so both panels are directly comparable.
this_figure.update_yaxes(range=[0,300])
this_figure.show()

In [46]:
# Compare the 7-class ground truth (taken from `tfn`) with the predictions
# stored in TFN_fusion.pkl.
with open("results/TFN_fusion.pkl", "rb") as handle:
    tfn_origin = pkl.load(handle)

df1 = pd.DataFrame(data=tfn)
df2 = pd.DataFrame(data=tfn_origin)
order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

fig1 = px.bar(df1, x="labels_7")
fig2 = px.bar(df2, x="preds_7")

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Ground-truth", "TFN"))
# Copy the traces of each standalone figure into its subplot column.
# add_trace(row=, col=) replaces the deprecated append_trace.
for col, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=1, col=col)

this_figure.update_layout(title_text="Fusion result distribution")
# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
# Shared y range so both panels are directly comparable.
this_figure.update_yaxes(range=[0,200])
this_figure.show()

### 3. Unimodal result distribution

In [47]:
# Distribution of 7-class predictions for each unimodal head and for the
# fused TFN output on MOSI.
order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']
plot_columns = ("text_7", "audio_7", "video_7", "preds_7")

# Append one row per class so every class is present in each plotted column.
# The padding goes onto a *copy*: `tfn` itself stays untouched, so re-running
# this cell (or any earlier cell that reads `tfn`) yields the same result.
# NOTE: as a consequence each class count in the plot is inflated by one.
padded = {column: list(tfn[column]) + order for column in plot_columns}
df = pd.DataFrame(data=padded)

fig1 = px.bar(df, x="text_7")
fig2 = px.bar(df, x="audio_7")
fig3 = px.bar(df, x="video_7")
fig4 = px.bar(df, x="preds_7")

this_figure = sp.make_subplots(rows=2, cols=2, subplot_titles=("Text-only", "Acoustic-only", "Visual-only", "TFN-fusion"))
# (figure, row, col) placement on the 2x2 grid; add_trace(row=, col=)
# replaces the deprecated append_trace.
panels = ((fig1, 1, 1), (fig2, 1, 2), (fig3, 2, 1), (fig4, 2, 2))
for source_fig, row, col in panels:
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=row, col=col)

this_figure.update_layout(height=800, width=1200, title_text="Unimodal results distribution")
# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
# Shared y range so all four panels are directly comparable.
this_figure.update_yaxes(range=[0,500])
this_figure.show()

## MMIM

In [9]:
import pickle as pkl

# Load the MMIM result dictionary for MOSI. The cells below read its
# 'segment', 'labels_7' and 'preds_7' entries.
# NOTE(review): this file is named MIM_mosi.pkl while the MOSEI counterpart is
# MMIM_mosei.pkl — confirm the spelling is intentional.
with open("results/MIM_mosi.pkl", "rb") as handle:
    mmim = pkl.load(handle)

In [10]:
# Sanity check: number of entries stored under the 'segment' key of the MMIM MOSI results.
print(len(mmim['segment']))

686


In [11]:
import plotly.express as px
import plotly.subplots as sp
import pandas as pd

# 7-class ground truth vs. MMIM predictions on MOSI.
df = pd.DataFrame(data=mmim)
# count() tallies the non-null values per group, so after grouping the
# 'segment' column holds the number of segments in each class.
df_gold = df.groupby('labels_7').count().reset_index()
df_pred = df.groupby('preds_7').count().reset_index()

order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

fig1 = px.bar(df_gold, x="labels_7", y="segment")
fig2 = px.bar(df_pred, x="preds_7", y="segment")

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Ground-truth", "MMIM"))
# Copy the traces of each standalone figure into its subplot column.
# add_trace(row=, col=) replaces the deprecated append_trace.
for col, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=1, col=col)

this_figure.update_layout(title_text="7-Classification Result")
# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
# Shared y range so both panels are directly comparable.
this_figure.update_yaxes(range=[0,200])
this_figure.show()

# MOSEI

## TFN

### 1. Ground-truth

In [48]:
# Load the TFN result dictionary for MOSEI.
# Keys listed by the original author:
#   segment, labels, labels_2, labels_7, preds, preds_2, preds_7,
#   preds_text, text_2, text_7, preds_video, ...
with open("results/TFN_mosei.pkl", "rb") as handle:
    tfn_mosei = pkl.load(handle)

In [49]:
# Sanity check: number of entries stored under the 'segment' key of the TFN MOSEI results.
print(len(tfn_mosei['segment']))

4654


In [50]:
import plotly.express as px
import plotly.subplots as sp
import pandas as pd

# Ground-truth label distribution of the TFN MOSEI results: binary labels on
# the left, 7-class labels on the right.
df = pd.DataFrame(data=tfn_mosei)
# count() tallies the non-null values per group, so after grouping the
# 'segment' column holds the number of segments in each class.
df_2 = df.groupby('labels_2').count().reset_index()
df_7 = df.groupby('labels_7').count().reset_index()

order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

fig1 = px.bar(df_2, x='labels_2', y='segment')
fig2 = px.bar(df_7, x='labels_7', y='segment')

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Binary", "7-class"))
# Copy the traces of each standalone figure into its subplot column.
# add_trace(row=, col=) replaces the deprecated append_trace.
for col, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=1, col=col)

# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
this_figure.update_layout(height=500, width=1200, title_text="Ground-truth distribution")
# Shared y range so both panels are directly comparable.
this_figure.update_yaxes(range=[0,2500])
this_figure.show()

### 2. Fusion result distribution

In [51]:
# 7-class ground truth vs. fused TFN predictions on MOSEI.
df = pd.DataFrame(data=tfn_mosei)
# count() tallies the non-null values per group, so after grouping the
# 'segment' column holds the number of segments in each class.
df_gold = df.groupby('labels_7').count().reset_index()
df_pred = df.groupby('preds_7').count().reset_index()

order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

fig1 = px.bar(df_gold, x="labels_7", y="segment")
fig2 = px.bar(df_pred, x="preds_7", y="segment")

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Ground-truth", "TFN-frozen"))
# Copy the traces of each standalone figure into its subplot column.
# add_trace(row=, col=) replaces the deprecated append_trace.
for col, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=1, col=col)

this_figure.update_layout(height=800, width=1200, title_text="Fusion result distribution")
# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
# Shared y range so both panels are directly comparable.
this_figure.update_yaxes(range=[0,4500])
this_figure.show()

### 3. Unimodal result distribution

In [52]:
# Per-class prediction counts for each unimodal head and for the fused TFN
# output on MOSEI.
# Defined locally so the cell does not rely on `order` left over from an
# earlier cell.
order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

df = pd.DataFrame(data=tfn_mosei)
# count() tallies the non-null values per group, so after grouping the
# 'segment' column holds the number of segments in each class.
df_t = df.groupby('text_7').count().reset_index()
df_a = df.groupby('audio_7').count().reset_index()
df_v = df.groupby('video_7').count().reset_index()
df_f = df.groupby('preds_7').count().reset_index()

fig1 = px.bar(df_t, x="text_7", y="segment")
fig2 = px.bar(df_a, x="audio_7", y="segment")
fig3 = px.bar(df_v, x="video_7", y="segment")
fig4 = px.bar(df_f, x="preds_7", y="segment")

this_figure = sp.make_subplots(rows=2, cols=2, subplot_titles=("Text-only", "Acoustic-only", "Visual-only", "TFN-fusion"))
# (figure, row, col) placement on the 2x2 grid; add_trace(row=, col=)
# replaces the deprecated append_trace.
panels = ((fig1, 1, 1), (fig2, 1, 2), (fig3, 2, 1), (fig4, 2, 2))
for source_fig, row, col in panels:
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=row, col=col)

this_figure.update_layout(height=800, width=1200, title_text="Unimodal results distribution")
# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
this_figure.show()

## MMIM

In [57]:
# Load the MMIM result dictionary for MOSEI. The cells below read its
# 'segment', 'labels_7' and 'preds_7' entries.
with open("results/MMIM_mosei.pkl", "rb") as handle:
    mmim_mosei = pkl.load(handle)

In [58]:
# Sanity check: number of entries stored under the 'segment' key of the MMIM MOSEI results.
print(len(mmim_mosei['segment']))

4654


In [61]:
import plotly.express as px
import plotly.subplots as sp
import pandas as pd

# 7-class ground truth vs. MMIM predictions on MOSEI.
df = pd.DataFrame(data=mmim_mosei)
# count() tallies the non-null values per group, so after grouping the
# 'segment' column holds the number of segments in each class.
df_gold = df.groupby('labels_7').count().reset_index()
df_pred = df.groupby('preds_7').count().reset_index()

order = ['very negative', 'negative', 'weakly negative', 'Neutral', 'weakly positive', 'positive', 'very positive']

fig1 = px.bar(df_gold, x="labels_7", y="segment")
fig2 = px.bar(df_pred, x="preds_7", y="segment")

this_figure = sp.make_subplots(rows=1, cols=2, subplot_titles=("Ground-truth", "MMIM"))
# Copy the traces of each standalone figure into its subplot column.
# add_trace(row=, col=) replaces the deprecated append_trace.
for col, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        this_figure.add_trace(trace, row=1, col=col)

this_figure.update_layout(title_text="7-Classification Result")
# Force the sentiment classes onto the x axis in semantic order.
this_figure.update_xaxes(categoryorder='array', categoryarray=order)
# Shared y range so both panels are directly comparable.
this_figure.update_yaxes(range=[0,2500])
this_figure.show()