In [1]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Every data/model location is resolved relative to the directory the
# notebook is executed from.
cwd = os.getcwd()
visual_m_path = os.path.join(cwd, 'data/imgs_mean_feature_vectors.pkl')
textual_m_path = os.path.join(cwd, 'data/docs_extracted_features.pkl')
models_folder_name = os.path.join(cwd, 'models')
# models_folder_name is already rooted at cwd, so it is joined directly.
model_checkpoint_path = os.path.join(models_folder_name, 'gmu.ckpt')

# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df_visual_m = pd.read_pickle(visual_m_path)
df_textual_m = pd.read_pickle(textual_m_path)

# One row per recipe; class ids follow the alphabetical order of the labels.
number_of_recipes = len(df_visual_m)
unique_labels = sorted(set(df_visual_m['mean_vector_labels'].values))
number_of_classes = len(unique_labels)
possible_class_indices = list(range(number_of_classes))
labels2class_indices = {
    label: class_index for class_index, label in enumerate(unique_labels)
}
print(labels2class_indices)

# Print numpy floats with three decimals.
np.set_printoptions(formatter={'float': "{0:0.3f}".format})

{'tiramisu': 3, 'steak': 1, 'sushi': 2, 'sashimi': 0}


In [3]:
# Hand-picked hold-out rows, given as positional indices into the recipe frames.
test_samples=[3,5,6,
              11,17,18,
              23,26,33,34,
              38,39,44
             ]
all_samples=set(range(0,number_of_recipes))
# sorted() instead of list(): a set has no guaranteed iteration order, so the
# ascending order of the training indices is made explicit rather than relying
# on an implementation detail of the interpreter.
train_samples=sorted(all_samples.difference(test_samples))
print("train samples: ", train_samples)
print("test samples: ", test_samples)

train samples:  [0, 1, 2, 4, 7, 8, 9, 10, 12, 13, 14, 15, 16, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 35, 36, 37, 40, 41, 42, 43]
test samples:  [3, 5, 6, 11, 17, 18, 23, 26, 33, 34, 38, 39, 44]


In [4]:
# Split both modality frames by position, using the same index lists so the
# visual and textual rows stay paired.
df_visual_m_train = df_visual_m.iloc[train_samples]
df_textual_m_train = df_textual_m.iloc[train_samples]
df_visual_m_test = df_visual_m.iloc[test_samples]
df_textual_m_test = df_textual_m.iloc[test_samples]

# Per-sample feature vectors as plain Python lists.
visual_m_train_inputs = list(df_visual_m_train['mean_feature_vectors'].values)
visual_m_test_inputs = list(df_visual_m_test['mean_feature_vectors'].values)

textual_m_train_inputs = list(df_textual_m_train['features'].values)
textual_m_test_inputs = list(df_textual_m_test['features'].values)

# Class ids are looked up from the labels stored in the visual frame.
train_correct_class_ids = [
    labels2class_indices[label]
    for label in df_visual_m_train['mean_vector_labels']
]
test_correct_class_ids = [
    labels2class_indices[label]
    for label in df_visual_m_test['mean_vector_labels']
]

number_of_training_samples = len(visual_m_train_inputs)
number_of_test_samples = len(visual_m_test_inputs)
# Feature vector lengths are taken from the first training sample.
len_of_visual_features_vec = len(visual_m_train_inputs[0])
len_of_textual_features_vec = len(textual_m_train_inputs[0])

print(test_correct_class_ids)
print(np.shape(visual_m_train_inputs))
print(np.shape(textual_m_train_inputs))

[0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3]
(32, 202)
(32, 12)


In [5]:
# Model size
hidden_state_dim = 6   # width of each modality's hidden representation
z_dim = 1              # number of gate units

# Optimisation
learning_rate = 0.01
batch_size = 4

# Training schedule
number_of_training_iterations = 500   # mini-batch steps per training run
print_valid_every = 10                # validate every this many steps
num_repeat_training = 10              # maximum number of training runs

In [6]:
def create_training_batch(visual_inputs=None, textual_inputs=None, class_ids=None,
                          size=None, rng=None):
    """Draw a random mini-batch of aligned samples, with replacement.

    Called without arguments it behaves as before: it samples `batch_size`
    rows from the module-level training lists using numpy's global random
    state. Every argument is optional and only overrides the matching default.

    Args:
        visual_inputs: sequence of visual feature vectors
            (default: `visual_m_train_inputs`).
        textual_inputs: sequence of textual feature vectors
            (default: `textual_m_train_inputs`).
        class_ids: sequence of integer class ids
            (default: `train_correct_class_ids`).
        size: number of samples to draw (default: `batch_size`).
        rng: object exposing `choice(n, size=...)`, e.g. a
            `np.random.RandomState`, for reproducible sampling
            (default: the global `np.random` state).

    Returns:
        Tuple `(inputs_visual, inputs_textual, correct_classes)` of numpy
        arrays whose rows correspond to the same sampled indices.

    Raises:
        ValueError: if the three sequences do not have the same length.
    """
    if visual_inputs is None:
        visual_inputs = visual_m_train_inputs
    if textual_inputs is None:
        textual_inputs = textual_m_train_inputs
    if class_ids is None:
        class_ids = train_correct_class_ids
    if size is None:
        size = batch_size
    sampler = np.random if rng is None else rng

    pool_size = len(visual_inputs)
    if len(textual_inputs) != pool_size or len(class_ids) != pool_size:
        raise ValueError(
            "visual, textual and class id sequences must have equal length, "
            "got {0}, {1} and {2}".format(pool_size, len(textual_inputs), len(class_ids)))

    # A single vectorised draw replaces one choice() call per sample.
    sampled_indices = sampler.choice(pool_size, size=size)

    inputs_visual = np.array([visual_inputs[i] for i in sampled_indices])
    inputs_textual = np.array([textual_inputs[i] for i in sampled_indices])
    correct_classes = np.array([class_ids[i] for i in sampled_indices])
    return inputs_visual, inputs_textual, correct_classes

# Draw one batch as a sanity check: print the sampled class ids, then the
# shape of each returned array.
inputs_visual, inputs_textual, correct_classes = create_training_batch()
print(np.array(correct_classes))
for sample_batch in (inputs_visual, inputs_textual, correct_classes):
    print(np.shape(sample_batch))

[2 3 2 2]
(4, 202)
(4, 12)
(4,)


In [7]:
# Graph inputs. The batch dimension is left open (None), so the same
# placeholders are fed with training batches and with the full test set.
visual = tf.placeholder(tf.float32, shape=[None,len_of_visual_features_vec])
textual = tf.placeholder(tf.float32, shape=[None,len_of_textual_features_vec])
# Integer class id per sample; one-hot encoded further down.
target = tf.placeholder(tf.int32, shape=[None])

# Visual branch: a ReLU layer to 2*hidden_state_dim units, followed by a tanh
# layer to hidden_state_dim units.
v_reduced = tf.layers.dense(visual,
                      2*hidden_state_dim,
                      activation=tf.nn.relu)
h_v = tf.layers.dense(v_reduced,
                      hidden_state_dim,
                      activation=tf.nn.tanh)
# Textual branch: a single tanh layer straight from the raw textual features.
h_t = tf.layers.dense(textual,
                      hidden_state_dim,
                      activation=tf.nn.tanh)
# Gate: sigmoid over the concatenation of the reduced visual features and the
# raw textual features, with z_dim output units.
z = tf.layers.dense(tf.concat([v_reduced,textual], axis=1),
                    z_dim,
                    activation=tf.nn.sigmoid)
# Fused hidden state: z weights the visual state, (1 - z) the textual one.
# NOTE(review): relies on broadcasting z against h_v/h_t, which presumably
# needs z_dim to be 1 or equal to hidden_state_dim - confirm.
h = z * h_v + (1 - z) * h_t


# Linear classification layer on the fused state. The scores are independent
# per-class sigmoids, not a softmax, so a row need not sum to 1.
logits = tf.layers.dense(h, number_of_classes)
scores = tf.nn.sigmoid(logits)

multi_class_labels=tf.one_hot(target, depth=number_of_classes)
# NOTE(review): tf.losses.sigmoid_cross_entropy with its default reduction
# appears to return a scalar already, which would make the outer reduce_mean
# a no-op - confirm against the TensorFlow documentation.
loss = tf.reduce_mean(tf.losses.sigmoid_cross_entropy(multi_class_labels=multi_class_labels,
                                       logits=logits))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)

# Accuracy: fraction of samples whose highest logit is at the index of the
# one-hot target.
correct_prediction=tf.equal(tf.argmax(logits, axis=1), tf.argmax(multi_class_labels,axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# NOTE(review): the saver is created here but no save/restore call is visible
# in this part of the notebook.
saver = tf.train.Saver(None)

In [8]:
def show_validation_result(accuracy_res, loss_res, scores_res,z_res):
    """Print the validation loss/accuracy line and a per-sample table.

    The table pairs each sample's class scores (rounded to 4 decimals) with
    its correct class id taken from the module-level `test_correct_class_ids`,
    plus the gate value `z_res` ("trust to visual modality") and its
    complement `1 - z_res` ("trust to textual modality"), also rounded.
    """
    print("Validation: Loss: ", loss_res," Accuracy: ", accuracy_res)
    report_columns = {
        'Class scores': list(np.around(scores_res, 4)),
        'Correct labels': test_correct_class_ids,
        'Trust to visual modality': list(np.around(z_res, 4)),
        'Trust to textual modality': list(np.around(1 - z_res, 4)),
    }
    df_scores = pd.DataFrame(data=report_columns)
    print(df_scores)
    

def train(num_of_run):
    """Run one training attempt in a fresh session with re-initialised variables.

    Performs up to `number_of_training_iterations` mini-batch steps and prints
    the training loss after each one. Every `print_valid_every` steps the
    whole test set is evaluated and reported via `show_validation_result`.

    Args:
        num_of_run: index of the current attempt; not used inside the function.

    Returns:
        0 as soon as a validation pass reaches an accuracy of exactly 1.0,
        otherwise -1 once all iterations are done.
    """
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        print("Start model training")

        # The validation feed is the same on every evaluation.
        validation_feed = {visual: visual_m_test_inputs,
                           textual: textual_m_test_inputs,
                           target: test_correct_class_ids}

        for step in range(1, number_of_training_iterations + 1):
            batch_visual, batch_textual, batch_classes = create_training_batch()
            training_feed = {visual: batch_visual,
                             textual: batch_textual,
                             target: batch_classes}
            _, batch_loss = session.run([train_op, loss], training_feed)
            print("Training: loss: ", batch_loss)

            # Only validate on every print_valid_every-th step.
            if step % print_valid_every != 0:
                continue

            accuracy_res, loss_res, scores_res, z_res = session.run(
                [accuracy, loss, scores, z], validation_feed)
            show_validation_result(accuracy_res, loss_res, scores_res, z_res)

            if accuracy_res == 1.0:
                return 0

    return -1

In [9]:
# Repeat training from scratch until one run reports success (train returns 0)
# or num_repeat_training attempts have been made.
for num_of_run in range(num_repeat_training):
    run_status = train(num_of_run)
    if run_status == 0:
        break

Start model training
Training: loss:  0.94515824
Training: loss:  0.6453772
Training: loss:  0.6705255
Training: loss:  0.49344245
Training: loss:  0.4699042
Training: loss:  0.53477937
Training: loss:  0.57430434
Training: loss:  0.57214457
Training: loss:  0.5650188
Training: loss:  0.5826082
Validation: Loss:  0.5135819  Accuracy:  0.53846157
                        Class scores  Correct labels  \
0     [0.1923, 0.2079, 0.4736, 0.38]               0   
1   [0.1833, 0.2026, 0.4613, 0.3981]               0   
2    [0.1855, 0.2016, 0.468, 0.3888]               0   
3    [0.182, 0.3658, 0.2955, 0.5663]               1   
4   [0.1712, 0.3468, 0.3152, 0.5486]               1   
5   [0.1461, 0.3564, 0.2661, 0.6108]               1   
6   [0.1873, 0.1806, 0.4947, 0.3671]               2   
7   [0.1913, 0.1839, 0.4973, 0.3639]               2   
8   [0.1946, 0.1783, 0.5113, 0.3501]               2   
9   [0.1821, 0.1823, 0.4831, 0.3793]               2   
10   [0.1347, 0.3337, 0.2627, 0.614]

12                     [0.0]                    [1.0]  
Training: loss:  0.34781292
Training: loss:  0.40501183
Training: loss:  0.34235895
Training: loss:  0.29239327
Training: loss:  0.34209633
Training: loss:  0.42731428
Training: loss:  0.3050795
Training: loss:  0.4163676
Training: loss:  0.32390893
Training: loss:  0.28705114
Validation: Loss:  0.32734805  Accuracy:  0.7692308
                        Class scores  Correct labels  \
0   [0.4264, 0.0486, 0.5496, 0.0657]               0   
1   [0.4273, 0.0484, 0.5509, 0.0654]               0   
2    [0.4255, 0.0486, 0.5484, 0.066]               0   
3   [0.2056, 0.7357, 0.1786, 0.3166]               1   
4   [0.2056, 0.7356, 0.1786, 0.3165]               1   
5   [0.2055, 0.7356, 0.1786, 0.3163]               1   
6   [0.4273, 0.0483, 0.5509, 0.0654]               2   
7   [0.4273, 0.0483, 0.5509, 0.0654]               2   
8   [0.4273, 0.0483, 0.5509, 0.0654]               2   
9   [0.4273, 0.0483, 0.5509, 0.0654]               2  

Training: loss:  0.3045875
Training: loss:  0.28878307
Training: loss:  0.222607
Training: loss:  0.22689837
Training: loss:  0.18403608
Training: loss:  0.22302447
Training: loss:  0.28798467
Training: loss:  0.19230458
Validation: Loss:  0.2837403  Accuracy:  0.7692308
                        Class scores  Correct labels  \
0   [0.3379, 0.0215, 0.6332, 0.0586]               0   
1   [0.3384, 0.0215, 0.6341, 0.0583]               0   
2   [0.3375, 0.0216, 0.6327, 0.0587]               0   
3     [0.1023, 0.8022, 0.096, 0.296]               1   
4     [0.1023, 0.8022, 0.096, 0.296]               1   
5    [0.1023, 0.8023, 0.096, 0.2959]               1   
6   [0.3384, 0.0215, 0.6341, 0.0583]               2   
7   [0.3384, 0.0215, 0.6341, 0.0583]               2   
8   [0.3384, 0.0215, 0.6341, 0.0583]               2   
9   [0.3384, 0.0215, 0.6341, 0.0583]               2   
10  [0.0419, 0.1749, 0.0435, 0.6565]               3   
11   [0.0419, 0.175, 0.0436, 0.6563]               3   


Training: loss:  0.21971351
Training: loss:  0.24508578
Training: loss:  0.23629078
Validation: Loss:  0.24823537  Accuracy:  0.7692308
                        Class scores  Correct labels  \
0    [0.494, 0.0207, 0.6348, 0.0713]               0   
1   [0.5253, 0.0222, 0.6442, 0.0734]               0   
2   [0.3842, 0.0136, 0.6236, 0.0571]               0   
3   [0.0689, 0.8452, 0.0591, 0.2518]               1   
4   [0.0689, 0.8453, 0.0592, 0.2518]               1   
5   [0.0688, 0.8452, 0.0591, 0.2516]               1   
6   [0.3782, 0.0129, 0.6266, 0.0553]               2   
7   [0.4106, 0.0146, 0.6306, 0.0591]               2   
8   [0.3904, 0.0135, 0.6281, 0.0567]               2   
9    [0.364, 0.0122, 0.6247, 0.0538]               2   
10  [0.0357, 0.1396, 0.0287, 0.7647]               3   
11  [0.0338, 0.1337, 0.0285, 0.7591]               3   
12  [0.0331, 0.1312, 0.0284, 0.7573]               3   

   Trust to textual modality Trust to visual modality  
0                      

Training: loss:  0.21804176
Training: loss:  0.21040583
Training: loss:  0.17662093
Training: loss:  0.14748365
Training: loss:  0.21259028
Training: loss:  0.16127428
Training: loss:  0.16240399
Training: loss:  0.122350276
Training: loss:  0.11066951
Validation: Loss:  0.19555092  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.7181, 0.0272, 0.5606, 0.0781]               0   
1   [0.7288, 0.0252, 0.5867, 0.0713]               0   
2   [0.1791, 0.0063, 0.7227, 0.0308]               0   
3   [0.0363, 0.9042, 0.0535, 0.1723]               1   
4   [0.0363, 0.9042, 0.0536, 0.1718]               1   
5   [0.0363, 0.9042, 0.0536, 0.1716]               1   
6   [0.1763, 0.0061, 0.7288, 0.0298]               2   
7   [0.1767, 0.0061, 0.7287, 0.0298]               2   
8   [0.1764, 0.0061, 0.7288, 0.0298]               2   
9   [0.1763, 0.0061, 0.7288, 0.0298]               2   
10   [0.0147, 0.099, 0.0278, 0.7677]               3   
11  [0.0147, 0.0991, 0.

12                     [0.0]                    [1.0]  
Training: loss:  0.18994856
Training: loss:  0.13362618
Training: loss:  0.15612283
Training: loss:  0.083930686
Training: loss:  0.11634989
Training: loss:  0.13709432
Training: loss:  0.11818731
Training: loss:  0.12271696
Training: loss:  0.08906499
Training: loss:  0.14481664
Validation: Loss:  0.16787507  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0    [0.7873, 0.016, 0.3724, 0.0604]               0   
1     [0.8016, 0.0138, 0.4233, 0.05]               0   
2   [0.1332, 0.0036, 0.7634, 0.0249]               0   
3   [0.0242, 0.9195, 0.0524, 0.1575]               1   
4   [0.0242, 0.9194, 0.0524, 0.1571]               1   
5   [0.0242, 0.9195, 0.0524, 0.1571]               1   
6    [0.134, 0.0035, 0.7692, 0.0242]               2   
7   [0.1346, 0.0035, 0.7688, 0.0242]               2   
8    [0.134, 0.0035, 0.7692, 0.0242]               2   
9   [0.1339, 0.0035, 0.7692, 0.0242]               

Training: loss:  0.08622983
Training: loss:  0.09822042
Training: loss:  0.102596715
Training: loss:  0.09261752
Validation: Loss:  0.15855676  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0     [0.839, 0.0104, 0.3326, 0.041]               0   
1      [0.8478, 0.01, 0.3716, 0.034]               0   
2   [0.1039, 0.0024, 0.8265, 0.0217]               0   
3   [0.0182, 0.9263, 0.0511, 0.1491]               1   
4   [0.0182, 0.9263, 0.0511, 0.1489]               1   
5   [0.0182, 0.9263, 0.0511, 0.1488]               1   
6    [0.104, 0.0023, 0.8298, 0.0213]               2   
7   [0.1042, 0.0024, 0.8297, 0.0213]               2   
8    [0.104, 0.0023, 0.8298, 0.0213]               2   
9    [0.104, 0.0023, 0.8298, 0.0213]               2   
10  [0.0077, 0.0552, 0.0277, 0.8713]               3   
11  [0.0077, 0.0552, 0.0277, 0.8713]               3   
12  [0.0077, 0.0552, 0.0277, 0.8713]               3   

   Trust to textual modality Trust to visual modal

Training: loss:  0.06263628
Training: loss:  0.061567716
Training: loss:  0.061167084
Training: loss:  0.05940344
Training: loss:  0.05748473
Training: loss:  0.06301172
Training: loss:  0.08504474
Training: loss:  0.077043265
Validation: Loss:  0.14945656  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.8607, 0.0078, 0.2031, 0.0428]               0   
1   [0.8739, 0.0068, 0.2648, 0.0302]               0   
2    [0.078, 0.0017, 0.8595, 0.0182]               0   
3     [0.0135, 0.9458, 0.046, 0.112]               1   
4   [0.0135, 0.9457, 0.0462, 0.1119]               1   
5   [0.0135, 0.9457, 0.0461, 0.1119]               1   
6   [0.0787, 0.0017, 0.8643, 0.0176]               2   
7   [0.0787, 0.0017, 0.8643, 0.0176]               2   
8   [0.0787, 0.0017, 0.8643, 0.0176]               2   
9   [0.0787, 0.0017, 0.8643, 0.0176]               2   
10  [0.0056, 0.0516, 0.0267, 0.8814]               3   
11  [0.0056, 0.0516, 0.0266, 0.8813]             

Training: loss:  0.047295623
Training: loss:  0.043154977
Training: loss:  0.04266637
Training: loss:  0.042439796
Training: loss:  0.06930095
Training: loss:  0.043715797
Training: loss:  0.06787787
Training: loss:  0.085258976
Training: loss:  0.042990096
Training: loss:  0.06744246
Validation: Loss:  0.16080907  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0    [0.8687, 0.004, 0.3062, 0.0261]               0   
1   [0.8715, 0.0038, 0.3229, 0.0242]               0   
2   [0.0501, 0.0013, 0.9182, 0.0158]               0   
3     [0.0112, 0.9503, 0.039, 0.101]               1   
4   [0.0107, 0.9498, 0.0403, 0.1004]               1   
5    [0.0111, 0.9503, 0.0392, 0.101]               1   
6   [0.0501, 0.0013, 0.9194, 0.0156]               2   
7   [0.0501, 0.0013, 0.9193, 0.0156]               2   
8   [0.0501, 0.0013, 0.9194, 0.0156]               2   
9   [0.0501, 0.0013, 0.9194, 0.0156]               2   
10   [0.004, 0.0404, 0.0275, 0.9054]          

Training: loss:  0.80027413
Training: loss:  0.62253237
Training: loss:  0.65014774
Training: loss:  0.592446
Training: loss:  0.5884546
Training: loss:  0.5350144
Training: loss:  0.5227437
Training: loss:  0.4287794
Training: loss:  0.5046048
Training: loss:  0.6371596
Validation: Loss:  0.49511603  Accuracy:  0.46153846
                        Class scores  Correct labels  \
0     [0.2179, 0.321, 0.3124, 0.175]               0   
1   [0.2388, 0.2901, 0.4068, 0.1477]               0   
2    [0.1736, 0.3195, 0.4363, 0.248]               0   
3   [0.2078, 0.3419, 0.2472, 0.1884]               1   
4   [0.2066, 0.3435, 0.2434, 0.1898]               1   
5   [0.2238, 0.3107, 0.3301, 0.1619]               1   
6   [0.1731, 0.2825, 0.5714, 0.2248]               2   
7    [0.1913, 0.285, 0.5157, 0.1971]               2   
8   [0.1804, 0.2844, 0.5448, 0.2136]               2   
9    [0.1608, 0.2823, 0.604, 0.2472]               2   
10   [0.148, 0.3441, 0.3672, 0.2939]               3   
11 

Validation: Loss:  0.3231097  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.5657, 0.3072, 0.1331, 0.1101]               0   
1   [0.6407, 0.2744, 0.1628, 0.0737]               0   
2    [0.1057, 0.0642, 0.715, 0.2224]               0   
3     [0.4056, 0.54, 0.0288, 0.2518]               1   
4    [0.3786, 0.5171, 0.032, 0.2474]               1   
5     [0.386, 0.5238, 0.031, 0.2488]               1   
6    [0.1118, 0.055, 0.7642, 0.1949]               2   
7   [0.1118, 0.0551, 0.7641, 0.1949]               2   
8    [0.1118, 0.055, 0.7642, 0.1949]               2   
9    [0.1118, 0.055, 0.7642, 0.1948]               2   
10  [0.1556, 0.3479, 0.1189, 0.5747]               3   
11  [0.1473, 0.3362, 0.1256, 0.5715]               3   
12  [0.1189, 0.2936, 0.1542, 0.5598]               3   

   Trust to textual modality Trust to visual modality  
0                    [1e-04]                 [0.9999]  
1                      [0.0]                    [1.0

Training: loss:  0.19805028
Training: loss:  0.20478334
Training: loss:  0.14301684
Training: loss:  0.21565421
Validation: Loss:  0.243651  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.6841, 0.2165, 0.1108, 0.0284]               0   
1   [0.6883, 0.2133, 0.1132, 0.0278]               0   
2    [0.1393, 0.0361, 0.7497, 0.069]               0   
3     [0.27, 0.6378, 0.0119, 0.1757]               1   
4   [0.2701, 0.6376, 0.0119, 0.1756]               1   
5     [0.27, 0.6378, 0.0119, 0.1757]               1   
6   [0.0779, 0.0209, 0.8773, 0.1177]               2   
7    [0.079, 0.0212, 0.8752, 0.1162]               2   
8    [0.0781, 0.021, 0.8768, 0.1174]               2   
9   [0.0778, 0.0209, 0.8775, 0.1179]               2   
10   [0.0738, 0.337, 0.0769, 0.5818]               3   
11   [0.0738, 0.337, 0.0769, 0.5815]               3   
12   [0.0738, 0.337, 0.0769, 0.5818]               3   

   Trust to textual modality Trust to visual modality

Training: loss:  0.12738818
Training: loss:  0.17166269
Training: loss:  0.15182973
Training: loss:  0.10544015
Validation: Loss:  0.23448627  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0     [0.7549, 0.1539, 0.08, 0.0199]               0   
1   [0.7557, 0.1539, 0.0799, 0.0198]               0   
2   [0.0638, 0.0097, 0.9203, 0.0859]               0   
3   [0.2108, 0.6254, 0.0063, 0.1992]               1   
4   [0.2108, 0.6254, 0.0063, 0.1992]               1   
5   [0.2108, 0.6254, 0.0063, 0.1992]               1   
6    [0.0603, 0.0091, 0.9265, 0.088]               2   
7    [0.0603, 0.0091, 0.9265, 0.088]               2   
8    [0.0603, 0.0091, 0.9265, 0.088]               2   
9    [0.0603, 0.0091, 0.9265, 0.088]               2   
10  [0.0373, 0.2101, 0.0579, 0.7295]               3   
11  [0.0373, 0.2101, 0.0579, 0.7295]               3   
12  [0.0373, 0.2101, 0.0579, 0.7295]               3   

   Trust to textual modality Trust to visual modali

Training: loss:  0.13741827
Training: loss:  0.14924023
Training: loss:  0.13063142
Training: loss:  0.066973954
Training: loss:  0.15807903
Training: loss:  0.062330693
Training: loss:  0.12515938
Training: loss:  0.048647314
Validation: Loss:  0.2067326  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.8173, 0.1334, 0.0485, 0.0113]               0   
1   [0.8177, 0.1335, 0.0484, 0.0113]               0   
2   [0.0571, 0.0046, 0.9372, 0.0691]               0   
3   [0.1539, 0.7113, 0.0032, 0.1644]               1   
4   [0.1539, 0.7112, 0.0032, 0.1644]               1   
5   [0.1539, 0.7113, 0.0032, 0.1644]               1   
6   [0.0535, 0.0043, 0.9425, 0.0717]               2   
7   [0.0535, 0.0043, 0.9425, 0.0717]               2   
8   [0.0535, 0.0043, 0.9425, 0.0717]               2   
9   [0.0535, 0.0043, 0.9425, 0.0717]               2   
10     [0.0192, 0.17, 0.0391, 0.805]               3   
11     [0.0192, 0.17, 0.0391, 0.805]              

Training: loss:  0.06608871
Validation: Loss:  0.19847944  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.8662, 0.1074, 0.0358, 0.0067]               0   
1   [0.8666, 0.1075, 0.0357, 0.0067]               0   
2   [0.0418, 0.0023, 0.9598, 0.0462]               0   
3   [0.1356, 0.7449, 0.0018, 0.1466]               1   
4   [0.1354, 0.7447, 0.0019, 0.1465]               1   
5   [0.1357, 0.7449, 0.0018, 0.1467]               1   
6    [0.0392, 0.0021, 0.963, 0.0485]               2   
7    [0.0392, 0.0021, 0.963, 0.0485]               2   
8    [0.0392, 0.0021, 0.963, 0.0485]               2   
9    [0.0392, 0.0021, 0.963, 0.0485]               2   
10  [0.0113, 0.1202, 0.0296, 0.8592]               3   
11  [0.0113, 0.1202, 0.0296, 0.8592]               3   
12  [0.0113, 0.1202, 0.0296, 0.8592]               3   

   Trust to textual modality Trust to visual modality  
0                      [0.0]                    [1.0]  
1                      

Training: loss:  0.0471453
Training: loss:  0.065672174
Training: loss:  0.07293436
Training: loss:  0.06839329
Training: loss:  0.07004732
Training: loss:  0.04346557
Training: loss:  0.06539256
Validation: Loss:  0.18926595  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.9046, 0.0979, 0.0266, 0.0043]               0   
1   [0.9053, 0.0982, 0.0264, 0.0043]               0   
2   [0.0353, 0.0015, 0.9686, 0.0348]               0   
3    [0.0886, 0.793, 0.0023, 0.0932]               1   
4   [0.0603, 0.7715, 0.0042, 0.0726]               1   
5    [0.102, 0.8007, 0.0018, 0.1022]               1   
6   [0.0334, 0.0014, 0.9705, 0.0367]               2   
7   [0.0334, 0.0014, 0.9705, 0.0367]               2   
8   [0.0334, 0.0014, 0.9705, 0.0367]               2   
9   [0.0334, 0.0014, 0.9705, 0.0367]               2   
10  [0.0082, 0.1158, 0.0235, 0.8778]               3   
11  [0.0082, 0.1158, 0.0235, 0.8778]               3   
12  [0.0082, 0.1158, 0.0

12                     [0.0]                    [1.0]  
Training: loss:  0.0435314
Training: loss:  0.06049829
Training: loss:  0.032203205
Training: loss:  0.045986786
Training: loss:  0.04437451
Training: loss:  0.042603705
Training: loss:  0.045724384
Training: loss:  0.045354985
Training: loss:  0.045938052
Training: loss:  0.060866818
Validation: Loss:  0.1852801  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0    [0.9287, 0.0871, 0.022, 0.0037]               0   
1     [0.93, 0.0874, 0.0215, 0.0037]               0   
2   [0.0286, 0.0014, 0.9744, 0.0273]               0   
3    [0.0099, 0.7973, 0.048, 0.0196]               1   
4   [0.0096, 0.7968, 0.0495, 0.0192]               1   
5   [0.0084, 0.7936, 0.0596, 0.0175]               1   
6   [0.0271, 0.0013, 0.9759, 0.0288]               2   
7   [0.0271, 0.0013, 0.9759, 0.0288]               2   
8   [0.0271, 0.0013, 0.9759, 0.0288]               2   
9   [0.0271, 0.0013, 0.9759, 0.0288]           

                        Class scores  Correct labels  \
0   [0.9474, 0.0653, 0.0163, 0.0037]               0   
1   [0.9477, 0.0654, 0.0162, 0.0037]               0   
2   [0.0236, 0.0013, 0.9761, 0.0229]               0   
3    [0.0101, 0.8742, 0.0315, 0.019]               1   
4   [0.0095, 0.8745, 0.0343, 0.0181]               1   
5   [0.0082, 0.8754, 0.0413, 0.0164]               1   
6   [0.0221, 0.0013, 0.9778, 0.0241]               2   
7   [0.0221, 0.0013, 0.9778, 0.0241]               2   
8   [0.0221, 0.0013, 0.9778, 0.0241]               2   
9   [0.0221, 0.0013, 0.9778, 0.0241]               2   
10  [0.0071, 0.0823, 0.0158, 0.9278]               3   
11  [0.0071, 0.0823, 0.0158, 0.9278]               3   
12  [0.0071, 0.0823, 0.0158, 0.9278]               3   

   Trust to textual modality Trust to visual modality  
0                      [0.0]                    [1.0]  
1                      [0.0]                    [1.0]  
2                      [0.0]                   

Training: loss:  1.00385
Training: loss:  0.7393751
Training: loss:  0.75519776
Training: loss:  0.69465435
Training: loss:  0.6422858
Training: loss:  0.48457208
Training: loss:  0.63426816
Training: loss:  0.50871205
Training: loss:  0.5723153
Training: loss:  0.64501345
Validation: Loss:  0.5695657  Accuracy:  0.53846157
                        Class scores  Correct labels  \
0    [0.541, 0.2374, 0.8183, 0.3296]               0   
1   [0.5749, 0.2162, 0.8226, 0.3272]               0   
2   [0.5406, 0.2228, 0.8334, 0.2985]               0   
3   [0.3294, 0.8089, 0.2736, 0.5686]               1   
4    [0.3621, 0.672, 0.3511, 0.4715]               1   
5   [0.3258, 0.8027, 0.4461, 0.4691]               1   
6   [0.5733, 0.2273, 0.8412, 0.3331]               2   
7    [0.586, 0.1429, 0.8915, 0.2615]               2   
8   [0.5938, 0.1714, 0.8717, 0.2849]               2   
9   [0.5871, 0.1563, 0.8877, 0.2675]               2   
10  [0.4422, 0.3827, 0.6703, 0.3395]               3   
11

Training: loss:  0.17874908
Training: loss:  0.23129502
Training: loss:  0.24676919
Training: loss:  0.30046418
Training: loss:  0.3226568
Training: loss:  0.2760545
Validation: Loss:  0.2600156  Accuracy:  0.9230769
                        Class scores  Correct labels  \
0   [0.6617, 0.1952, 0.5207, 0.4563]               0   
1   [0.6158, 0.2193, 0.4747, 0.2978]               0   
2   [0.5714, 0.0607, 0.6835, 0.2564]               0   
3   [0.2433, 0.8694, 0.0934, 0.1277]               1   
4    [0.176, 0.9006, 0.0622, 0.2695]               1   
5   [0.2337, 0.8997, 0.2688, 0.1825]               1   
6   [0.1667, 0.0234, 0.8941, 0.1003]               2   
7   [0.1671, 0.0235, 0.8933, 0.1007]               2   
8   [0.1676, 0.0235, 0.8936, 0.1004]               2   
9   [0.1667, 0.0232, 0.8944, 0.1001]               2   
10  [0.5379, 0.1857, 0.1591, 0.8928]               3   
11   [0.1151, 0.1219, 0.1719, 0.614]               3   
12   [0.4966, 0.072, 0.4112, 0.7051]               3   