In [1]:
import ast
import json
import os
import re

import numpy as np
from sklearn import metrics

# Dataset locations: the image folder and the three annotation splits,
# which all live in the same text directory.
img_dir = '/hy-tmp/data/dataset_image'
_text_dir = '/hy-tmp/data/data-of-multimodal-sarcasm-detection/text'
train_file = _text_dir + '/train.txt'
valid_file = _text_dir + '/valid2.txt'
test_file = _text_dir + '/test2.txt'

# Names of the entries found in the image folder; load_all_data uses this
# listing to keep only samples whose '<img_id>.jpg' is present.
image_files = os.listdir(img_dir)

# Per-model prediction files, one path per model, passed to load_predicts.
_results_dir = '/root/results'
CM_BERT_predicts = _results_dir + '/CM_BERT_predicts.txt'
CM_VIT_predicts = _results_dir + '/CM_VIT_predicts.txt'
CM_BERT_TEXT_IN_IMG_TEXT_predicts = _results_dir + '/CM_BERT_TEXT_IN_IMG_TEXT_predicts.txt'
# CM_GCN_predicts = _results_dir + '/CM_GCN_predicts.txt'

In [3]:
def load_all_data(data_file, available_images=None):
    """Load the labelled samples of one split that have an image available.

    Every non-empty line of ``data_file`` must be a Python literal sequence of
    either three fields ``(img_id, text, label)`` or four fields
    ``(img_id, text, label1, label)``. In the four-field form the last field
    is the one stored as the label. The layout is detected per record from
    its length rather than from the file name.

    Args:
        data_file: Path of the UTF-8 annotation file to read.
        available_images: Optional iterable of image file names to accept.
            When omitted, the module-level ``image_files`` listing is used.

    Returns:
        dict mapping ``img_id`` to ``{'image_file': img_id + '.jpg',
        'label': int(label)}`` for each sample whose image file name is in
        ``available_images``. Samples without a matching image are dropped.

    Raises:
        ValueError: If a line is not a valid Python literal or does not have
            three or four fields.
    """
    if available_images is None:
        available_images = image_files
    # A set gives constant-time membership tests; the listing is probed once
    # per sample, which is quadratic overall when it stays a list.
    known_images = set(available_images)

    all_data = {}
    with open(data_file, 'r', encoding='utf-8') as fin:
        for line_no, raw_line in enumerate(fin, start=1):
            line = raw_line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines

            # literal_eval only accepts literals, so the data file cannot
            # execute arbitrary code the way eval() would.
            try:
                record = ast.literal_eval(line)
            except (ValueError, SyntaxError) as exc:
                raise ValueError(
                    '%s:%d: cannot parse record: %r' % (data_file, line_no, line)
                ) from exc

            if len(record) == 3:
                img_id, _text, label = record
            elif len(record) == 4:
                img_id, _text, _label1, label = record
            else:
                raise ValueError(
                    '%s:%d: expected 3 or 4 fields, got %d'
                    % (data_file, line_no, len(record))
                )

            image_file = img_id + '.jpg'
            if image_file in known_images:
                all_data[img_id] = {'image_file': image_file, 'label': int(label)}
    return all_data

def load_predicts(predicts_file, all_data):
    """Read the two per-class logits a model wrote for each known image.

    Every non-empty line of ``predicts_file`` must split on whitespace into
    exactly five tokens: ``img_id predict label logit1 logit2``. The logit
    tokens may carry surrounding punctuation (brackets, commas, ...); the
    first number found inside each token is used. Plain decimals and
    scientific notation (``1.5e-03``) are both understood.

    Args:
        predicts_file: Path of the UTF-8 prediction file to read.
        all_data: Container of the image ids to keep (only ``in`` is used);
            lines for other ids are ignored.

    Returns:
        dict mapping ``img_id`` to ``[logit1, logit2]`` as floats.

    Raises:
        ValueError: If a non-empty line does not have five tokens, or a logit
            token of a kept line contains no number.
    """
    # Raw string so that \d is a regex escape, not a (deprecated) string
    # escape; the optional exponent keeps values such as 1.5e-03 intact
    # instead of truncating them to 1.5.
    number_re = re.compile(r'-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?')

    def parse_logit(token, line_no):
        # Extract the first numeric literal embedded in one logit token.
        match = number_re.search(token)
        if match is None:
            raise ValueError(
                '%s:%d: no number found in logit token %r'
                % (predicts_file, line_no, token)
            )
        return float(match.group())

    logits = {}
    with open(predicts_file, 'r', encoding='utf-8') as fin:
        for line_no, raw_line in enumerate(fin, start=1):
            fields = raw_line.split()
            if not fields:
                continue  # tolerate blank / trailing empty lines
            if len(fields) != 5:
                raise ValueError(
                    '%s:%d: expected 5 whitespace-separated fields, got %d'
                    % (predicts_file, line_no, len(fields))
                )
            img_id, _predict, _label, logit1, logit2 = fields

            if img_id in all_data:
                logits[img_id] = [
                    parse_logit(logit1, line_no),
                    parse_logit(logit2, line_no),
                ]

    return logits

def evaluate_acc_f1(logits_list, all_data, method='mean'):
    """Fuse the logits of several models and score the fused predictions.

    For every image id in ``all_data`` the logits of each model are stacked
    into an array indexed ``[model, sample, class]``, reduced over the model
    axis with ``method``, and the arg-max class is taken as the prediction.

    Args:
        logits_list: Sequence of dicts, one per model, each mapping an image
            id to that model's list of per-class logits.
        all_data: Dict mapping image id to a dict holding the ground truth
            under the ``'label'`` key. Its iteration order fixes the sample
            order.
        method: ``'sum'``, ``'max'`` or ``'mean'``. Any other value prints a
            notice and falls back to ``'sum'``.

    Returns:
        Tuple ``(accuracy, f1, precision, recall)`` computed with the
        scikit-learn metric functions at their default settings.

    Raises:
        KeyError: If a model has no logits for an image id in ``all_data``;
            the message names the model index and an example id.
    """
    fusion_ops = {'sum': np.sum, 'max': np.max, 'mean': np.mean}

    image_ids = list(all_data)
    labels = [all_data[img_id]['label'] for img_id in image_ids]

    per_model = []
    for model_idx, model_logits in enumerate(logits_list):
        # Check up front so the error says which model is incomplete instead
        # of surfacing as a bare KeyError on some image id.
        missing = [img_id for img_id in image_ids if img_id not in model_logits]
        if missing:
            raise KeyError(
                'model %d has no prediction for %d image(s), e.g. %r'
                % (model_idx, len(missing), missing[0])
            )
        per_model.append(np.array([model_logits[img_id] for img_id in image_ids]))

    stacked_logits = np.stack(per_model, axis=0)

    fuse = fusion_ops.get(method)
    if fuse is None:
        print('fusion method not found, using sum method')
        fuse = np.sum
    logits = fuse(stacked_logits, axis=0)

    predicts = np.argmax(logits, axis=1)
    acc = metrics.accuracy_score(labels, predicts)
    f1 = metrics.f1_score(labels, predicts)
    precision = metrics.precision_score(labels, predicts)
    recall = metrics.recall_score(labels, predicts)

    return acc, f1, precision, recall

In [4]:
# Ground truth for the test split, restricted to samples with an image on disk.
all_data = load_all_data(test_file)
print('len of all test data:', len(all_data))

# Logits of each model, loaded in the same order as before
# (BERT, ViT, BERT on text-in-image text) and limited to the ids in all_data.
CM_BERT_logits, CM_VIT_logits, CM_BERT_TEXT_IN_IMG_TEXT_logits = (
    load_predicts(predicts_path, all_data)
    for predicts_path in (
        CM_BERT_predicts,
        CM_VIT_predicts,
        CM_BERT_TEXT_IN_IMG_TEXT_predicts,
    )
)
# CM_GCN_logits = load_predicts(CM_GCN_predicts, all_data)

# Models taking part in the late fusion.
fusion_models = [
    CM_BERT_logits,
    CM_VIT_logits,
    CM_BERT_TEXT_IN_IMG_TEXT_logits,
]

# result is the (accuracy, f1, precision, recall) tuple of the mean-fused logits.
result = evaluate_acc_f1(fusion_models, all_data, method='mean')
print(result)

len of all test data: 2409
(0.8613532586135326, 0.8274793388429752, 0.81985670419652, 0.835245046923879)
