In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, cohen_kappa_score, f1_score
import torch
from torch.nn import Softmax
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm

In [None]:
"""Initializing the root path"""

# Directory prefix for the saved label/prediction arrays. The trailing slash is
# significant: later cells append file names to this value with plain f-string
# formatting rather than a path-joining helper.
path = "./predictions/"

In [None]:
"""Making the predictions and labels corresposding to the predictions"""

# `path` already ends with a separator, so file names are appended directly for
# all three files (no extra "/" is inserted, which would yield "…//labels.npy").
labels = np.load(f"{path}labels.npy")

# Model 1 (fine-tuned BERT): the stored array is turned into row-wise
# probabilities with a softmax over axis 1 before fusion.
pred1 = np.load(f"{path}bert_finetuned.npy")
pred1 = Softmax(dim=1)(torch.tensor(pred1)).numpy()

# Model 2 (GNN with fastText initialisation): used exactly as stored.
# NOTE(review): no softmax is applied here — presumably this file already holds
# probabilities on the same scale as `pred1`; confirm against the export code.
pred2 = np.load(f"{path}gnn_fasttext_finetuned.npy")

# The later cells blend the two prediction arrays element-wise and score the
# result against `labels`, so all three must describe the same samples.
assert pred1.shape == pred2.shape, (
    f"prediction arrays differ in shape: {pred1.shape} vs {pred2.shape}"
)
assert len(labels) == len(pred1), (
    f"{len(labels)} labels for {len(pred1)} predictions"
)

In [None]:
# Hold out 20% of the samples as the final test set; the remaining 80%
# ("tv" = train/validation) feeds the cross-validated weight search.
#
# All three arrays go through ONE train_test_split call so they share a single
# shuffle. Splitting them in separate calls only stays aligned while the seed is
# repeated and the arrays have equal length; a single call raises on a length
# mismatch instead of silently pairing predictions with the wrong labels.
(
    tv_pred1, test_pred1,
    tv_pred2, test_pred2,
    tv_labels, test_labels,
) = train_test_split(
    pred1, pred2, labels,
    test_size=0.2, shuffle=True, random_state=0,
)

In [None]:
"""Running late fusion with 5-fold cross validation to find the optimal weights for predictions from two models"""

def _weighted_f1(probs_a, probs_b, y_true, weight):
    """Weighted-average F1 of the argmax of `probs_a*weight + probs_b*(1-weight)`."""
    blended = (probs_a*weight) + (probs_b*(1-weight))
    return f1_score(y_true, np.argmax(blended, axis=1), average='weighted')


kfcv = KFold(n_splits=5, random_state=42, shuffle=True)
splits = []  # one [validation F1, selected weight] entry per fold

for run, (train_index, val_index) in enumerate(kfcv.split(tv_pred1), start=1):
    fit_pred1, fit_pred2, fit_labels = tv_pred1[train_index], tv_pred2[train_index], tv_labels[train_index]
    held_pred1, held_pred2, held_labels = tv_pred1[val_index], tv_pred2[val_index], tv_labels[val_index]
    print(f"Run: {run}")

    # Sweep 201 evenly spaced weights in [0, 1] on the training part of the fold
    # and keep the one with the highest F1. Pairs compare lexicographically, so
    # an F1 tie is resolved in favour of the larger weight.
    _, split = max(
        (_weighted_f1(fit_pred1, fit_pred2, fit_labels, weight), weight)
        for weight in tqdm(np.linspace(0,1,201))
    )
    print(f"Split value: {split}\n")

    # Score the selected weight on the held-out part of the fold.
    splits.append([_weighted_f1(held_pred1, held_pred2, held_labels, split), split])

In [None]:
"""Testing corresponding to the best split"""

# Order the folds by validation F1, best first (ties fall to the larger weight),
# and reuse the weight chosen in the top-ranked fold.
splits.sort(reverse=True)
best_split = splits[0][1]
gcn_weight = np.round(1-best_split,3)
print(f"Best weight splited: {best_split} for finetuned bert and {gcn_weight} for finetuned fastttext intitalized GCN\n")

# Blend the held-out test predictions with that weight and report the metrics.
late_fusion = (test_pred1*best_split) + (test_pred2*(1-best_split))
pred_class = late_fusion.argmax(axis=1)
report = classification_report(test_labels, pred_class, digits=7)
print(report)
kappa = cohen_kappa_score(test_labels, pred_class)
print(f"Cohen Kappa: {kappa}")

In [None]:
"""Maximium of prediction from two models"""

# Alternative fusion rule: for every sample and class keep the larger of the two
# models' scores, then predict the class with the highest fused score.
# NOTE(review): this compares the two models' values directly, so it presumes
# both arrays are on the same scale — confirm for the GNN predictions.
late_fusion = np.maximum(test_pred1, test_pred2)
pred_class = late_fusion.argmax(axis=1)
report = classification_report(test_labels, pred_class, digits=7)
print(report)
kappa = cohen_kappa_score(test_labels, pred_class)
print(f"Cohen Kappa: {kappa}")