In [32]:
import numpy as np
import sklearn
import os
import pandas as pd
from collections import Counter
from sklearn.metrics import f1_score

In [83]:
# Load the saved prediction arrays from disk. Rows of `bert_prob` are later
# handed to get_prob, which takes the argmax of each row.
svm, bert, bert_prob = (
    np.load(npy_path)
    for npy_path in ('SVM_preds.npy', 'BERT_preds.npy', 'BERT_preds_proba.npy')
)

In [84]:
# Load the validation labels and binarise them:
# 'UNINFORMATIVE' -> 0, every other label -> 1.
true = pd.read_csv('valid.tsv', sep='\t')['Label'].values
true_ = [int(raw_label != 'UNINFORMATIVE') for raw_label in true]

In [85]:
def sigmoid(x):
    """Return the logistic sigmoid of ``x`` scaled to the 0-100 range.

    Note the scaling: the result is a percentage-style value
    (``sigmoid(0) == 50.0``), not a probability in [0, 1].
    """
    denominator = 1 + np.exp(-x)
    return (1 / denominator) * 100

# Extract the label and its confidence score for a single prediction
def get_prob(x):
    """Return ``(label, probability)`` for one row of per-class scores.

    ``label`` is the index of the largest entry of ``x``; ``probability`` is
    ``sigmoid`` applied to that entry, i.e. a value on the 0-100 scale.

    NOTE(review): this is the sigmoid of the single top score, not a softmax
    over the whole row -- confirm that is the intended confidence measure.
    """
    top_idx = np.argmax(x)
    return top_idx, sigmoid(x[top_idx])

In [86]:
# Combine BERT and SVM predictions by confidence threshold and score them
def get_f1_thresh(threshold, bert_prob, bert, svm, true_):
    """Return the weighted F1 of the thresholded BERT/SVM combination.

    For every row of ``bert_prob`` the BERT label (argmax, via ``get_prob``)
    is kept when its confidence is strictly greater than ``threshold``;
    otherwise the SVM prediction at the same position is used.

    Args:
        threshold: Confidence cut-off on the 0-100 scale returned by
            ``get_prob``.
        bert_prob: Iterable of per-example score rows.
        bert: Not used in the body; kept so existing calls keep working.
        svm: Fallback predictions, indexed by example position.
        true_: Ground-truth labels passed to ``f1_score``.

    Returns:
        ``f1_score(true_, combined, average='weighted')``.
    """
    def choose(position, score_row):
        bert_label, confidence = get_prob(score_row)
        return bert_label if confidence > threshold else svm[position]

    combined = [choose(position, score_row)
                for position, score_row in enumerate(bert_prob)]
    return f1_score(true_, combined, average='weighted')

In [73]:
# Sweep the confidence threshold over 30..99 (same 0-100 scale as get_prob)
# and record the weighted F1 of the combined predictions at each value.
th = np.arange(30, 100, 1)
scores = [get_f1_thresh(cutoff, bert_prob, bert, svm, true_) for cutoff in th]

In [95]:
# Plot the results
import plotly.express as px

# Derive the x axis from the thresholds that were actually evaluated, so it
# always has exactly one point per entry of `scores`. A separately built
# float-step np.arange(0.30, 1, 0.01) is not guaranteed to have the same
# length because of floating-point rounding.
threshold_axis = th / 100

# Weighted F1 of the BERT predictions alone, drawn as a horizontal reference
# line; computed once and reused for both end points.
bert_only_f1 = f1_score(true_, bert, average='weighted')

fig = px.line(x=threshold_axis, y=scores, labels={'x': 'Threshold', 'y': 'F1'})
fig.add_shape(
    # Horizontal line at the BERT-only score
    type="line",
    x0=0.3,
    y0=bert_only_f1,
    x1=1,
    y1=bert_only_f1,
    line=dict(
        color="LightSeaGreen",
        width=1,
        dash="dashdot",
    ),
)
fig.show()