In [91]:
import os
from google.colab import drive
drive.mount("/content/drive")
os.chdir("/content/drive/My Drive/WNUT")

import random
import argparse
import time
import datetime

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
import tensorflow as tf
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, BertConfig, get_linear_schedule_with_warmup, AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from tqdm import tqdm

seed=103
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<torch._C.Generator at 0x7fe483030d08>

In [3]:
def encode_label(label):
    """
    Convert UNINFORMATIVE to 0 and INFORMATIVE to 1
    """
    if label == "UNINFORMATIVE": return 0
    else: return 1

def loadFile(file, device, tok:str):
    """
    Load file and apply preprocessing for BERT model
    """
    df = pd.read_csv(file, sep='\t')
    df.Label = df.Label.apply(lambda x: encode_label(x))

    X = df.Text
    y = df.Label

    # Define tokenizer
    tokenizer = AutoTokenizer.from_pretrained(tok)

    # Encode sentences to ids
    input_ids = list()
    for sent in tqdm(X):
        encoded_sent = tokenizer.encode(sent, 
                                        add_special_tokens = True,
                                        truncation = True,
                                        max_length = 128) 
                                        #return_tensors = 'pt')

        input_ids.append(encoded_sent)

    # Pad/truncate sentences
    input_ids = tf.keras.preprocessing.sequence.pad_sequences(input_ids,
                                                                maxlen=128,
                                                                dtype='long',
                                                                value=0,
                                                                truncating='post',
                                                                padding='post')

    # Attention Masks
    attention_masks = list()
    for sent in input_ids:
        att_mask = [int(token_id > 0) for token_id in sent]
        attention_masks.append(att_mask)

    X = torch.tensor(input_ids).to(device)
    y = torch.tensor(y).to(device)
    attention_masks = torch.tensor(attention_masks)

    return X, y, attention_masks

def makeDataLoader(X, y, attention_masks):
    """
    Make PyTorch iterator
    """
    batch_size = 16

    data = TensorDataset(X, attention_masks, y)
    dataloader = DataLoader(data, batch_size=batch_size)

    return dataloader

def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

def findDevice():
    # If there's a GPU available...
    if torch.cuda.is_available():    

        # Tell PyTorch to use the GPU.    
        device = torch.device("cuda")

        print('There are %d GPU(s) available.' % torch.cuda.device_count())

        print('We will use the GPU:', torch.cuda.get_device_name(0))

    # If not...
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")

    return device

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

def modelEval(model, data, device):

    preds = np.array([]).reshape(0,2)

    t0 = time.time()

    # Put the model in evaluation mode--the dropout layers behave differently
    # during evaluation.
    model.eval()

    # Tracking variables 
    eval_loss, eval_accuracy, eval_f1 = 0, 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    # Evaluate data for one epoch
    for batch in data:
        
        # Add batch to cpu
        batch = tuple(t.to(device) for t in batch)
        
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        
        # Telling the model not to compute or store gradients, saving memory and
        # speeding up validation
        with torch.no_grad():        

            outputs = model(b_input_ids, 
                            token_type_ids=None, 
                            attention_mask=b_input_mask)
        
        logits = outputs[0]

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        preds = np.vstack((preds,logits))
        
        # Calculate the accuracy for this batch of test sentences.
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)   # acc
        tmp_eval_f1 = f1_score(np.argmax(logits, axis = 1).flatten(), label_ids.flatten(), average="weighted")                     # f1
        
        # Accumulate the total accuracy.
        eval_accuracy += tmp_eval_accuracy
        eval_f1 += tmp_eval_f1

        # Track the number of batches
        nb_eval_steps += 1

    # Report the final accuracy for this validation run.
    print("  Accuracy: {0:.5f}".format(eval_accuracy/nb_eval_steps))
    print("  F1: {0:.5f}".format(eval_f1/nb_eval_steps))
    print("  Validation took: {:}".format(format_time(time.time() - t0)))

    return preds

In [8]:
if __name__ == "__main__":
    
    device = findDevice()

    train_data_path = "data/train_lower_entities.tsv"
    test_data_path = "data/valid_lower_entities.tsv"

    X_base, y_base, mask_base = loadFile(test_data_path, device, "bert-base-uncased")
    X_roberta, y_roberta, mask_roberta = loadFile(test_data_path, device, "roberta-base")
    X_covid, y_covid, mask_covid = loadFile(test_data_path, device, "digitalepidemiologylab/covid-twitter-bert")

    base = makeDataLoader(X_base, y_base, mask_base)
    roberta = makeDataLoader(X_roberta, y_roberta, mask_roberta)
    covid = makeDataLoader(X_covid, y_covid, mask_covid)

    model_base = AutoModelForSequenceClassification.from_pretrained('models/bert_text').to(device)
    model_roberta = AutoModelForSequenceClassification.from_pretrained('models/roberta_text').to(device)
    model_covid = AutoModelForSequenceClassification.from_pretrained('models/covid-bert-fine-tuned1').to(device)

    preds_base = modelEval(model_base, base, device)
    preds_roberta = modelEval(model_roberta, roberta, device)
    preds_covid = modelEval(model_covid, covid, device)

There are 1 GPU(s) available.
We will use the GPU: Tesla K80


100%|██████████| 1000/1000 [00:00<00:00, 1092.91it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1212.95it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1385.36it/s]


  Accuracy: 0.88294
  F1: 0.88341
  Validation took: 0:00:15
  Accuracy: 0.89583
  F1: 0.89588
  Validation took: 0:00:15
  Accuracy: 0.92063
  F1: 0.92064
  Validation took: 0:00:49


In [10]:
preds_base_labels = np.argmax(preds_base, axis=1).flatten()
preds_roberta_labels = np.argmax(preds_roberta, axis=1).flatten()
preds_covid_labels = np.argmax(preds_covid, axis=1).flatten()

In [26]:
feats = []
for i in range(len(preds_base_labels)):
    instance = [preds_base_labels[i], preds_roberta_labels[i], preds_covid_labels[i], \
                preds_base[i][0], preds_base[i][1],\
                preds_roberta[i][0], preds_roberta[i][1],\
                preds_covid[i][0], preds_covid[i][0]]
    for j in range(len(instance)):
        if type(instance[j]) == np.ndarray:
            instance[j] = float(instance[j][0])
        else:
            instance[j] = float(instance[j])
    feats.append(instance)

In [133]:
# Hard Majority Voting

finals = []
for i in range(len(preds_base_labels)):
  pred1 = preds_base_labels[i]
  pred2 = preds_roberta_labels[i]
  pred3 = preds_covid_labels[i]

  zeros, ones = 0, 0

  if pred1 == 0: zeros += 1
  else: ones += 1

  if pred2 == 0: zeros += 1
  else: ones += 1

  if pred3 == 0: zeros += 1
  else: ones += 1

  if zeros > ones: finals.append(0)
  else: finals.append(1)

In [134]:
f1_score(finals, y_base.cpu(), average="weighted")

0.8999219297367631

In [147]:
# Soft Majority Voting

import math
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

finals = []
for i in range(len(preds_base_labels)):
  base0, base1 = sigmoid(preds_base[i][0]),sigmoid(preds_base[i][1])
  roberta0, roberta1 = sigmoid(preds_base[i][0]),sigmoid(preds_base[i][1])
  covid0, covid1 = sigmoid(preds_base[i][0]),sigmoid(preds_base[i][1])

  mean0, mean1 = np.mean(np.array((base0,roberta0,covid0))), np.mean(np.array((base1,roberta1,covid1)))

  p = np.array((mean0,mean1))

  finals.append(np.argmax(p).flatten()[0])

#print(finals)

f1_score(finals, y_base.cpu(), average="weighted")

0.8819092831059003