In [1]:
import os
# Restrict this process to GPU index 1. This runs in the first cell, before
# torch is imported in the next one.
os.environ["CUDA_VISIBLE_DEVICES"]="1" # the GPU on robinson

In [2]:
import numpy as np
import pandas as pd
from collections import deque
import copy

import torch
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForPreTraining
from transformers import BertModel, AdamW, AutoTokenizer, BertForSequenceClassification, RobertaForSequenceClassification
from torch.optim.lr_scheduler import ReduceLROnPlateau

from tqdm import tqdm, trange

import emoji
from nltk.corpus import stopwords

# Seed torch's global RNG. Only torch is seeded in this cell.
random_seed = 0
torch.manual_seed(random_seed)

# Display names for the nine labels; indexed by label position when the
# misclassified tweets are printed near the end of the notebook.
# NOTE(review): presumably in the same order as CSV label columns 1..9 — confirm.
conspiracies = ['Suppressed Cures',
     'Behaviour and Mind Control',
     'Antivax',
     'Fake virus',
     'Intentional Pandemic',
     'Harmful Radiation/ Influence',
     'Population reduction',
     'New World Order',
     'Satanism']

In [3]:
# Checkpoint to fine-tune. Later cells test this string for the substrings
# 'roberta' and 'twitter' to choose the tokenizer, the dataset layout and the
# classifier wrapper, and use it as the key into the batch-size table.
#model_name = 'bert-base-cased'
# model_name = 'roberta-base'
# model_name = 'covid-twitter-bert'
model_name = 'roberta-large'

# Each flag switches on the matching preprocessing function in the data cell.
replace_lowercase_flag = False
remove_stopwords_flag = False
remove_hashtags_flag = True
replace_emojis_flag = True
clean_tweets_flag = False

# When True, training tweets/labels are read from the full dev CSV instead of
# the cross-validation fold files.
all_data = False
# Not read by any cell visible in this notebook.
class_weights_flag = True # always true for now

# When False, the train/eval loops reshape the labels to (-1, 1).
classification = True

# fold: index of the fold file that is held out as the test set
k=0

In [4]:
def clean_tweets(tweets):
    """Normalise raw tweet text.

    For every tweet: removes newline and non-breaking-space characters,
    unescapes ``&amp;`` to ``&`` and replaces each listed spelling of the
    coronavirus name with the single phrase ``'wuhan virus'``.

    Args:
        tweets: iterable of tweet strings.

    Returns:
        list of cleaned strings, in input order.
    """
    char_to_remove = ['\n', '\xa0']
    # Replacement is applied in list order, so longer spellings are listed
    # before the shorter ones they contain ('sarscov2' before 'sars', ...).
    corona_synonyms = ['coronavirus',
                      'covid-19',
                      'covid19',
                      'covid 19',
                      'covid',
                      'corona',
                      'sarscov2',  # comma was missing: 'sarscov2' 'sars' fused into one string
                      'sars',
                      'Coronaviruses',
                      'Coronavirus',
                      'Corona',
                      'Covid19',
                      'COVID19',
                      'Covid-19',
                      'COVID-19',
                      'COVID 19',
                      'Covid',
                      'COVID',
                      'SARSCOV2',
                      'SARS']

    tweets_clean = []
    for tw in tweets:
        for c in char_to_remove:
            tw = tw.replace(c, '')
        tw = tw.replace('&amp;', '&')

        # str.replace is a no-op when the synonym is absent, so no membership
        # test is needed first.
        for syn in corona_synonyms:
            tw = tw.replace(syn, 'wuhan virus')
        tweets_clean.append(tw)
    return tweets_clean

def extract_hashtags(tweet):
    """Return, in order, the space-delimited words of `tweet` that start with '#'."""
    # Only the plain space character separates words. Empty fragments produced
    # by consecutive spaces are dropped before their first character is read.
    return [token for token in tweet.split(' ') if token and token[0] == '#']

def extract_emojis (tw):
    """Return the emojis contained in `tw`, in order of appearance.

    The string is scanned from right to left with a sliding window of up to
    7 characters, because several code points can combine into one emoji. At
    each position the longest window suffix found in
    ``emoji.UNICODE_EMOJI['en']`` wins, and the characters it covers are
    skipped.

    Args:
        tw: tweet text. May be shorter than the window, or empty.

    Returns:
        list of emoji strings, left to right.

    NOTE(review): ``emoji.UNICODE_EMOJI`` is, to my knowledge, only available
    in emoji < 2.0 — confirm the pinned package version.
    """
    emojis = []
    max_l = 7

    # window[0] is the right-most character currently inside the window.
    # Positions that fall before the start of the string are padded with ''
    # so that tweets shorter than max_l no longer raise IndexError.
    window = deque((tw[-1-i] if i < len(tw) else '' for i in range(max_l)),
                   maxlen=max_l)
    skip = 0

    for i in range(len(tw)):
        if skip == 0:
            # Try the longest candidate first, then progressively shorter ones.
            for j in range(max_l-1, -1, -1):
                candidate = ''.join(window[j-n] for n in range(j+1))
                if candidate in emoji.UNICODE_EMOJI['en']:
                    emojis.append(candidate)
                    # the next j positions are part of this emoji
                    skip = j
                    break
        else:
            skip -= 1

        # Slide the window one character to the left, padding with '' once
        # the start of the string has been passed.
        offset = 1 + i + max_l
        window.append(tw[-offset] if offset <= len(tw) else '')

    emojis.reverse()
    return emojis

In [5]:
def to_lowercase(tweets):
    """Return a new list holding the lower-cased form of every tweet."""
    return [text.lower() for text in tweets]

def remove_stopwords(tweets):
    """Drop English stop words from every tweet.

    Tweets are split on the single space character, tokens that appear in
    ``stopwords.words('english')`` are removed (exact, case-sensitive match)
    and the remaining tokens are joined back with single spaces.
    """
    english_stopwords = set(stopwords.words('english'))

    def _strip(text):
        kept = [token for token in text.split(' ') if token not in english_stopwords]
        return ' '.join(kept)

    return [_strip(text) for text in tweets]

def remove_hashtags(tweets):
    """Delete every '#' character from each tweet (the tag text itself is kept)."""
    stripped = []
    for text in tweets:
        stripped.append(''.join(text.split('#')))
    return stripped

def replace_emojis(tweets):
    """Replace every emoji in each tweet by its English name.

    The name is taken from ``emoji.UNICODE_EMOJI['en']`` with underscores
    turned into spaces and the surrounding colons removed.
    """
    converted = []
    for text in tweets:
        for symbol in extract_emojis(text):
            readable = emoji.UNICODE_EMOJI['en'][symbol].replace('_', ' ').replace(':', '')
            text = text.replace(symbol, readable)
        converted.append(text)
    return converted
    
        

In [6]:
data_path = './mediaeval-fakenews/data/task2/'
# os.listdir returns entries in arbitrary order; sort them so that fold index
# `k` always selects the same file across runs and machines.
filelist = sorted(os.listdir(data_path))

df_list = [pd.read_csv(data_path+file) for file in filelist]

# Fold k is held out; all remaining folds form the training set.
test_df = df_list[k]
train_df = pd.concat(df_list[:k]+df_list[k+1:])

tw_train = train_df['tweet'].tolist()
tw_test = test_df['tweet'].tolist()

if all_data:
    # Train on the complete dev set instead of the k-1 folds.
    df = pd.read_csv('./mediaeval-fakenews/data/dev-full-task-2-clean.csv')
    # .tolist() keeps tw_train a plain list of str, as in the fold branch,
    # even when every preprocessing flag below is off.
    tw_train = df['tweet'].tolist()
    labels_train = df.iloc[:,1:10].values.tolist()
else:
    labels_train = train_df.iloc[:,1:10].values.tolist()

# Label columns are columns 1..9 of each CSV.
labels_test = test_df.iloc[:,1:10].values.tolist()
ids_test = test_df['ids'].tolist()

# Optional text preprocessing, applied in a fixed order.
if clean_tweets_flag:
    tw_train = clean_tweets(tw_train)
    tw_test = clean_tweets(tw_test)

if replace_lowercase_flag:
    tw_train = to_lowercase(tw_train)
    tw_test = to_lowercase(tw_test)

if remove_stopwords_flag:
    tw_train = remove_stopwords(tw_train)
    tw_test = remove_stopwords(tw_test)

if remove_hashtags_flag:
    tw_train = remove_hashtags(tw_train)
    tw_test = remove_hashtags(tw_test)

if replace_emojis_flag:
    tw_train = replace_emojis(tw_train)
    tw_test = replace_emojis(tw_test)

# Number of positive training examples per label ...
weights_tmp = [sum(row[i] for row in labels_train) for i in range(0, 9)]
# ... turned into inverse-frequency class weights (n_samples / n_positive).
weights = [len(labels_train)/w for w in weights_tmp]

weights = torch.FloatTensor(weights).cuda()
weights

tensor([44.3929, 10.9035,  8.1242,  6.8297,  6.7189, 27.0217, 11.3000, 12.4300,
        19.4219], device='cuda:0')

In [7]:
if 'twitter' in model_name:
    # The covid-twitter-bert wrapper loads the
    # 'digitalepidemiologylab/covid-twitter-bert-v2' checkpoint, so the
    # tokenizer must come from that same checkpoint for the vocabulary ids to
    # match its embedding table (previously 'bert-base-cased' was used).
    tokenizer = AutoTokenizer.from_pretrained('digitalepidemiologylab/covid-twitter-bert-v2')
else:
    tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenise without padding/truncation just to inspect the raw lengths.
tokenized_input = tokenizer(tw_train)

# Longest training sequence in tokens; used to sanity-check MAX_LEN.
m = max((len(tokens) for tokens in tokenized_input['input_ids']), default=0)
m

111

In [8]:
# Every sequence is padded or truncated to this many tokens.
MAX_LEN = 128

tokenized_input = tokenizer(tw_train, max_length=MAX_LEN, padding='max_length', truncation=True)
tokenized_test = tokenizer(tw_test, max_length=MAX_LEN, padding='max_length', truncation=True)

# Token ids and attention masks are needed for every model family.
train_input_ids = torch.tensor(tokenized_input['input_ids'])
train_attention_mask = torch.tensor(tokenized_input['attention_mask'])
test_input_ids = torch.tensor(tokenized_test['input_ids'])
test_attention_mask = torch.tensor(tokenized_test['attention_mask'])

# Segment ids are only read for the non-RoBERTa models.
if 'roberta' not in model_name:
    train_token_type_ids = torch.tensor(tokenized_input['token_type_ids'])
    test_token_type_ids = torch.tensor(tokenized_test['token_type_ids'])

# Labels and tweet ids as tensors.
train_labels = torch.tensor(labels_train)
test_labels = torch.tensor(labels_test)
test_ids = torch.tensor(ids_test)

In [9]:
# Per-checkpoint batch size.
models_b_size = {
    'roberta-base': 32,
    'bert-base-cased': 32,
    'covid-twitter-bert': 6,
    'roberta-large': 10,
}
batch_size = models_b_size[model_name] # 32 if 256

if 'roberta' not in model_name:
    # BERT-style models: the fourth tensor is the token type ids.
    train_data = TensorDataset(train_input_ids, train_attention_mask, train_labels, train_token_type_ids)
    test_data = TensorDataset(test_input_ids, test_attention_mask, test_labels, test_token_type_ids)
else:
    # RoBERTa: no token type ids; the test set carries the tweet ids instead.
    train_data = TensorDataset(train_input_ids, train_attention_mask, train_labels)
    test_data = TensorDataset(test_input_ids, test_attention_mask, test_labels, test_ids)

# Training batches are drawn in random order, evaluation batches in order.
train_sampler = RandomSampler(train_data)
test_sampler = SequentialSampler(test_data)

train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

In [10]:
# Target device for batches in the train/eval loops: CUDA when available,
# CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# Shown as the cell output.
# NOTE(review): this call is not guarded by the availability check above —
# presumably it fails when no CUDA device is visible; confirm.
torch.cuda.get_device_name(0)

'Tesla K80'

In [11]:
class BertClassifier(nn.Module):
    """Multi-label classifier built on ``BertForSequenceClassification``.

    Reads two notebook-level globals: ``model_name`` (checkpoint to load) and
    ``weights`` (per-class loss weights, one entry per class).
    """

    def __init__(self, n_classes):
        """Load the pretrained model with an `n_classes`-wide output layer."""
        super().__init__()
        self.n_classes = n_classes
        self.bert = BertForSequenceClassification.from_pretrained(model_name, num_labels=n_classes)
        self.sigmoid = nn.Sigmoid()

        if n_classes >1:
            # reduction='none' keeps one loss value per (sample, class). With
            # the default mean reduction the loss is already a scalar, so the
            # multiplication by `weights` in forward() could not re-weight
            # individual classes. This matches RobertaClassifier.
            self.criterion = nn.BCELoss(reduction='none')
        else:
            self.criterion = nn.MSELoss()

    def forward(self, input_ids, token_type_ids, input_mask, labels):
        """Run the model and compute the class-weighted loss.

        Returns:
            (loss, probabilities): scalar loss, and the sigmoid of the model's
            first output, with one probability per class.
        """
        outputs = self.bert(input_ids = input_ids, token_type_ids = token_type_ids, attention_mask = input_mask)

        # The name `logits` is historical: these values are already
        # sigmoid-activated.
        logits = self.sigmoid(outputs[0])

        loss = self.criterion(logits, labels)
        # Broadcast the per-class weights over the batch, then average.
        loss = (loss * weights).mean()

        return loss, logits

class CovidTwitterBertClassifier(nn.Module):
    """Multi-label classifier built on the covid-twitter-bert-v2 checkpoint.

    The checkpoint is loaded as ``BertForPreTraining`` and its
    ``cls.seq_relationship`` head is replaced by a fresh linear layer with
    `n_classes` outputs. Reads the notebook-level global ``weights``
    (per-class loss weights).
    """

    def __init__(self, n_classes):
        """Load the checkpoint and swap in an `n_classes`-wide head."""
        super().__init__()
        self.n_classes = n_classes
        self.bert = BertForPreTraining.from_pretrained('digitalepidemiologylab/covid-twitter-bert-v2')
        self.sigmoid = nn.Sigmoid()
        # Size the new head from the one it replaces instead of hard-coding
        # the hidden width.
        head_in_features = self.bert.cls.seq_relationship.in_features
        self.bert.cls.seq_relationship = nn.Linear(head_in_features, n_classes)

        if n_classes >1:
            # reduction='none' keeps one loss value per (sample, class). With
            # the default mean reduction the loss is already a scalar, so the
            # multiplication by `weights` in forward() could not re-weight
            # individual classes. This matches RobertaClassifier.
            self.criterion = nn.BCELoss(reduction='none')
        else:
            self.criterion = nn.MSELoss()

    def forward(self, input_ids, token_type_ids, input_mask, labels):
        """Run the model and compute the class-weighted loss.

        Returns:
            (loss, probabilities): scalar loss, and the sigmoid of the model's
            second output (the replaced seq_relationship head).
        """
        outputs = self.bert(input_ids = input_ids, token_type_ids = token_type_ids, attention_mask = input_mask)

        # The name `logits` is historical: these values are already
        # sigmoid-activated.
        logits = self.sigmoid(outputs[1])

        loss = self.criterion(logits, labels)
        # Broadcast the per-class weights over the batch, then average.
        loss = (loss * weights).mean()

        return loss, logits
    
    
class RobertaClassifier(nn.Module):
    """Multi-label classifier built on ``RobertaForSequenceClassification``.

    Reads two notebook-level globals: ``model_name`` (checkpoint to load) and
    ``weights`` (per-class loss weights).
    """

    def __init__(self, n_classes):
        """Load the pretrained model with an `n_classes`-wide output layer."""
        super().__init__()
        self.n_classes = n_classes
        self.bert = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=n_classes)
        self.sigmoid = nn.Sigmoid()
        # Un-reduced BCE for the multi-label case (so per-class weights can be
        # applied afterwards); plain MSE when there is a single output.
        multi_label = n_classes > 1
        self.criterion = nn.BCELoss(reduction='none') if multi_label else nn.MSELoss()

    def forward(self, input_ids, input_mask, labels):
        """Return ``(loss, probabilities)`` for one batch.

        `probabilities` is the sigmoid of the model's first output; `loss` is
        the mean of the criterion output multiplied by the global `weights`.
        """
        raw_scores = self.bert(input_ids, input_mask)[0]
        probabilities = self.sigmoid(raw_scores)

        # Labels are cast to float only in the single-output case.
        targets = labels.float() if self.n_classes == 1 else labels
        element_loss = self.criterion(probabilities, targets)
        weighted_loss = (element_loss * weights).mean()

        return weighted_loss, probabilities

In [12]:
# Choose the wrapper class from the checkpoint name; 'roberta' is tested
# before 'twitter', and plain BERT is the fallback.
if 'roberta' in model_name:
    classifier_cls = RobertaClassifier
elif 'twitter' in model_name:
    classifier_cls = CovidTwitterBertClassifier
else:
    classifier_cls = BertClassifier

# Nine output classes, one per label column.
model = classifier_cls(9)

model.cuda()

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.weight', 'clas

RobertaClassifier(
  (bert): RobertaForSequenceClassification(
    (roberta): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_features=1024, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (den

In [13]:
#optimizer_grouped_parameters
# lr 5e-5 for base models
# lr 7e-6 for larger models
# AdamW here is the class imported from `transformers` at the top of the
# notebook; it is applied to all model parameters with a single learning rate.
optimizer = AdamW(model.parameters(),
                  lr=7e-6,
                  weight_decay = 0.001)

# Plateau scheduler (patience=4, factor=0.3). The training loop steps it once
# per epoch with the mean evaluation loss.
scheduler = ReduceLROnPlateau(optimizer, patience=4, factor=0.3)

In [14]:
def round_regression(val):
    """Bucket a scalar into class 0, 1 or 2 using the cut points 0.5 and 1.5."""
    if val < 0.5:
        return 0
    return 1 if val < 1.5 else 2

In [None]:
epochs = 30

# Probability above which a label counts as predicted.
threshold = 0.5
# One output per conspiracy label.
n_labels = len(conspiracies)

# Metrics of the best epoch so far (selected on the mean per-class MCC).
best_MCCF = 0
best_MCCA = 0
best_F1 = 0
best_MCCs = []
best_MCCNC = 0
best_loss = 999
best_acc = 0
# Snapshot the initial weights. A bare model.state_dict() would alias the live
# parameters and silently follow them during training.
best_state_dict = copy.deepcopy(model.state_dict())
best_epoch = 0

for e in trange(epochs, desc="Epoch"):

    # ---------------------------------------------------------------- Training
    model.train()

    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0

    for step, batch in enumerate(train_dataloader):

        batch = tuple(t.to(device) for t in batch)

        if 'roberta' in model_name:
            b_input_ids, b_input_mask, b_labels = batch
        else:
            b_input_ids, b_input_mask, b_labels, b_token_type_ids = batch

        if not classification:
            b_labels = b_labels.view(-1, 1)

        # The loss functions expect float targets.
        b_labels = b_labels.float()
        optimizer.zero_grad()

        if 'roberta' in model_name:
            outputs = model(b_input_ids, b_input_mask, b_labels)
        else:
            outputs = model(b_input_ids, b_token_type_ids, b_input_mask, b_labels)
        loss = outputs[0]
        logits = outputs[1]

        loss.backward()
        optimizer.step()

        tr_loss += loss.item()
        nb_tr_examples += b_input_ids.size(0)
        nb_tr_steps += 1

    print("Train loss: {}".format(tr_loss/nb_tr_steps))

    # ------------------------------------------- Evaluation on the held-out fold
    model.eval()

    tweets_test = []

    # One boolean vector per tweet, plus the same values regrouped per label.
    predictions = []
    predictions_sep = [[] for _ in range(n_labels)]

    labels = []
    labels_sep = [[] for _ in range(n_labels)]

    eval_loss = 0
    steps=0
    for step, batch in enumerate(test_dataloader):

        # Move the batch to the compute device.
        batch = tuple(t.to(device) for t in batch)

        if 'roberta' in model_name:
            b_input_ids, b_input_mask, b_labels, ids = batch
        else:
            b_input_ids, b_input_mask, b_labels, b_token_type_ids = batch

        if not classification:
            b_labels = b_labels.view(-1, 1)

        b_labels = b_labels.float()

        with torch.no_grad():

            if 'roberta' in model_name:
                outputs = model(b_input_ids, b_input_mask, b_labels)
            else:
                outputs = model(b_input_ids, b_token_type_ids, b_input_mask, b_labels)
            logits = outputs[1]
            loss = outputs[0]

        logits = logits.detach().cpu().numpy()
        ground_truth = b_labels.detach().cpu().numpy()

        steps+=1
        eval_loss+=loss.detach().item()

        # Keep the token ids on the CPU so evaluation does not accumulate GPU
        # memory batch after batch.
        tweets_test.append(b_input_ids.cpu())

        # The former per-row `pred` value (argmax / round_regression) was never
        # used, and round_regression cannot take a vector, so it is gone.
        for p in logits:
            predictions.append(p>threshold)
            for i in range(0, n_labels):
                predictions_sep[i].append(p[i]>threshold)

        for gt in ground_truth:
            labels.append(gt>threshold)
            for i in range(0, n_labels):
                labels_sep[i].append(gt[i]>threshold)

    # Matthews correlation coefficient, one value per label.
    MCCs = []
    for i in range(0, n_labels):
        MCCs.append(metrics.matthews_corrcoef(labels_sep[i], predictions_sep[i]))

    # Collapse to a binary problem: 1 if any label is set, 0 if none is.
    labels_one = [int(row.any()) for row in labels]
    predictions_one = [int(row.any()) for row in predictions]

    LOSS = eval_loss/steps
    # The plateau scheduler watches the mean evaluation loss.
    scheduler.step(LOSS)
    MCCF = metrics.matthews_corrcoef(np.array(labels).flatten(), np.array(predictions).flatten())
    ACC = metrics.accuracy_score(labels, predictions)
    MCCNC = metrics.matthews_corrcoef(labels_one, predictions_one)
    F1 = metrics.f1_score(labels, predictions, average='weighted')
    MCCA = np.array(MCCs).mean()
    if MCCA> best_MCCA:
        best_MCCF = MCCF
        best_MCCA = MCCA
        best_loss = LOSS
        best_acc = ACC
        best_F1 = F1
        best_MCCs = MCCs
        best_MCCNC = MCCNC
        best_state_dict = copy.deepcopy(model.state_dict())
        best_epoch = e

    print("\t Eval loss: {}".format(LOSS))
    print("\t Eval ACC: {}".format(ACC))
    print("\t Eval MCCA: {}".format(MCCA))
    print("\t Eval MCCF: {}".format(MCCF))
    print("\t Eval MCCs: {}".format(MCCs))
    print("\t Eval MCC 1 vs other: {}".format(MCCNC))
    print("\t Eval F1 weighted: {}".format(F1))
    


Epoch:   0%|                                                 | 0/30 [00:00<?, ?it/s]

Train loss: 4.15667368221283


Epoch:   3%|█▎                                    | 1/30 [03:34<1:43:28, 214.08s/it]

	 Eval loss: 3.38576590269804
	 Eval ACC: 0.4855305466237942
	 Eval MCCA: 0.0
	 Eval MCCF: 0.0
	 Eval MCCs: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	 Eval MCC 1 vs other: 0.0
	 Eval F1 weighted: 0.0
Train loss: 3.2818857460021973


Epoch:   7%|██▌                                   | 2/30 [07:08<1:39:54, 214.11s/it]

	 Eval loss: 2.694883955642581
	 Eval ACC: 0.49517684887459806
	 Eval MCCA: 0.16344438884748153
	 Eval MCCF: 0.25266826996359093
	 Eval MCCs: [0.0, 0.25577021009073103, 0.40414003022006, 0.0, 0.0, 0.8110892593165426, 0.0, 0.0, 0.0]
	 Eval MCC 1 vs other: 0.23360346365431425
	 Eval F1 weighted: 0.10810495286460613
Train loss: 2.500204922199249


Epoch:  10%|███▊                                  | 3/30 [10:41<1:36:13, 213.84s/it]

	 Eval loss: 2.13862815964967
	 Eval ACC: 0.5530546623794212
	 Eval MCCA: 0.4078677103884172
	 Eval MCCF: 0.5048323020601615
	 Eval MCCs: [0.0, 0.7333542672347239, 0.5197391466671093, 0.0, 0.0, 0.9098330807202929, 0.7856140711454555, 0.7222688277281736, 0.0]
	 Eval MCC 1 vs other: 0.4412783365849191
	 Eval F1 weighted: 0.3724528034800572
Train loss: 1.882095663547516


Epoch:  13%|█████                                 | 4/30 [14:15<1:32:37, 213.76s/it]

	 Eval loss: 1.8714201068505645
	 Eval ACC: 0.5852090032154341
	 Eval MCCA: 0.5162407828255708
	 Eval MCCF: 0.5446122067370258
	 Eval MCCs: [0.5301802308720894, 0.7659710913812849, 0.5581446400094104, 0.0, 0.0, 0.8151982686453727, 0.8970672676222476, 0.72222082374386, 0.3573847231558724]
	 Eval MCC 1 vs other: 0.4977314829734596
	 Eval F1 weighted: 0.4139253603086656


In [None]:
# Report the metrics recorded for the best epoch (highest mean per-class MCC).
print('Best epoch ', best_epoch)
best_summary = [
    ("\t Eval loss: {}", best_loss),
    ("\t Eval ACC: {}", best_acc),
    ("\t Eval MCCA: {}", best_MCCA),
    ("\t Eval MCCF: {}", best_MCCF),
    ("\t Eval MCCs: {}", best_MCCs),
    ("\t Eval MCC 1 vs other: {}", best_MCCNC),
    ("\t Eval F1 weighted: {}", best_F1),
]
for template, value in best_summary:
    print(template.format(value))

In [None]:
# NOTE(review): `error` is not defined anywhere in the visible notebook, so
# this line raises NameError. It looks like a deliberate stop so that
# "Run All" halts before the submission cells below — confirm.
raise error

In [None]:
#torch.save(best_state_dict, './Models/task2/roberta-base-all-train.pth')

In [None]:
!ls ./Models/task2/

In [None]:
# Load the unlabeled submission test set and run it through the same
# preprocessing switches as the training data.
df = pd.read_csv('./mediaeval-fakenews/data/test-clean.csv')
# Plain lists (not pandas Series), so the tokenizer gets a list of str even
# when every preprocessing flag is off.
ids_test = df['ids'].tolist()
tw_test = df['tweet'].tolist()

if clean_tweets_flag:
    tw_test = clean_tweets(tw_test)

if replace_lowercase_flag:
    tw_test = to_lowercase(tw_test)

if remove_stopwords_flag:
    tw_test = remove_stopwords(tw_test)

if remove_hashtags_flag:
    tw_test = remove_hashtags(tw_test)

if replace_emojis_flag:
    tw_test = replace_emojis(tw_test)


In [None]:
tokenized_test = tokenizer(tw_test, max_length=MAX_LEN, padding='max_length', truncation=True)

test_input_ids = torch.tensor(tokenized_test['input_ids'])
test_attention_mask = torch.tensor(tokenized_test['attention_mask'])

# Placeholder targets of all ones (nine per tweet), so the model's forward(),
# which always computes a loss, can be called on this set.
test_labels = torch.tensor([[1] * 9 for _ in range(len(ids_test))])
test_ids = torch.tensor(ids_test)

In [None]:
# Larger batches are fine here: no gradients are kept during inference.
batch_size = 70 # 32 if 256

if 'roberta' in model_name:
    test_data = TensorDataset(test_input_ids, test_attention_mask, test_labels, test_ids)

else:
    # Rebuild the segment ids for THIS test set (the earlier tensor belongs to
    # the cross-validation fold and has a different length). Pack the four
    # tensors in the order the inference loop unpacks them:
    # ids, mask, labels, token types.
    test_token_type_ids = torch.tensor(tokenized_test['token_type_ids'])
    test_data = TensorDataset(test_input_ids, test_attention_mask, test_labels, test_token_type_ids)

test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

In [None]:
#model.load_state_dict(torch.load('./Models/task2/'+model_name+'_CV'+str(k)+'_e'+str(best_epoch)+'_'+str(round(best_MCCA, 3))+'.pth'))
# NOTE(review): the checkpoint path is hard-coded to a roberta-base file while
# `model_name` is configured separately; the two must describe the same
# architecture for load_state_dict to succeed.
# map_location places the tensors on the active device regardless of where
# the checkpoint was saved.
model.load_state_dict(torch.load('./Models/task2/roberta-base-all-train.pth', map_location=device))
model.eval()

n_labels = len(conspiracies)

tweets_test = []
ids_test = []

# One boolean vector per tweet, plus the same values regrouped per label.
predictions = []
predictions_sep = [[] for _ in range(n_labels)]

labels = []
labels_sep = [[] for _ in range(n_labels)]

# Raw per-class probabilities, one array per tweet.
logits_test = []

eval_loss = 0
steps=0
# Run inference over the test set.
for step, batch in enumerate(test_dataloader):

    # Move the batch to the compute device.
    batch = tuple(t.to(device) for t in batch)

    if 'roberta' in model_name:
        b_input_ids, b_input_mask, b_labels, ids = batch
        # Store plain Python ints rather than 0-d device tensors.
        ids_test.extend(ids.tolist())
    else:
        b_input_ids, b_input_mask, b_labels, b_token_type_ids = batch

    if not classification:
        b_labels = b_labels.view(-1, 1)

    b_labels = b_labels.float()

    with torch.no_grad():

        if 'roberta' in model_name:
            outputs = model(b_input_ids, b_input_mask, b_labels)
        else:
            outputs = model(b_input_ids, b_token_type_ids, b_input_mask, b_labels)
        logits = outputs[1]
        loss = outputs[0]

    logits = logits.detach().cpu().numpy()
    ground_truth = b_labels.detach().cpu().numpy()

    logits_test.extend(logits)

    steps+=1
    eval_loss+=loss.detach().item()
    for tw in b_input_ids:
        tweets_test.append(tokenizer.decode(tw))

    # The former per-row `pred` value (argmax / round_regression) was never
    # used, and round_regression cannot take a vector, so it is gone.
    for p in logits:
        predictions.append(p>threshold)
        for i in range(0, n_labels):
            predictions_sep[i].append(p[i]>threshold)

    for gt in ground_truth:
        labels.append(gt>threshold)
        for i in range(0, n_labels):
            labels_sep[i].append(gt[i]>threshold)

# NOTE: when the dataloader carries placeholder labels, the metrics below are
# computed against those placeholders.
MCCs = []
for i in range(0, n_labels):
    MCCs.append(metrics.matthews_corrcoef(labels_sep[i], predictions_sep[i]))

# Collapse to a binary problem: 1 if any label is set, 0 if none is.
labels_one = [int(row.any()) for row in labels]
predictions_one = [int(row.any()) for row in predictions]

In [None]:
# Table of the raw per-class model outputs for the test set.
# `logits_test` holds one array per tweet (filled by the inference loop);
# `test_ids` is the id tensor built when the test set was tokenised.
new_df = pd.DataFrame()
new_df['predictions'] = logits_test
new_df['ids'] = test_ids
#new_df.to_csv('./results/task2_cv'+str(k)+'_logits.csv', index=False)
new_df

In [None]:
# Same table, but with the thresholded boolean prediction vectors instead of
# the raw outputs. Rebinds `new_df` from the previous cell.
new_df = pd.DataFrame()
new_df['predictions'] = predictions
new_df['ids'] = test_ids
#new_df.to_csv('./results/teset-roberta-base-task2.csv', index=False)
new_df

In [None]:
# Recover the plain tweet text for every evaluated example.
#
# The evaluation loop stores `tokenizer.decode(tw)` -- i.e. one decoded *string*
# per tweet -- in `tweets_test`. The previous version of this cell treated
# `tweets_test` as a list of 32-row tensor batches (`tweets_test[i][j].cpu()`),
# which fails on strings and also assumed every batch had exactly 32 rows.
# Here each decoded string is trimmed directly, producing one entry per tweet so
# that `test_inputs[i]` lines up with `labels[i]` / `predictions[i]`.
test_inputs = []
for decoded in tweets_test:
    # Keep what lies between the first '<s>' and the following '</s>'
    # (this also drops everything after '</s>', e.g. padding tokens).
    # If '<s>' is absent, fall back to the start of the string instead of raising.
    _, bos_found, after_bos = decoded.partition('<s>')
    text = after_bos if bos_found else decoded
    test_inputs.append(text.partition('</s>')[0])

In [None]:
# List every (example, class) pair where the thresholded prediction disagrees
# with the gold label: class name, gold value, and the tweet text.
for row in range(len(labels)):
    gold_row = labels[row]
    pred_row = predictions[row]
    for col in range(9):
        if gold_row[col] == pred_row[col]:
            continue  # prediction agrees with the gold label
        print(conspiracies[col], gold_row[col], test_inputs[row])



In [None]:
def average_folds(folds):
    """Average cross-validation scores over folds.

    Parameters
    ----------
    folds : sequence
        Either one scalar score per fold, or one equally long sequence of
        per-class scores per fold.

    Returns
    -------
    float or list of float
        The mean over folds: a single number for scalar folds, otherwise a
        list holding the per-class mean.

    Raises
    ------
    ValueError
        If `folds` is empty or the per-class sequences differ in length.
    """
    n_folds = len(folds)
    if n_folds == 0:
        raise ValueError("average_folds() needs at least one fold")
    if np.ndim(folds[0]) == 0:
        # one scalar metric per fold
        return sum(folds) / n_folds
    if len({len(fold) for fold in folds}) != 1:
        raise ValueError("all folds must report the same number of classes")
    # zip(*folds) regroups the scores class by class
    return [sum(per_class) / n_folds for per_class in zip(*folds)]


# "Eval F1 weighted" of the five folds (CV0..CV4) of the
# roberta-base-ne-rh-cw run, copied from the results log below.
CVs = [
    0.690803972664749,
    0.5558459007027099,
    0.6826415214096324,
    0.7313539869700288,
    0.7069902423375098,
]

# `L` receives the per-class averages when CVs holds per-class lists and stays
# empty for scalar folds. The branch is chosen explicitly; previously a bare
# `except:` around the per-class code doubled as the scalar branch, which would
# also have hidden any genuine error.
L = []
if np.ndim(CVs[0]) == 0:
    print(average_folds(CVs))
else:
    L = average_folds(CVs)
    for class_mean in L:
        print(class_mean)
    print(L)



roberta-base-ne-rh-cw, 20 epochs
    
    MCC: 0.6654098900854347
    MCCs: [0.5524150476052554, 0.7679068700779181, 0.7434705970293489, 0.53421306694965, 0.31320778183707854, 0.8738837418030807, 0.827025851500666, 0.8654943602641909, 0.6778128959922297]
    MCC 1 vs other: 0.6013349313398135
    F1 score: 0.673527124816926

    CV0
     Eval MCC: 0.6693922816899639
	 Eval MCCs: [0.42860050121638743, 0.7252650176678446, 0.7722176144987796, 0.6302199352192639, 0.3234153127511404, 0.9098330807202929, 0.852192697907109, 0.8391181413957557, 0.5878134599824882]
	 Eval MCC 1 vs other: 0.5622651298968165
	 Eval F1 weighted: 0.690803972664749
    
    CV1
     Eval MCC: 0.6000358463779237
	 Eval MCCs: [0.5615601503759399, 0.6576259690552511, 0.7209708270079217, 0.40690082151862383, 0.0797899190540391, 0.7750680390694658, 0.780761990291456, 0.8812900814794339, 0.6483410729958033]
	 Eval MCC 1 vs other: 0.5575745033112582
	 Eval F1 weighted: 0.5558459007027099
    
    CV2
     Eval MCC: 0.6639509510033542
	 Eval MCCs: [0.4515089332178771, 0.7955522730224036, 0.7825896115018466, 0.48974569319114025, 0.3298767698501493, 0.8654490514993884, 0.7978919748135389, 0.8506850935533509, 0.7216546169363538]
	 Eval MCC 1 vs other: 0.640555078097185
	 Eval F1 weighted: 0.6826415214096324
     
    CV3
     Eval MCC: 0.7026550886410515
	 Eval MCCs: [0.6700308105163018, 0.7820634449596706, 0.760169654906497, 0.5701386573495464, 0.5108042035961533, 0.9057575757575758, 0.8485167984719844, 0.8202585452844271, 0.7364406779661017]
	 Eval MCC 1 vs other: 0.6333095824234448
	 Eval F1 weighted: 0.7313539869700288

    CV4
     Eval MCC: 0.69101528271488
	 Eval MCCs: [0.6503748426997711, 0.8790276456844205, 0.6814052772316999, 0.5740602274696759, 0.3221527039339106, 0.9133109619686801, 0.8557657960192416, 0.9361199396079867, 0.6948146520804012]
	 Eval MCC 1 vs other: 0.612970362970363
	 Eval F1 weighted: 0.7069902423375098




roberta-base-ne-rh-ct 'virus'

    MCC 0.5645707750570181
    MCCs [0.0, 0.6924143845779678, 0.7367030570567744, 0.48754798145059786, 0.1396599352715253, 0.6487169994487173, 0.7603298255290551, 0.7285900272338771, 0.0]
    MCC 1 vs other: 0.5573428018644087
    F1 weighted: 0.5060391684024019

    CV0
     Eval MCC: 0.572002402314867
	 Eval MCCs: [0.0, 0.7171500972418344, 0.6778292495719211, 0.537598504095889, 0.1441954356128429, 0.8110892593165426, 0.8436782760095076, 0.6018584254641897, 0.0]
	 Eval MCC 1 vs other: 0.506613144282354
	 Eval F1 weighted: 0.5183449582118178
     
    CV1
     Eval MCC: 0.5675140765889306
	 Eval MCCs: [0.0, 0.6700157588234843, 0.7133200840083925, 0.4721250856617546, 0.19309889086137313, 0.6059156682132061, 0.8122992568299051, 0.7928260719998715, 0.0]
	 Eval MCC 1 vs other: 0.5828297879563119
	 Eval F1 weighted: 0.5018142134794671

    CV2
     Eval MCC: 0.5228772733803458
	 Eval MCCs: [0.0, 0.6335790996296946, 0.7550290307342492, 0.44438977344110725, 0.23157653872238865, 0.401588002072828, 0.54882259801054, 0.8159121192042134, 0.0]
	 Eval MCC 1 vs other: 0.5413682948596558
	 Eval F1 weighted: 0.47918670656828255

    CV3
     Eval MCC: 0.5727971743886916
	 Eval MCCs: [0.0, 0.730986238974042, 0.7184699986803038, 0.463317674222242, 0.12942881116102176, 0.6675573372931909, 0.7640228470566507, 0.8159121192042134, 0.0]
	 Eval MCC 1 vs other: 0.5509235787277967
	 Eval F1 weighted: 0.523994837912579

    CV4
     Eval MCC: 0.5876629486122559
	 Eval MCCs: [0.0, 0.7103407282207833, 0.818866922289005, 0.5203088698319962, 0.0, 0.7574347303478193, 0.832826149738672, 0.6164414002968975, 0.0]
	 Eval MCC 1 vs other: 0.6049792034959248
	 Eval F1 weighted: 0.5068551258398635

roberta-base-ne-rh-ct 'wuhan virus'

    MCC 0.5936937677539237
    MCCs [0.0, 0.7586291798047583, 0.7269793798057074, 0.5216994682535349, 0.25717796831777495, 0.5339780501326101, 0.7915754861444928, 0.7768418620119917, 0.0]
    MCC 1 vs other: 0.5556210118597071
    F1 weighted: 0.5587084862311549

    CV0
     Eval MCC: 0.5554272912121295
	 Eval MCCs: [0.0, 0.8351536074236927, 0.6322138486966449, 0.4774271024459417, 0.23407803057086504, 0.5697781346214729, 0.7915721885745207, 0.6417189156063434, 0.0]
	 Eval MCC 1 vs other: 0.486812780765384
	 Eval F1 weighted: 0.5153900339085524
     
    CV1
     Eval MCC: 0.5718827243372663
	 Eval MCCs: [0.0, 0.6860171630489652, 0.6558787445931271, 0.5056934321332517, 0.0, 0.6675573372931909, 0.8057464744632933, 0.845139371060569, 0.0]
	 Eval MCC 1 vs other: 0.5502019433115497
	 Eval F1 weighted: 0.4906719203456417

    CV2
     Eval MCC: 0.6470762868824049
	 Eval MCCs: [0.0, 0.7941665712025352, 0.7905239190483594, 0.5853175186526681, 0.40103364169193567, 0.7574556324219388, 0.7659710913812849, 0.8247393072270269, 0.0]
	 Eval MCC 1 vs other: 0.6135952990847273
	 Eval F1 weighted: 0.6374894722045488


    CV3
     Eval MCC: 0.598427658779346
	 Eval MCCs: [0.0, 0.7360097161961785, 0.764603775659926, 0.4848061257435033, 0.3291712127171663, 0.6750991463264484, 0.7739841300146298, 0.8384988440308885, 0.0]
	 Eval MCC 1 vs other: 0.5630615977102935
	 Eval F1 weighted: 0.596768643986279

    CV4
     Eval MCC: 0.5956548775584712
	 Eval MCCs: [0.0, 0.74179884115242, 0.7916766110304798, 0.5552531622923093, 0.3216069566089075, 0.0, 0.8206035462887359, 0.7341128721351309, 0.0]
	 Eval MCC 1 vs other: 0.5644334384265817
	 Eval F1 weighted: 0.5532223607107528

roberta-base-ne-rh

    MCC 0.5831859798218391
    MCCs [0.0, 0.7451054119555204, 0.721440142207641, 0.49050237642063255, 0.1976472077983185, 0.5732948384655411, 0.7802770074383321, 0.7912258973690008, 0.04612988295774134]
    MCC 1 vs other: 0.5687445014242034
    F1 weighted: 0.538882154144453

    CV0
     Eval MCC: 0.6062767753314612
	 Eval MCCs: [0.0, 0.7978919748135389, 0.7534478479246006, 0.4958912930781506, 0.22977378007548427, 0.7001170862264607, 0.8352620506089892, 0.7565073210184726, 0.0]
	 Eval MCC 1 vs other: 0.5322233699467324
	 Eval F1 weighted: 0.5634600925874608

    CV1
     Eval MCC: 0.5672260567041271
	 Eval MCCs: [0.0, 0.6718013873727162, 0.6469791854964575, 0.5124402467976967, 0.11320694593849961, 0.7976905619151519, 0.7732926801197991, 0.7951494147635916, 0.0]
	 Eval MCC 1 vs other: 0.6054918412307335
	 Eval F1 weighted: 0.5094482393771169

    CV2
     Eval MCC: 0.6087265482755518
	 Eval MCCs: [0.0, 0.809855710442651, 0.8074089012633905, 0.44312948403800534, 0.3724457686984207, 0.2835072205183168, 0.7267181052190558, 0.8008233460395208, 0.24387662137157162]
	 Eval MCC 1 vs other: 0.6037893387335158
	 Eval F1 weighted: 0.5665388296399381

    CV3
     Eval MCC: 0.5810343853643407
	 Eval MCCs: [0.0, 0.7021614307078777, 0.6936817538580171, 0.5673576808151052, 0.27280954427918797, 0.5154060995580372, 0.7516170021078921, 0.8159894763325497, -0.0132272065828649]
	 Eval MCC 1 vs other: 0.5877040351313282
	 Eval F1 weighted: 0.5763568993972661

    CV4
     Eval MCC: 0.5526661334337145
	 Eval MCCs: [0.0, 0.7438165564408182, 0.7056830224957397, 0.4336931773742048, 0.0, 0.5697532241097388, 0.8144951991359242, 0.7876599286908698, 0.0]
	 Eval MCC 1 vs other: 0.514513922078707
	 Eval F1 weighted: 0.46725957398642093

roberta-base

    MCC 0.5688375262501378
    MCCs [0.0, 0.7574712203204303, 0.6755199677685797, 0.492401896189082, 0.16985821970869738, 0.3141116164825034, 0.7731892215113226, 0.7718570131106022, 0.04877532427431432]
    MCC 1 vs other: 0.568850900245214
    F1 weighted: 0.5066875677553945
    

    CV0
     Eval MCC: 0.5711766541368144
	 Eval MCCs: [0.0, 0.7659710913812849, 0.7187597006180627, 0.5015032677457548, 0.2718953123929632, 0.5697781346214729, 0.7502508453587193, 0.6926817959734433, 0.0]
	 Eval MCC 1 vs other: 0.5387349060978782
	 Eval F1 weighted: 0.5389900137652603

    CV1
     Eval MCC: 0.5851019532287719
	 Eval MCCs: [0.0, 0.7289243733533024, 0.6180879948332005, 0.5954018773784128, 0.20495001745210306, 0.7172727272727273, 0.7972091810119979, 0.8096021101018593, 0.0]
	 Eval MCC 1 vs other: 0.6259699863614578
	 Eval F1 weighted: 0.5422259076156084

    CV2
     Eval MCC: 0.6087265482755518
	 Eval MCCs: [0.0, 0.809855710442651, 0.8074089012633905, 0.44312948403800534, 0.3724457686984207, 0.2835072205183168, 0.7267181052190558, 0.8008233460395208, 0.24387662137157162]
	 Eval MCC 1 vs other: 0.6037893387335158
	 Eval F1 weighted: 0.5665388296399381

    CV3
     Eval MCC: 0.5265191071699037
	 Eval MCCs: [0.0, 0.7186064694657204, 0.5907177267502242, 0.4041221033925372, 0.0, 0.0, 0.8122026581712178, 0.7948230886553483, 0.0]
	 Eval MCC 1 vs other: 0.5853958149828832
	 Eval F1 weighted: 0.4465593824117734

    CV4
     Eval MCC: 0.5526633684396475
	 Eval MCCs: [0.0, 0.7639984569591931, 0.6426255153780203, 0.5178527483906997, 0.0, 0.0, 0.779565317795622, 0.7613547247828389, 0.0]
	 Eval MCC 1 vs other: 0.49036445505033527
	 Eval F1 weighted: 0.43912370534439193

roberta-model threshold 0.5:
	 Eval loss: 0.1838788330554962
	 Eval ACC: 0.5273311897106109
	 Eval MCC: 0.5960125462227716
	 Eval MCCs: [0.0, 0.6329781556312386, 0.7820744987599528, 0.44384639988077706, 0.17954940420025947, 0.7056340922398517, 0.7512385845326037, 0.7732538663940368, 0.0]
	 Eval Kappa: 0.5711325966850829

roberta-model threshold 0.3:
     Eval loss: 0.21058178693056107
	 Eval ACC: 0.4662379421221865
	 Eval MCC: 0.6573546169315774
	 Eval MCCs: [0.0, 0.6876344223650447, 0.7410155972655911, 0.4315558580462087, 0.13755046869478835, 0.0, 0.666130102959929, 0.8384782365353266, 0.0]
	 Eval Kappa: 0.6284364666981577

roberta-model threshold 0.8:
     Eval loss: 0.1838788330554962
	 Eval ACC: 0.48231511254019294
	 Eval MCC: 0.40335439607990653
	 Eval MCCs: [0.0, 0.638688367240694, 0.643168215736974, 0.25474696472398095, 0.0, 0.0, 0.678643320836891, 0.508190350779955, 0.0]
	 Eval Kappa: 0.3526192337763878
     