In [1]:
import random
from torch.utils.data import DataLoader, SequentialSampler
import torch

from transformers import BertTokenizer
from torch.utils.data import TensorDataset

from transformers import BertForSequenceClassification


We are using PyTorch as our deep learning framework. 
Importing necessary libraries to pre-processing, tokenizing and evaluation.

In [2]:
import os
import numpy as np
import pandas as pd
import gensim
from sklearn.metrics import f1_score, precision_score, recall_score


Checking the device. We will proceed if there is a GPU available.

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


print(device)
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3, 1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3, 1), 'GB')
else:
    exit(0)

cuda
Tesla T4
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


Download the test set and load into a DataFrame.

In [4]:
#if not os.path.isfile("./Dataset/github-labels-top3-803k-test.csv"):
#    !curl "https://tickettagger.blob.core.windows.net/datasets/github-labels-top3-803k-test.tar.gz" | tar -xz 
#    #!mv github-labels-top3-803k-test.csv ./Dataset/

testdf = pd.read_csv("/kaggle/input/corona-nlp-test/Corona_NLP_test.csv")
print(testdf.head())

   UserName  ScreenName             Location     TweetAt  \
0         1       44953                  NYC  02-03-2020   
1         2       44954          Seattle, WA  02-03-2020   
2         3       44955                  NaN  02-03-2020   
3         4       44956          Chicagoland  02-03-2020   
4         5       44957  Melbourne, Victoria  03-03-2020   

                                       OriginalTweet           Sentiment  
0  TRENDING: New Yorkers encounter empty supermar...  Extremely Negative  
1  When I couldn't find hand sanitizer at Fred Me...            Positive  
2  Find out how you can protect yourself and love...  Extremely Positive  
3  #Panic buying hits #NewYork City as anxious sh...            Negative  
4  #toiletpaper #dunnypaper #coronavirus #coronav...             Neutral  


Use the same label map used in the training.

In [5]:
label_dict = {'Neutral': 0, 'Positive': 1, 'Extremely Negative': 2, 'Negative': 3, 'Extremely Positive': 4}
testdf['label'] = testdf.Sentiment.replace(label_dict)

  testdf['label'] = testdf.Sentiment.replace(label_dict)


Pre-preocessing function for removing whitespace and creating new feature.

In [6]:
def preprocess(row):
    # concatenate title and body, then remove whitespaces
    doc = ""
    doc += str(row.TweetAt)
    doc += " "
    doc += str(row.OriginalTweet)
    # https://radimrehurek.com/gensim/parsing/preprocessing.html
    doc = gensim.parsing.preprocessing.strip_multiple_whitespaces(doc)
    return doc

Applying preporcessing step on the dataframe.

In [7]:
testdf['tweet_info'] = testdf.apply(preprocess, axis=1)

newTestDF = testdf[['Sentiment', 'OriginalTweet', 'label']]
testdf = newTestDF.copy()
print(testdf.head())

            Sentiment                                      OriginalTweet  \
0  Extremely Negative  TRENDING: New Yorkers encounter empty supermar...   
1            Positive  When I couldn't find hand sanitizer at Fred Me...   
2  Extremely Positive  Find out how you can protect yourself and love...   
3            Negative  #Panic buying hits #NewYork City as anxious sh...   
4             Neutral  #toiletpaper #dunnypaper #coronavirus #coronav...   

   label  
0      2  
1      1  
2      4  
3      3  
4      0  


Initiating tokenizer and encoding the test set.

In [8]:

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)


encoded_data_test = tokenizer.batch_encode_plus(
    testdf.OriginalTweet.values,
    add_special_tokens=True,
    return_attention_mask=True,
    padding='longest',
    truncation=True,
    return_tensors='pt'
)

input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']
labels_test = torch.tensor(testdf.label.values)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Creating TensorDataset from encoded and masked test and creating a dataloader for testing.

In [9]:
dataset_test = TensorDataset(input_ids_test, attention_masks_test, labels_test)

batch_size = 4


dataloader_test = DataLoader(dataset_test,
                             sampler=SequentialSampler(dataset_test),
                             batch_size=batch_size)

Declaring function for result generation.

In [10]:

def result_generation(preds, labels):
    label_dict_inverse = {v: k for k, v in label_dict.items()}

    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat == label]
        y_true = labels_flat[labels_flat == label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy: {len(y_preds[y_preds==label])}/{len(y_true)}\n')

        P_c = precision_score(labels_flat, preds_flat, average=None, labels=[label])[0]
        R_c = recall_score(labels_flat, preds_flat, average=None, labels=[label])[0]
        F1_c = f1_score(labels_flat, preds_flat, average=None, labels=[label])[0]

        print(f"=*= {label_dict_inverse[label]} =*=")
        # print("Full precision:\t",P_c)
        # print("Full recall:\t\t",R_c)
        # print("Full F1 score:\t",F1_c)
        print(f"precision:\t{P_c:.4f}")
        print(f"recall:\t\t{R_c:.4f}")
        print(f"F1 score:\t{F1_c:.4f}")
        print()

    P = precision_score(labels_flat, preds_flat, average='micro')
    R = recall_score(labels_flat, preds_flat, average='micro')
    F1 = f1_score(labels_flat, preds_flat, average='micro')

    print("=*= global =*=")
    print(f"precision:\t{P:.4f}")
    print(f"recall:\t\t{R:.4f}")
    print(f"F1 score:\t{F1:.4f}")
    print()

Fixing seed value for random sampling.

In [11]:

seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

Declaring the function for evaluating.

In [12]:
def evaluate(model, dataloader_val):
    
    model.eval()

    loss_val_total = 0
    predictions, true_vals = [], []

    for batch in dataloader_val:

        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                  }

        with torch.no_grad():
            outputs = model(**inputs)

        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)

    loss_val_avg = loss_val_total/len(dataloader_val)

    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)

    return loss_val_avg, predictions, true_vals


Evaluating the model on different model states.

In [13]:
for i in range(1, 5):
    print("Epoch: ", i)
    model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                          num_labels=len(
                                                              label_dict),
                                                          output_attentions=False,
                                                          output_hidden_states=False)

    model.to(device)
    model.load_state_dict(torch.load(
        '/kaggle/input/corona-nlp-test-model/pytorch/slug/1/model_colab.pth', map_location=device))

    # %%
    _, predictions, true_vals = evaluate(model, dataloader_test)

    # %%
    result_generation(predictions, true_vals)

Epoch:  1


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class: Neutral
Accuracy: 534/619

=*= Neutral =*=
precision:	0.8725
recall:		0.8627
F1 score:	0.8676

Class: Positive
Accuracy: 760/947

=*= Positive =*=
precision:	0.8342
recall:		0.8025
F1 score:	0.8181

Class: Extremely Negative
Accuracy: 525/592

=*= Extremely Negative =*=
precision:	0.8294
recall:		0.8868
F1 score:	0.8571

Class: Negative
Accuracy: 849/1041

=*= Negative =*=
precision:	0.8307
recall:		0.8156
F1 score:	0.8231

Class: Extremely Positive
Accuracy: 528/599

=*= Extremely Positive =*=
precision:	0.8516
recall:		0.8815
F1 score:	0.8663

=*= global =*=
precision:	0.8415
recall:		0.8415
F1 score:	0.8415

Epoch:  2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class: Neutral
Accuracy: 534/619

=*= Neutral =*=
precision:	0.8725
recall:		0.8627
F1 score:	0.8676

Class: Positive
Accuracy: 760/947

=*= Positive =*=
precision:	0.8342
recall:		0.8025
F1 score:	0.8181

Class: Extremely Negative
Accuracy: 525/592

=*= Extremely Negative =*=
precision:	0.8294
recall:		0.8868
F1 score:	0.8571

Class: Negative
Accuracy: 849/1041

=*= Negative =*=
precision:	0.8307
recall:		0.8156
F1 score:	0.8231

Class: Extremely Positive
Accuracy: 528/599

=*= Extremely Positive =*=
precision:	0.8516
recall:		0.8815
F1 score:	0.8663

=*= global =*=
precision:	0.8415
recall:		0.8415
F1 score:	0.8415

Epoch:  3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class: Neutral
Accuracy: 534/619

=*= Neutral =*=
precision:	0.8725
recall:		0.8627
F1 score:	0.8676

Class: Positive
Accuracy: 760/947

=*= Positive =*=
precision:	0.8342
recall:		0.8025
F1 score:	0.8181

Class: Extremely Negative
Accuracy: 525/592

=*= Extremely Negative =*=
precision:	0.8294
recall:		0.8868
F1 score:	0.8571

Class: Negative
Accuracy: 849/1041

=*= Negative =*=
precision:	0.8307
recall:		0.8156
F1 score:	0.8231

Class: Extremely Positive
Accuracy: 528/599

=*= Extremely Positive =*=
precision:	0.8516
recall:		0.8815
F1 score:	0.8663

=*= global =*=
precision:	0.8415
recall:		0.8415
F1 score:	0.8415

Epoch:  4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class: Neutral
Accuracy: 534/619

=*= Neutral =*=
precision:	0.8725
recall:		0.8627
F1 score:	0.8676

Class: Positive
Accuracy: 760/947

=*= Positive =*=
precision:	0.8342
recall:		0.8025
F1 score:	0.8181

Class: Extremely Negative
Accuracy: 525/592

=*= Extremely Negative =*=
precision:	0.8294
recall:		0.8868
F1 score:	0.8571

Class: Negative
Accuracy: 849/1041

=*= Negative =*=
precision:	0.8307
recall:		0.8156
F1 score:	0.8231

Class: Extremely Positive
Accuracy: 528/599

=*= Extremely Positive =*=
precision:	0.8516
recall:		0.8815
F1 score:	0.8663

=*= global =*=
precision:	0.8415
recall:		0.8415
F1 score:	0.8415

