In [35]:
from typing import Dict

import matplotlib.pyplot as plt
import nlp
import numpy as np
import pandas as pd
import torch
import transformers
from captum.attr import (IntegratedGradients, LayerIntegratedGradients,
                         configure_interpretable_embedding_layer,
                         remove_interpretable_embedding_layer)

from tqdm.notebook import tqdm
from captum.attr import visualization as viz
from torch.utils.data import TensorDataset
from transformers import (ElectraForSequenceClassification,
                          ElectraTokenizerFast, EvalPrediction, InputFeatures,
                          Trainer, TrainingArguments, glue_compute_metrics)

import tensorflow as tf 
transformers.__version__

'4.12.5'

In [3]:
# The classifier and its tokenizer are both loaded from the same pretrained checkpoint.
checkpoint = "google/electra-small-discriminator"

# num_labels=3 requests a three-way sequence-classification head.
model = ElectraForSequenceClassification.from_pretrained(checkpoint, num_labels=3)
tokenizer = ElectraTokenizerFast.from_pretrained(checkpoint, do_lower_case=True)

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/51.7M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

In [4]:
from sklearn.model_selection import train_test_split

# Labelled tweets; the path is relative to the notebook's working directory.
df = pd.read_csv('./../Naive_Bayes/tweets/allLabeledTweets.csv')

# Stratified 85/15 split over the row index labels, with a fixed random_state.
labels_all = df.label.values
X_train, X_val, y_train, y_val = train_test_split(
    df.index.values,
    labels_all,
    test_size=0.15,
    random_state=42,
    stratify=labels_all,
)

# Tag every row with the split it ended up in.
df['data_type'] = 'not_set'
df.loc[X_train, 'data_type'] = 'train'
df.loc[X_val, 'data_type'] = 'val'

# Non-null counts per column for each (label, split) pair.
df.groupby(['label', 'data_type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,message,id,tweetId,createdAt,language,inReplyToStatusId,inReplyToUserId,inReplyToScreenName,userId,userName,...,placeType,retweetedId,retweetCount,message_lowercase,clean_message,clean_message_no_punct,clean_message_no_stopwords_from_list,clean_message_no_punct_no_stopwords_from_list,clean_message_no_punct_no_freq_stopwords,clean_message_no_freq_stopwords
label,data_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,train,205,205,205,205,205,0,0,0,205,205,...,9,0,205,205,205,205,205,205,205,205
0,val,37,37,37,37,37,0,0,0,37,37,...,0,0,37,37,37,37,37,37,37,37
1,train,71,71,71,71,71,0,0,0,71,71,...,5,0,71,71,71,71,71,71,71,71
1,val,13,13,13,13,13,0,0,0,13,13,...,4,0,13,13,13,13,13,13,13,13
2,train,103,103,103,103,103,0,0,0,103,103,...,10,0,103,103,103,103,103,103,103,103
2,val,18,18,18,18,18,0,0,0,18,18,...,0,0,18,18,18,18,18,18,18,18


In [6]:
# Class distribution of the training split.
df.loc[df.data_type == 'train', 'label'].value_counts()

0    205
2    103
1     71
Name: label, dtype: int64

In [7]:
# Validation split kept as a two-element list [messages, labels] (not a DataFrame).
df_val = [df[df.data_type=='val'].message_lowercase, df[df.data_type=='val'].label]

# Training split as a two-column frame: "message" (lower-cased text) and "label".
is_train = df.data_type == 'train'
df_train = pd.concat(
    [df.loc[is_train, 'message_lowercase'], df.loc[is_train, 'label']],
    axis=1,
    keys=["message", "label"],
)

df_0 = df_train[df_train['label']==0]
df_1 = df_train[df_train['label']==1]
df_2 = df_train[df_train['label']==2]

# Balance the classes by downsampling 0 and 2 to the size of class 1.
# random_state pins the draw: without it every run of this cell picks a
# different subset (the global seeds are only set in a later cell).
# Assumes class 1 is the smallest class; .sample raises otherwise.
n_per_class = df_1.shape[0]
df_0_downsampled = df_0.sample(n_per_class, random_state=42)
df_2_downsampled = df_2.sample(n_per_class, random_state=42)

df_train = pd.concat([df_0_downsampled, df_2_downsampled, df_1])

# NOTE(review): the tokenisation cell below re-filters `df` by data_type and
# does not read this balanced `df_train` -- confirm which one should be trained on.
df_train['label'].value_counts()

0    71
2    71
1    71
Name: label, dtype: int64

In [13]:
MAX_SEQ_LEN = 256


def encode_split(split):
    """Tokenise one split of ``df`` and return ``(encoding, labels)``.

    Args:
        split: Value of ``df.data_type`` selecting the rows ('train' or 'val').

    Returns:
        The tokenizer output (PyTorch tensors, every sequence padded or
        truncated to ``MAX_SEQ_LEN`` tokens) and a tensor of the rows' labels.
    """
    rows = df[df.data_type == split]
    encoding = tokenizer.batch_encode_plus(
        rows.message_lowercase.values.tolist(),
        add_special_tokens=True,
        return_attention_mask=True,
        # Explicit padding/truncation replaces the deprecated
        # `pad_to_max_length=True`, which relied on truncation being switched
        # on implicitly by `max_length`.
        padding='max_length',
        truncation=True,
        max_length=MAX_SEQ_LEN,
        return_tensors='pt',
    )
    return encoding, torch.tensor(rows.label.values)


# NOTE(review): this encodes the full (unbalanced) training split, not the
# downsampled `df_train` built earlier -- confirm that is intended.
encoded_data_train, labels_train = encode_split('train')
encoded_data_val, labels_val = encode_split('val')

input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']

input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']

# Each dataset item is the triple (input_ids, attention_mask, label).
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

In [21]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

batch_size = 3

# Training batches are drawn through a RandomSampler.
train_sampler = RandomSampler(dataset_train)
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, sampler=train_sampler)

# Validation batches are read through a SequentialSampler, i.e. in dataset order.
val_sampler = SequentialSampler(dataset_val)
dataloader_validation = DataLoader(dataset_val, batch_size=batch_size, sampler=val_sampler)

In [22]:
from transformers import AdamW, get_linear_schedule_with_warmup

epochs = 5

optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

# The schedule spans every optimizer step of the run
# (batches per epoch x epochs) and uses no warm-up steps.
total_training_steps = len(dataloader_train) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)

In [23]:
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    """Return the weighted F1 score of arg-max predictions against ``labels``.

    Args:
        preds: 2-D array of per-class scores; the predicted class of each row
            is the index of its largest value along axis 1.
        labels: Array of true class ids; flattened before scoring.
    """
    y_pred = np.argmax(preds, axis=1).reshape(-1)
    y_true = labels.reshape(-1)
    return f1_score(y_true, y_pred, average='weighted')

def accuracy_per_class(preds, labels, label_names=None):
    """Print, for each class present in ``labels``, how many of its samples were predicted correctly.

    Args:
        preds: 2-D array of per-class scores, one row per sample; the predicted
            class is the arg-max along axis 1.
        labels: Array of true class ids; flattened before use.
        label_names: Optional mapping from class id to a display name. Ids
            missing from the mapping (or every id when ``None``) are printed
            as-is, so any set of class ids is accepted.

    Returns:
        None. One ``Class: <name>`` line and one ``Accuracy: <hits>/<total>``
        line (followed by a blank line) are written to stdout per class.
    """
    names = label_names or {}

    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = np.asarray(labels).flatten()

    for label in np.unique(labels_flat):
        in_class = labels_flat == label
        # Samples of this class whose predicted id equals the true id.
        hits = int(np.count_nonzero(preds_flat[in_class] == label))
        total = int(np.count_nonzero(in_class))
        print(f'Class: {names.get(label, label)}')
        print(f'Accuracy: {hits}/{total}\n')

In [24]:
import random
import numpy as np

# Seed Python's, NumPy's and PyTorch's (CPU and all-CUDA-device) generators
# with the same value, in that order.
seed_val = 17
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed_val)

def evaluate(dataloader_val):
    """Run the global ``model`` over ``dataloader_val`` without gradient tracking.

    The model is switched to eval mode and left in it on return.

    Args:
        dataloader_val: Iterable of ``(input_ids, attention_mask, labels)``
            tensor batches that also supports ``len()``.

    Returns:
        Tuple ``(loss_val_avg, predictions, true_vals)``: the mean of the
        per-batch losses, the logits of all batches concatenated along axis 0
        as a NumPy array, and the label ids concatenated the same way.
    """
    model.eval()

    # Follow whatever device the model lives on instead of hard-coding CPU,
    # so the function keeps working if the model is moved to another device.
    device = next(model.parameters()).device

    loss_val_total = 0
    predictions, true_vals = [], []

    for batch in dataloader_val:
        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}

        with torch.no_grad():
            outputs = model(**inputs)

        # With labels supplied, the first two outputs are read as (loss, logits).
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        predictions.append(logits.detach().cpu().numpy())
        true_vals.append(inputs['labels'].cpu().numpy())

    loss_val_avg = loss_val_total/len(dataloader_val)

    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)

    return loss_val_avg, predictions, true_vals

In [28]:
import os

# torch.save does not create missing directories; make sure the checkpoint
# folder exists so a fresh checkout survives Restart & Run All.
os.makedirs('models', exist_ok=True)

# Send batches to the device the model lives on rather than hard-coding CPU.
device = next(model.parameters()).device

for epoch in tqdm(range(1, epochs+1)):

    model.train()

    loss_train_total = 0

    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:

        model.zero_grad()

        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        outputs = model(**inputs)

        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()

        # Cap the global gradient norm at 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        # Show the batch loss as returned by the model. The previous
        # `/ len(batch)` divided by the number of tensors in the batch tuple
        # (3), not by the number of samples, which mis-scaled the display.
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

    # One checkpoint per epoch.
    torch.save(model.state_dict(), f'models/finetuned_electra_epoch_{epoch}.model')

    tqdm.write(f'\nEpoch {epoch}')

    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training loss: {loss_train_avg}')

    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/127 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.0295860114998705
Validation loss: 0.9610641831937043
F1 Score (Weighted): 0.44386065105653905


Epoch 2:   0%|          | 0/127 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.9637022792823672
Validation loss: 0.8942327343899271
F1 Score (Weighted): 0.6431433254416317


Epoch 3:   0%|          | 0/127 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.9212609773545753
Validation loss: 0.8582068914952485
F1 Score (Weighted): 0.6434766214177979


Epoch 4:   0%|          | 0/127 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.8919135726342989
Validation loss: 0.8368463879046233
F1 Score (Weighted): 0.6332607116920842


Epoch 5:   0%|          | 0/127 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.8793646195742089
Validation loss: 0.8349919630133588
F1 Score (Weighted): 0.6471785225718195


In [30]:
# Restore the weights saved after epoch 5, mapping all tensors onto the CPU.
checkpoint_path = 'models/finetuned_electra_epoch_5.model'
state_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))
model.load_state_dict(state_dict)

# Re-score the validation set and print the per-class hit counts.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class(predictions, true_vals)

Class: 0
Accuracy: 34/37

Class: 1
Accuracy: 0/13

Class: 2
Accuracy: 15/18



In [50]:
# Raw validation labels followed by the raw logits, one array after the other.
print(true_vals, predictions, sep='\n')

[1 2 0 2 1 0 1 1 0 0 1 2 2 2 0 0 0 0 0 0 0 2 0 0 0 2 2 1 0 0 0 0 1 2 0 2 2
 0 1 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 2 2 2 0 2 2 2]
[[ 0.75248384 -0.41496083 -0.4856582 ]
 [-0.14194034 -0.11898068  0.24542916]
 [ 0.74668944 -0.42942405 -0.48894396]
 [-0.15658934 -0.11812048  0.26045015]
 [ 0.00734226 -0.16243663  0.13312449]
 [ 0.48078528 -0.3187137  -0.2184791 ]
 [-0.11070161 -0.13070089  0.2216448 ]
 [-0.0313507  -0.14750892  0.15956946]
 [ 0.7650405  -0.41029516 -0.49204502]
 [ 0.74409    -0.41239318 -0.49250978]
 [ 0.4694721  -0.35700998 -0.17277478]
 [ 0.7250892  -0.43432662 -0.44397727]
 [-0.19618417 -0.1008658   0.2918133 ]
 [-0.13877791 -0.11648835  0.24516325]
 [ 0.7576303  -0.39930782 -0.48320243]
 [ 0.7494028  -0.41450962 -0.43213767]
 [ 0.76045114 -0.42635527 -0.46692762]
 [ 0.78176314 -0.40504798 -0.4872813 ]
 [ 0.77728254 -0.41021258 -0.48305684]
 [ 0.76856697 -0.40208617 -0.48172393]
 [ 0.74877006 -0.4297683  -0.48166183]
 [-0.08302069 -0.15043291  0.22196342]
 [ 

In [51]:
# True class ids and arg-max predicted class ids, both as flat arrays.
labels_flat = true_vals.flatten()
preds_flat = np.argmax(predictions, axis=1).flatten()

# Predictions first, then the ground truth.
print(preds_flat, labels_flat, sep='\n')

[0 2 0 2 2 0 2 2 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 2 0 2 2
 0 0 2 0 0 0 2 2 0 0 0 2 0 0 0 0 0 0 2 0 2 2 0 0 2 0 2 2 2 2 2]
[1 2 0 2 1 0 1 1 0 0 1 2 2 2 0 0 0 0 0 0 0 2 0 0 0 2 2 1 0 0 0 0 1 2 0 2 2
 0 1 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 2 2 2 0 2 2 2]


In [52]:
# Rows are true labels, columns are predicted labels. weights, dtype and name
# are left at their defaults (None, int32, None).
tf.math.confusion_matrix(labels_flat, preds_flat, num_classes=3)

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[34,  0,  3],
       [ 6,  0,  7],
       [ 3,  0, 15]])>

In [55]:
# Shape of the flattened validation label array (one entry per validation sample).
labels_flat.shape

(68,)

In [64]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Class ids and their display names, in matching order.
# NOTE(review): the 0/1/2 -> neutral/positive/negative order is taken from the
# original hard-coded lists -- confirm against the labelling scheme.
class_ids = [0, 1, 2]
class_names = ['neutral', 'positive', 'negative']

print(classification_report(labels_flat, preds_flat, zero_division=0))

# Pin `labels` so the matrix is always 3x3. Left to infer labels from the data,
# confusion_matrix shrinks when a class is absent from both arrays and would
# then no longer fit the three row/column names.
pd.DataFrame(
    confusion_matrix(labels_flat, preds_flat, labels=class_ids),
    index=[['actual'] * len(class_names), class_names],
    columns=[['predicted'] * len(class_names), class_names],
)

              precision    recall  f1-score   support

           0       0.79      0.92      0.85        37
           1       0.00      0.00      0.00        13
           2       0.60      0.83      0.70        18

    accuracy                           0.72        68
   macro avg       0.46      0.58      0.52        68
weighted avg       0.59      0.72      0.65        68



Unnamed: 0_level_0,Unnamed: 1_level_0,predicted,predicted,predicted
Unnamed: 0_level_1,Unnamed: 1_level_1,neutral,positive,negative
actual,neutral,34,0,3
actual,positive,6,0,7
actual,negative,3,0,15
