## Import Libraries

In [14]:
import torch
from transformers import BertForSequenceClassification
import torch
import torch.nn as nn
import transformers
from transformers import AutoModel, BertTokenizerFast

## Loading our Trained Model

### Define the Default BERT Architecture

In [18]:
# Load the pretrained BERT-base (uncased) encoder from the 'bert-base-uncased'
# checkpoint. This instance is wrapped by the custom classification model below.
BERT = AutoModel.from_pretrained('bert-base-uncased')

# The tokenizer load is left commented out in this notebook.
# NOTE(review): presumably the inputs are tokenized elsewhere — confirm before
# deleting this line.
# tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Define Custom BERT Architecture

In [19]:
class customBERTArchitecture(nn.Module):
    """BERT encoder followed by a small two-layer classification head.

    The second element returned by ``bert`` (used here as the pooled sentence
    representation) is passed through
    ``Linear -> ReLU -> Dropout -> Linear -> LogSoftmax``.

    The model outputs *log*-probabilities (``nn.LogSoftmax``), not
    probabilities; call ``.exp()`` on the output to obtain probabilities.

    The default arguments reproduce the original fixed 768 -> 512 -> 2 head,
    and the sub-module attribute names (``bert``, ``fullyConnected1``,
    ``fullyConnected2``) are unchanged, so previously saved state dicts keep
    loading.

    Args:
        bert: Encoder module. It is invoked as
            ``bert(sent_id, attention_mask=mask, return_dict=False)`` and must
            return a 2-tuple whose second element is fed to the first dense
            layer.
        hidden_size: Feature width of the encoder output fed to the head
            (768 for the BERT-base encoder used in this notebook).
        intermediate_size: Width of the hidden dense layer.
        num_classes: Number of output classes.
        dropout_prob: Dropout probability applied after the ReLU.
    """

    def __init__(self, bert, hidden_size=768, intermediate_size=512,
                 num_classes=2, dropout_prob=0.1):
        super().__init__()
        self.bert = bert

        # Dropout layer (only active in training mode)
        self.dropout = nn.Dropout(dropout_prob)

        # ReLU activation function
        self.relu = nn.ReLU()

        # Dense layer 1: encoder features -> hidden representation
        self.fullyConnected1 = nn.Linear(hidden_size, intermediate_size)

        # Dense layer 2 (output layer): hidden representation -> class scores
        self.fullyConnected2 = nn.Linear(intermediate_size, num_classes)

        # Log-softmax over dim 1 (the class dimension of a 2-D output)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        """Run the encoder and the classification head.

        Args:
            sent_id: Token-id tensor passed to the encoder as its first
                positional argument (assumed shape ``(batch, seq_len)`` —
                TODO confirm against the caller).
            mask: Attention mask passed to the encoder as ``attention_mask``.

        Returns:
            Tensor of log-probabilities; its last dimension has
            ``num_classes`` entries.
        """
        # return_dict=False makes the encoder return a plain tuple; only the
        # second element is used by the head.
        _, cls_hs = self.bert(sent_id, attention_mask=mask, return_dict=False)

        # Hidden layer: dense -> ReLU -> dropout
        hidden = self.dropout(self.relu(self.fullyConnected1(cls_hs)))

        # Output layer followed by log-softmax
        logits = self.fullyConnected2(hidden)
        return self.softmax(logits)

### Load the Weights of our Pre-trained Custom BERT Model

In [20]:
# Create an instance of the model around the pretrained encoder
model = customBERTArchitecture(BERT)

# Load the saved weights.
# map_location='cpu' makes the checkpoint loadable on machines without a GPU
# even if it was saved from CUDA tensors; the freshly built model lives on the
# CPU at this point anyway.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load('saved_weights.pt', map_location='cpu'))

# Set the model to evaluation mode (disables dropout); the returned module is
# displayed as the cell output
model.eval()

customBERTArchitecture(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

### Select the Device (GPU if Available)

In [21]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Using Trained Model to Predict

In [None]:
# Get predictions for test data
# NOTE(review): testSequenceTensor / testMaskTensor are not defined in the
# visible cells — confirm they are created before this cell runs.

# The model and its inputs must live on the same device; the inputs are moved
# to `device` below, so the model is moved there as well.
model = model.to(device)
model.eval()  # make sure dropout is disabled for inference

with torch.no_grad():
    preds = model(testSequenceTensor.to(device), testMaskTensor.to(device))
    # No graph is recorded under no_grad, so the result can be converted
    # directly once it is back on the CPU.
    preds = preds.cpu().numpy()

### Check Model's Performance on Testing Data

In [None]:
# Model's performance on the test set.
# The predicted class of each row is the column index holding the largest score.
# NOTE(review): classification_report and testYTensor are not imported/defined
# in the visible cells — confirm they exist before this cell runs.
predications = preds.argmax(axis=1)
print(classification_report(testYTensor, predications))

### Plotting Change in Training and Validation Losses

#### Convert lists to arrays

In [None]:
# Convert both loss histories (trainingLosses, validationLosses) into numpy
# arrays; np.array is applied to each one independently.
# NOTE(review): neither list is defined in the visible cells — presumably they
# come from the training run; confirm.
trainingLosses, validationLosses = (
    np.array(history) for history in (trainingLosses, validationLosses)
)

#### Creating an x-axis

In [None]:
# x-axis values 0, 1, ..., epochs - 1
X = np.arange(epochs)

In [None]:
# Display both shapes side by side; they need to match for the plot below.
(X.shape, trainingLosses.shape)

#### Plotting losses

In [None]:
# Draw training and validation loss against the epoch axis on a single
# 10x8-inch figure, using the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(10, 8))
ax.grid()

ax.set_xlabel("Epochs")
ax.set_ylabel("Loss Value")

ax.plot(X, trainingLosses, label='Training Loss', marker='x', color='C1', alpha=0.6)
ax.plot(X, validationLosses, label='Validation Loss', marker='x', color='C4', alpha=0.4)

ax.legend()
plt.show()

---