# Test: running the saved BERT MNLI model on the GLUE MNLI test sets

## Loading Dataset

In [1]:
# Imports for loading the dataset, plus the tokenizer and padding-collator classes used in later cells
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Load the GLUE MNLI dataset; the "test_matched" and "test_mismatched" splits are the ones used below
raw_datasets = load_dataset("glue", "mnli")

## Load saved model

In [2]:
import os

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Directory holding the saved model and tokenizer.
# Set the BERT_MNLI_MODEL_DIR environment variable to run this notebook on another
# machine; when it is unset, the original local path is used unchanged.
output_dir = os.environ.get(
    "BERT_MNLI_MODEL_DIR",
    "C:/Users/steve/HuggingFace Models/BERT_MNLI_model",
)

# Load the sequence-classification model and its tokenizer from the same directory
model = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

## Tokenize

In [3]:
def tokenize_function(example):
    """Tokenize premise/hypothesis pairs with the loaded tokenizer.

    Args:
        example: Mapping with "premise" and "hypothesis" entries. Because the
            function is mapped with ``batched=True``, each entry holds the
            values for a whole batch of rows.

    Returns:
        The tokenizer output for the pairs, with truncation enabled.
    """
    premises = example["premise"]
    hypotheses = example["hypothesis"]
    return tokenizer(premises, hypotheses, truncation=True)


# Apply the tokenizer to every split, one batch of rows at a time
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

In [6]:
# Post-processing to prepare for the dataloader: drop the columns the model does not
# take as input and return PyTorch tensors.
columns_to_drop = ["label", "premise", "hypothesis", "idx"]

# Only remove columns that are still there, so re-running this cell after it has
# already executed does not fail on the missing columns.
present_columns = {
    name
    for split_columns in tokenized_datasets.column_names.values()
    for name in split_columns
}
stale_columns = [name for name in columns_to_drop if name in present_columns]
if stale_columns:
    tokenized_datasets = tokenized_datasets.remove_columns(stale_columns)

tokenized_datasets.set_format("torch")  # PyTorch tensors

In [7]:
# Show which columns remain in each of the two test splits
for split_name in ("test_matched", "test_mismatched"):
    print(tokenized_datasets[split_name].column_names)

['input_ids', 'token_type_ids', 'attention_mask']
['input_ids', 'token_type_ids', 'attention_mask']


## Data Loader

In [8]:
from torch.utils.data import DataLoader
from datasets import concatenate_datasets

# Collator used for dynamic padding of each batch
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Both test loaders share the same settings: batches of 8, collated by data_collator
loader_options = {"batch_size": 8, "collate_fn": data_collator}

test_matched_dataloader = DataLoader(tokenized_datasets["test_matched"], **loader_options)

test_mismatched_dataloader = DataLoader(tokenized_datasets["test_mismatched"], **loader_options)

## Move model to GPU (or CPU if no GPU is available)

In [9]:
# Pick the compute device (CUDA when available, otherwise CPU) and move the model onto it
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
device  # last expression: displays the selected device

device(type='cuda')

## Test on matched dataset

In [11]:
from tqdm import tqdm  # progress bar for the inference loop

# Switch to evaluation mode so dropout and other training-only behaviour is disabled
model.eval()

# Collects the predicted class index of every example in the matched test set
test_matched_predictions = []

# Iterate over the batches produced by test_matched_dataloader
for batch in tqdm(test_matched_dataloader, desc="Running inference"):
    # Put every tensor on the selected device, skipping any "labels" entry
    batch = {name: tensor.to(device) for name, tensor in batch.items() if name != "labels"}

    # Inference only, so gradient tracking is switched off for the forward pass
    with torch.no_grad():
        outputs = model(**batch)

    # Raw class scores, before any softmax
    logits = outputs.logits

    # Predicted class = index of the largest logit along the last dimension
    predictions = logits.argmax(dim=-1)

    # Accumulate this batch's predictions
    test_matched_predictions.extend(predictions.cpu().numpy())

Running inference: 100%|██████████| 1225/1225 [00:42<00:00, 28.69it/s]


## Test on mismatched dataset

In [14]:
from tqdm import tqdm  # import tqdm for progress bar

# Set the model to evaluation mode (deactivates dropout and other training behaviors)
model.eval()

# Collects the predicted class index of every example in the mismatched test set
test_mismatched_predictions = []

# Loop over each batch produced by test_mismatched_dataloader
for batch in tqdm(test_mismatched_dataloader, desc="Running inference"):
    # Move each tensor in the batch to the device (GPU/CPU). Any "labels" entry is
    # skipped so that only the input tensors are handed to the model.
    batch = {k: v.to(device) for k, v in batch.items() if k != "labels"}

    # Disable gradient calculation: this is inference only
    with torch.no_grad():
        # Forward pass to get the model outputs
        outputs = model(**batch)

    # Get the logits (raw predictions before any activation function like softmax)
    logits = outputs.logits

    # Convert logits to predictions by selecting the index with the highest value
    predictions = torch.argmax(logits, dim=-1)

    # Append this batch's predictions to test_mismatched_predictions
    test_mismatched_predictions.extend(predictions.cpu().numpy())

Running inference: 100%|██████████| 1231/1231 [00:43<00:00, 28.34it/s]
