In [None]:
import torch
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
from torch.utils.data import DataLoader
from datasets import Dataset

# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Read the test set from a path relative to the working directory.
test_essays = pd.read_csv('data/test.csv')

# Paths of the saved model and tokenizer (presumably written by a separate
# training step — verify they exist before running).
model_file = "model/trained_lm"
tokenizer_file = "model/trained_tokenizer"

# Restore the sequence-classification model from local files only, then read
# the configuration stored at the same path.
model = AutoModelForSequenceClassification.from_pretrained(
    model_file,
    local_files_only=True,
)
config = AutoConfig.from_pretrained(model_file)

# Tokenize
def get_tokens(text, tokenizer, device, max_length):
    """Tokenize ``text`` and move every resulting tensor to ``device``.

    Args:
        text: Input passed straight through to ``tokenizer``.
        tokenizer: Callable invoked with ``padding='max_length'``,
            ``truncation=True``, ``max_length=max_length`` and
            ``return_tensors="pt"``; its result must expose ``.items()``.
        device: Target passed to each value's ``.to(...)``.
        max_length: Length used for both padding and truncation.

    Returns:
        dict: Same keys as the tokenizer output, each value moved to ``device``.
    """
    encoded = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

    # Build a plain dict so callers get ordinary key -> tensor pairs.
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)
    return on_device

# Restore the tokenizer from the path held in tokenizer_file.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_file, force_download=False)

# NOTE(review): in transformers, `config.max_length` is documented as a
# text-generation setting (default 20), not the model's maximum input size.
# Confirm this is the sequence length that was used at training time.
max_length = config.max_length


def _encode_row(row):
    """Tokenize one record's ``full_text`` and return it under the ``tokens`` key."""
    return {"tokens": get_tokens(row["full_text"], tokenizer, device, max_length)}


# Wrap the essays in a torch-formatted Dataset, attach the encodings row by
# row, then drop the raw text column.
test_data = (
    Dataset.from_pandas(test_essays, preserve_index=False)
    .with_format("torch")
    .map(_encode_row)
    .remove_columns("full_text")
)

In [None]:
# Create predictions
BATCH_SIZE = 12  # number of essays scored per forward pass

all_predictions = np.zeros(len(test_data))
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

model.to(device)
# Inference only: make sure dropout and similar layers are disabled even if
# this cell is re-run after the model was switched to training mode.
model.eval()

# Pre-bind the loop variables so the cleanup below also works when test_data
# is empty and the loop body never runs.
model_inputs = outputs = predictions = None

with torch.no_grad():
    for i, batch in enumerate(test_dataloader):
        # Forward every tensor the tokenizer produced instead of hard-coding
        # key names, so tokenizers that do not emit `token_type_ids` work too.
        # squeeze(1) drops the singleton dimension the stored encodings appear
        # to carry (each text was tokenized on its own with return_tensors="pt").
        model_inputs = {
            name: tensor.squeeze(1).to(device)
            for name, tensor in batch['tokens'].items()
        }
        outputs = model(**model_inputs)

        # Assumes one logit per essay (single-output head) — TODO confirm.
        predictions = outputs['logits'].squeeze(1)

        # Move predictions to host memory as a NumPy array.
        predictions = predictions.detach().cpu().numpy()

        # shuffle=False keeps dataset order, so batch i fills a contiguous slice.
        start_idx = i * BATCH_SIZE
        end_idx = start_idx + len(predictions)
        all_predictions[start_idx:end_idx] = predictions

# Clear CUDA cache to free up memory
# This is helpful when using a notebook for inference because memory doesn't always clear nicely
if device != 'cpu':
    # Drop the references to the last batch before asking PyTorch to empty its cache.
    del model_inputs, outputs, predictions
    torch.cuda.empty_cache()