In [1]:
import torch
import pandas as pd
from transformers import BertTokenizer

In [None]:
# Run a saved sequence classifier over the test sentences and write one
# predicted label per input row to predictions.csv.

# Model location
model_file = "pytorch_model.bin"
# Load model.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
# map_location="cpu" keeps the weights on the CPU, matching the tokenizer
# output below (which is never moved to another device) and the .numpy()
# conversion at the end.
model = torch.load(model_file, map_location="cpu")
if not isinstance(model, torch.nn.Module):
    # A bare state_dict is not callable; fail here with an explicit message
    # instead of an opaque "object is not callable" at prediction time.
    raise TypeError(
        f"{model_file} did not contain a pickled torch.nn.Module "
        f"(got {type(model).__name__}); a state_dict must be loaded into a "
        "model instance before running predictions"
    )
# Switch to inference mode so layers such as dropout behave deterministically.
model.eval()

# Load test data csv
test_data_file = "../data/glue_data/test.csv"
# Get only the sentence column
test_data = pd.read_csv(test_data_file)["sentence"].to_list()

model_name = "bert-base-uncased"
# max sequence length for each document/sentence sample
max_length = 512
# number of sentences sent through the model per forward pass; bounds peak
# memory instead of pushing the whole test set through in one call
batch_size = 32
# load the tokenizer
tokenizer = BertTokenizer.from_pretrained(model_name, do_lower_case=True)

# Run predictions batch by batch
probability_batches = []
labels = []
# no_grad: gradients are not needed for inference, so skip autograd tracking.
with torch.no_grad():
    for start in range(0, len(test_data), batch_size):
        encoded = tokenizer(
            test_data[start:start + batch_size],
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # The first element of the model output is used as the logits,
        # as in the original script.
        logits = model(**encoded)[0]
        probabilities = torch.softmax(logits, dim=1)
        probability_batches.append(probabilities)
        # Take the argmax while the data is still a tensor; torch.argmax does
        # not accept a NumPy array.
        labels.extend(probabilities.argmax(dim=1).tolist())

# Get the predictions (per-class probabilities for every sentence)
if probability_batches:
    predictions = torch.cat(probability_batches).numpy()
else:
    # No input sentences: keep an empty array so `predictions` is still defined.
    predictions = torch.empty(0, 0).numpy()

# Create a dataframe
df = pd.DataFrame({"id": list(range(len(labels))), "label": labels})
# Save the dataframe
df.to_csv("predictions.csv", index=False)