## 1. Import Modules and Data
Load the tokenizer from Hugging Face.

In [2]:
from transformers import AutoTokenizer
import torch
import config 
# Choose the compute device once; later cells move the model and the input
# tensors onto it. Falls back to CPU when CUDA is not available.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer from the location given by config.pretrained_path.
tokenizer = AutoTokenizer.from_pretrained(config.pretrained_path)
print("load tokenizer")

load tokenizer


## 2. Load Trained Model

In [2]:
import torch
from modules.bert import BERTTextClassifier
import config 

# Instantiate the classifier from the pretrained weights at config.pretrained_path.
# NOTE(review): num_frozen_layers=12 presumably freezes the encoder layers;
# confirm against modules.bert.
bert_clf = BERTTextClassifier.from_pretrained(
    config.pretrained_path,
    num_frozen_layers=12,
)

# Restore the fine-tuned weights stored under the "model" key of the checkpoint.
# map_location=device remaps the saved tensors onto the device selected earlier,
# so a checkpoint written on a GPU still loads on a CPU-only machine.
# torch.load unpickles the file, so only load checkpoints from a trusted source.
bert_clf.load_state_dict(
    torch.load(config.checkpoint_dir / "bert_clf_3.pth", map_location=device)["model"]
)
bert_clf.to(device)

# This notebook only runs inference: switch to evaluation mode so train-time
# layers such as dropout do not make predictions vary between runs.
bert_clf.eval()
print("load done!")

Number of trainable parameters: 24.43M
load done!


## 3. Inference
Here I consider a text classification task with the simple examples below, where '0' represents negative and '1' represents positive.

In [3]:
text = "I was beaten by you!"

# Wrapping the token ids in a list adds the batch dimension (batch size 1);
# the tensor is created directly on the device the model lives on.
input_ids = torch.tensor([tokenizer.encode(text)], device=device)

# Prediction needs no gradients, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    predicted_class = bert_clf.text_clf(input_ids)

print(f"Predicted class index: {predicted_class.item()}")

Predicted class index: 0


In [4]:
text = "I love the LLM world!"

# Wrapping the token ids in a list adds the batch dimension (batch size 1);
# the tensor is created directly on the device the model lives on.
input_ids = torch.tensor([tokenizer.encode(text)], device=device)

# Prediction needs no gradients, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    predicted_class = bert_clf.text_clf(input_ids)

print(f"Predicted class index: {predicted_class.item()}")

Predicted class index: 1


In [5]:
text = "Successful test!"

# Wrapping the token ids in a list adds the batch dimension (batch size 1);
# the tensor is created directly on the device the model lives on.
input_ids = torch.tensor([tokenizer.encode(text)], device=device)

# Prediction needs no gradients, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    predicted_class = bert_clf.text_clf(input_ids)

print(f"Predicted class index: {predicted_class.item()}")

Predicted class index: 1


In [6]:
text = "HA HA HA!"

# Wrapping the token ids in a list adds the batch dimension (batch size 1);
# the tensor is created directly on the device the model lives on.
input_ids = torch.tensor([tokenizer.encode(text)], device=device)

# Prediction needs no gradients, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    predicted_class = bert_clf.text_clf(input_ids)

print(f"Predicted class index: {predicted_class.item()}")

Predicted class index: 1
