In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer,TrainingArguments
import torch

In [2]:
# Dataset wrapper that turns tokenizer encodings (and optional labels) into per-sample tensor dicts
class processDataset(torch.utils.data.Dataset):
    """Map-style dataset over tokenizer encodings and, optionally, labels.

    Parameters
    ----------
    encodings : mapping
        Maps each feature name to a per-sample sequence of values. Only
        ``.items()`` and integer indexing of the values are used here.
    labels : sequence, optional
        One label per sample. May be omitted (``None``) for inference, in
        which case the returned items carry no ``'labels'`` entry.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict with one tensor per encoding key."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Labels are only attached when the dataset was built with them.
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of samples in the dataset."""
        if self.labels is not None:
            return len(self.labels)
        # Without labels, fall back to the length of the first encoded feature.
        for _, values in self.encodings.items():
            return len(values)
        return 0

In [3]:
def class_prediction(input:str, model_checkpoint:str) -> str:
    '''
    Predict the hardware class of a single component description.

    input: an input description string
    model_checkpoint: location of the fine-tuned model; passed unchanged to
        ``from_pretrained`` for both the tokenizer and the classifier
    returns the predicted class name: 'Resistors', 'Capacitors' or 'Others'
    '''
    # load model
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=3)
    model.eval()

    # Run one forward pass directly rather than through Trainer: a single
    # prediction needs no TrainingArguments, no output directory and no
    # placeholder label.
    # NOTE(review): truncation stays disabled, so over-long descriptions are
    # not shortened before reaching the model -- confirm this is intended.
    encoded = tokenizer([input], padding=True, truncation=False, return_tensors='pt')
    with torch.no_grad():
        logits = model(**encoded).logits

    # Extract a plain int so the lookup below never depends on the truthiness
    # of an array-valued comparison.
    pred_id = logits.argmax(dim=-1)[0].item()

    # decode the result into hardware class
    class_names = {1: 'Resistors', 2: 'Capacitors'}
    return class_names.get(pred_id, 'Others')

In [4]:
# Example: classify one component description with the checkpoint stored at ./class_prediction_model
input = 'CAP,CHIP CERAMIC 0.2PF 0.1PF C0G/NP0 0402 50V ROHS'
model_checkpoint = './class_prediction_model'
result = class_prediction(input=input, model_checkpoint=model_checkpoint)
print(result)

Capacitors
