Getting pretrained model from HuggingFace

In [3]:
model_name="nlptown/bert-base-multilingual-uncased-sentiment"

In [1]:
from transformers import AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
model = AutoModelForSequenceClassification.from_pretrained(model_name)

Getting the tokenizer of the model

In [5]:
from transformers import AutoTokenizer

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

Creating a classifier

In [7]:
from transformers import pipeline

In [9]:
classifier = pipeline("sentiment-analysis", model=model,tokenizer=tokenizer)

In [46]:
import pandas as pd
texts = ["Je n'ai pas du tout aimé ce cours.", "Eigentlich hat mir der Kurs gefallen. Es hat mir gefallen.", "I love you"]
outputs = []
classification_results = classifier(texts)
for text,result in zip(texts,classification_results):
    outputs.append({'text':text,'label':result['label'],'score':result['score']})

pd.DataFrame(outputs)

Unnamed: 0,text,label,score
0,Je n'ai pas du tout aimé ce cours.,1 star,0.472083
1,Eigentlich hat mir der Kurs gefallen. Es hat m...,3 stars,0.367507
2,I love you,5 stars,0.854681


Training our model

In [51]:
batch= tokenizer(
    ["I liked NLP", "I do not like the course."],
    max_length = 521,
    truncation = True,
    padding = True,
    return_tensors = "pt"
)
print(batch)

{'input_ids': tensor([[  101,   151, 11531, 10163, 19848, 10373,   102,     0,     0],
        [  101,   151, 10154, 10497, 11531, 10103, 13039,   119,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1]])}


In [53]:
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [54]:
output = model(**batch)

In [55]:
from torch import nn

In [56]:
prediction = nn.functional.softmax(output.logits, dim=1)
print(prediction)

tensor([[0.0173, 0.0330, 0.2156, 0.4621, 0.2720],
        [0.2888, 0.5213, 0.1811, 0.0077, 0.0012]], grad_fn=<SoftmaxBackward0>)


Saving the model

In [57]:
save_directory = "./save_pretrained"
tokenizer.save_pretrained(save_directory=save_directory)
model.save_pretrained(save_directory=save_directory)

Getting the saved model from directory

In [58]:
model= AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=save_directory
)