### Tokenizer 使用

In [1]:
from transformers import AutoTokenizer

tokenizer_name = "uer/roberta-base-finetuned-jd-full-chinese"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
string_arr = [
    "飲料很好喝",
    "環境很髒，還有老鼠"
]
inputs = tokenizer(string_arr, padding=True, truncation=True, return_tensors="pt")
print(inputs)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'input_ids': tensor([[ 101, 7614, 3160, 2523, 1962, 1600,  102,    0,    0,    0,    0],
        [ 101, 4472, 1862, 2523, 7766, 8024, 6917, 3300, 5439, 7962,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


### Transformers model使用

In [3]:
from transformers import AutoModelForSequenceClassification

model_name = "uer/roberta-base-finetuned-jd-full-chinese"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
outputs = model(**inputs)

In [4]:
# print output's logits
print(outputs.logits)

tensor([[-2.2921, -0.9786, -0.0671,  0.8580,  0.9130],
        [ 0.7283,  0.8047,  0.2435, -1.3489, -2.3895]],
       grad_fn=<AddmmBackward0>)


### 將結果過 softmax

In [5]:
import torch

predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)

tensor([[0.0161, 0.0600, 0.1493, 0.3766, 0.3979],
        [0.3491, 0.3768, 0.2150, 0.0437, 0.0154]], grad_fn=<SoftmaxBackward0>)


### 查看model label

In [6]:
model.config.id2label

{0: 'star 1', 1: 'star 2', 2: 'star 3', 3: 'star 4', 4: 'star 5'}

### 以上程式碼簡化

In [7]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
classifier(
    [
    "飲料很好喝",
    "環境很髒，還有老鼠"
    ]
)

[{'label': 'star 5', 'score': 0.39790040254592896},
 {'label': 'star 2', 'score': 0.3767957389354706}]

# Tensorflow 模型加載

In [13]:
from transformers import AutoTokenizer, TFAutoModel

tf_model = TFAutoModel.from_pretrained("uer/roberta-base-finetuned-jd-full-chinese", from_pt=True)

ImportError: 
TFAutoModel requires the TensorFlow library but it was not found in your environment.
However, we were able to find a PyTorch installation. PyTorch classes do not begin
with "TF", but are otherwise identically named to our TF classes.
If you want to use PyTorch, please use those classes instead!

If you really do want to use TensorFlow, please follow the instructions on the
installation page https://www.tensorflow.org/install that match your environment.
