In [2]:
# Pipeline的使用
# pipeline 是一种设计方式，将模型的输入、输出、预处理、后处理等步骤封装在一起，方便使用。

from transformers.pipelines import SUPPORTED_TASKS

In [5]:
for k,v in SUPPORTED_TASKS.items():
    print(k,v["type"])

audio-classification audio
automatic-speech-recognition multimodal
text-to-audio text
feature-extraction multimodal
text-classification text
token-classification text
question-answering text
table-question-answering text
visual-question-answering multimodal
document-question-answering multimodal
fill-mask text
summarization text
translation text
text2text-generation text
text-generation text
zero-shot-classification text
zero-shot-image-classification multimodal
zero-shot-audio-classification multimodal
image-classification image
image-feature-extraction image
image-segmentation multimodal
image-to-text multimodal
image-text-to-text multimodal
object-detection multimodal
zero-shot-object-detection multimodal
depth-estimation image
video-classification video
mask-generation multimodal
image-to-image image


In [6]:
# Pipeline的创建和使用方式
from transformers import pipeline


In [13]:
# 1. 根据任务类型直接创建Pipeline，默认都是英文模型
pipe = pipeline("text-classification")

result = pipe("like you")
print(result)



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


[{'label': 'POSITIVE', 'score': 0.999830961227417}]


In [16]:
# 2. 根据任务类型和模型名称创建Pipeline
pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese")

result = pipe("I love you")
print(result)

Device set to use mps:0


[{'label': 'positive (stars 4 and 5)', 'score': 0.9009531140327454}]


  return forward_call(*args, **kwargs)


In [18]:
result = pipe("今天天气真差劲呀！")
print(result)

[{'label': 'negative (stars 1, 2 and 3)', 'score': 0.9818915724754333}]


  return forward_call(*args, **kwargs)


In [22]:
# 3. 预先加载模型和分词器，再创建 pipeline
from transformers import AutoModelForSequenceClassification,AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")

pipe=pipeline("text-classification",model=model,tokenizer=tokenizer)

result=pipe("今天天气真差劲呀！")
print(result)

Device set to use mps:0


[{'label': 'negative (stars 1, 2 and 3)', 'score': 0.9818915724754333}]


  return forward_call(*args, **kwargs)


In [23]:
pipe=pipeline("text-classification",model=model)

result=pipe("今天天气真差劲呀！")
print(result)

Exception: Impossible to guess which tokenizer to use. Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer.

In [24]:
pipe.model.device

device(type='mps', index=0)

In [38]:
pipe = pipeline("text-classification",model="uer/roberta-base-finetuned-dianping-chinese",device="cpu")

Device set to use cpu


In [40]:
import torch
import time
times = []
for i in range(100):
    torch.mps.synchronize()
    start = time.time()
    pipe("今天天气真差劲呀！")
    end = time.time()
    times.append(end - start)


print(sum(times)/100)

0.04808483839035034


In [42]:
pipe = pipeline("text-classification",model="uer/roberta-base-finetuned-dianping-chinese",device=0)

Device set to use mps:0


In [45]:
import torch
import time
times = []
for i in range(100):
    torch.mps.synchronize()
    start = time.time()
    pipe("今天天气真差劲呀！")
    end = time.time()
    times.append(end - start)


print(sum(times)/100)

0.011895489692687989


In [48]:
# 4. 使用pipeline的batch参数
pipe = pipeline("text-classification",model="uer/roberta-base-finetuned-dianping-chinese",batch_size=10)
pipe

Device set to use mps:0


<transformers.pipelines.text_classification.TextClassificationPipeline at 0x11eb09370>

In [None]:
from transformers import TextClassificationPipeline
TextClassificationPipeline?
 

[0;31mInit signature:[0m [0mTextClassificationPipeline[0m[0;34m([0m[0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Text classification pipeline using any `ModelForSequenceClassification`. See the [sequence classification
examples](../task_summary#sequence-classification) for more information.

Example:

```python
>>> from transformers import pipeline

>>> classifier = pipeline(model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
>>> classifier("This movie is disgustingly good !")
[{'label': 'POSITIVE', 'score': 1.0}]

>>> classifier("Director tried too much.")
[{'label': 'NEGATIVE', 'score': 0.996}]
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This text classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"sentiment-analysis"` (for classifying sequences according to positive or negative sentiments).

If multiple c

In [None]:
# 其他 pipeline 展示
checkpoint = "google/owlvit-base-patch32"

In [52]:
# pipeline 的背后实现
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
model = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")

In [57]:
input_text = "今天天气真差劲呀！"
inputs = tokenizer(input_text,return_tensors='pt')
inputs

{'input_ids': tensor([[ 101,  791, 1921, 1921, 3698, 4696, 2345, 1226, 1435, 8013,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [69]:
res = model(**inputs) # 利用字典解包的方式，将输入的参数传给模型
# res = model(input_ids=inputs["input_ids"], token_type_ids=inputs["token_type_ids"], attention_mask=inputs["attention_mask"] )
res

SequenceClassifierOutput(loss=None, logits=tensor([[ 1.9391, -2.0540]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
logits = res.logits # 提取模型输出的 logits 结果
logits = torch.softmax(logits, dim=-1) # 将模型输出的 logits 结果给归一化
logits

tensor([[0.9819, 0.0181]], grad_fn=<SoftmaxBackward0>)

In [72]:
pred = torch.argmax(logits).item() # # 取概率最大的类别索引，并转为Python整数
pred # 显示预测的类别编号

0

In [73]:
model.config

BertConfig {
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "negative (stars 1, 2 and 3)",
    "1": "positive (stars 4 and 5)"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "negative (stars 1, 2 and 3)": 0,
    "positive (stars 4 and 5)": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.54.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [74]:
model.config.id2label

{0: 'negative (stars 1, 2 and 3)', 1: 'positive (stars 4 and 5)'}

In [None]:
result = model.config.id2label.get(pred) # 使用.get()字典函数安全取值
result

'negative (stars 1, 2 and 3)'

In [82]:

print(inputs.get("input_ids"))
print()
print(inputs.get("input_is"))

tensor([[ 101,  791, 1921, 1921, 3698, 4696, 2345, 1226, 1435, 8013,  102]])

None
