# 查看pipeline所支持的任务类型

In [1]:
from transformers.pipelines import SUPPORTED_TASKS
from transformers import pipeline
import transformers

In [3]:
for key, value in SUPPORTED_TASKS.items():
    print(key, value["type"])

audio-classification audio
automatic-speech-recognition multimodal
text-to-audio text
feature-extraction multimodal
text-classification text
token-classification text
question-answering text
table-question-answering text
visual-question-answering multimodal
document-question-answering multimodal
fill-mask text
summarization text
translation text
text2text-generation text
text-generation text
zero-shot-classification text
zero-shot-image-classification multimodal
zero-shot-audio-classification multimodal
image-classification image
image-feature-extraction image
image-segmentation multimodal
image-to-text multimodal
image-text-to-text multimodal
object-detection multimodal
zero-shot-object-detection multimodal
depth-estimation image
video-classification video
mask-generation multimodal
image-to-image image


# 使用任务名直接创建pipeline

In [6]:
# 此处传入上述打出来的任务名即可创建
# 默认载入的都是英文模型
pipe = transformers.pipeline("text-classification")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


In [7]:
pipe("very good")

  attn_output = torch.nn.functional.scaled_dot_product_attention(


[{'label': 'POSITIVE', 'score': 0.9998520612716675}]

# 指定任务类型和模型后再创建Pipeline

In [None]:
# 模型网址：https://huggingface.co/models，传入模型完整名称即可
pipe2 = transformers.pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese")

# 先加载模型再创建Pipeline

In [17]:
# 使用此方法需同时指定model和tokenizer
model = transformers.AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
tokenizer = transformers.AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
pipe3 = transformers.pipeline("text-classification", model=model, tokenizer=tokenizer)
pipe3

Device set to use cuda:0


<transformers.pipelines.text_classification.TextClassificationPipeline at 0x227d34c2370>

In [15]:
pipe3("我觉得不太行")
pipe3.model.device

device(type='cuda', index=0)

# 确定Pipeline参数

In [20]:
qa_pipe = transformers.pipeline("question-answering",model="uer/roberta-base-chinese-extractive-qa")
qa_pipe

config.json:   0%|          | 0.00/452 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/407M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/216 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/407M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0


<transformers.pipelines.question_answering.QuestionAnsweringPipeline at 0x227e29d5d60>

In [None]:
# 根据上面打印出的结果进行调用，然后Crtl+点击即可访问，__call__属性即为参数描述
transformers.pipelines.question_answering.QuestionAnsweringArgumentHandler

# Pipeline背后的实现

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [7]:
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
model = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [4]:
input_txt = "我觉得不太行"
inputs = tokenizer(input_txt, return_tensors="pt")  # pt表示pytorch tensor
print(inputs)

{'input_ids': tensor([[ 101, 2769, 6230, 2533,  679, 1922, 6121,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}


In [6]:
# **的作用在于将字典拆成关键字传入
# 即：input_ids = tensor(...), token_type_ids = tensor(...)
res = model(**inputs)
print(res)

SequenceClassifierOutput(loss=None, logits=tensor([[ 1.7459, -1.8919]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)


In [9]:
logits = res.logits
logits = torch.softmax(logits, dim=1)   # 上述结果为二维，按照第二个维度进行
print(logits)   # 此输出结果即为类别概率

tensor([[0.9744, 0.0256]], grad_fn=<SoftmaxBackward0>)


In [15]:
pred = torch.argmax(logits).item()
pred

0

In [19]:
config = model.config
print(config)
result = model.config.id2label.get(pred)
result

BertConfig {
  "_attn_implementation_autoset": true,
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "negative (stars 1, 2 and 3)",
    "1": "positive (stars 4 and 5)"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "negative (stars 1, 2 and 3)": 0,
    "positive (stars 4 and 5)": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}



'negative (stars 1, 2 and 3)'