# 查看Pipeline支持的任务类型

In [None]:
from transformers.pipelines import SUPPORTED_TASKS

In [None]:
from pprint import pprint
# Pretty-print only the task names (the keys of SUPPORTED_TASKS).
pprint(SUPPORTED_TASKS.keys())

In [None]:
# Print each supported task name followed by its entry in SUPPORTED_TASKS.
for task_name, task_spec in SUPPORTED_TASKS.items():
    print(task_name, task_spec)

# Pipeline的创建与使用方式

In [None]:
from transformers import pipeline, QuestionAnsweringPipeline

## 根据任务类型直接创建Pipeline, 默认都是英文的模型

一条管道由以下部分组成：

- 一个或多个用于预处理模型输入的组件，例如分词器（tokenizer）、图像处理器（image_processor）、特征提取器（feature_extractor）或处理器（processor）。
- 一个根据输入生成预测的模型（model）。
- 可选的后处理步骤用于优化模型的输出，这些步骤也可以由处理器来处理。


如果未指定模型，将自动下载并使用该任务的默认模型。

- `pipeline()` 函数会根据你指定的 task（任务）参数（即 pipeline 的第一个参数），为你创建一个专门用于处理该任务的、预先配置好的工具对象。（在编辑器中按住 Ctrl 点击 `pipeline` 查看源码，里面会有各 task 的说明。）

包含这几个参数：

- `task`：任务类型，会创建对应该任务的 pipeline
- `model`：指定模型
- `config`：设置模型架构，例如层数多少、分类头多少
- `tokenizer`：分词器，将文本转化成 token
- `device=0`：在 GPU 上运行

In [None]:
from transformers import pipeline

# Bind the pipeline object to its own name. Assigning it back to `pipeline`
# would shadow the factory function, and every later cell that calls
# `pipeline("text-classification", ...)` would then invoke this
# text-generation pipeline on the task string instead of building a new one.
generator = pipeline(task="text-generation", model="google/gemma-2-2b", device=0)
generator("the secret to baking a really good cake is ")

In [None]:
# Only the task is given; no model is specified for this pipeline.
pipe = pipeline("text-classification")

In [None]:
# Pass a list to run the pipeline on several sentences in one call.
# NOTE(review): "vary" looks like a typo for "very" — confirm before changing
# the sample input, since it alters what the model sees.
pipe(["very good!", "vary bad!"])

## 指定任务类型，再指定模型，创建基于指定模型的Pipeline

In [None]:
# Checkpoints can be browsed at https://huggingface.co/models
# Build the classifier from an explicitly named Chinese checkpoint.
pipe = pipeline(
    task="text-classification",
    model="uer/roberta-base-finetuned-dianping-chinese",
)

In [None]:
# Run the pipeline on a single Chinese sentence.
pipe("我觉得不太行！")

## 预先加载模型，再创建Pipeline

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# When an already-loaded model object is handed to pipeline(), the matching
# tokenizer must be passed explicitly as well.
model_name = "uer/roberta-base-finetuned-dianping-chinese"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
pipe = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
)

In [None]:
# Run the pipeline built from the pre-loaded model and tokenizer.
pipe("我觉得不太行！")

In [None]:
# Show the device of the pipeline's model (this pipeline was created without a
# `device` argument).
pipe.model.device

In [None]:
import time

import torch

# Measure the average latency of one pipeline call over repeated runs.
#
# This pipeline was created without a `device` argument, and
# torch.cuda.synchronize() raises on machines without a usable CUDA device,
# so only synchronize when CUDA is actually available.
use_cuda = torch.cuda.is_available()
times = []
for _ in range(100):
    if use_cuda:
        torch.cuda.synchronize()
    # perf_counter() is the high-resolution clock intended for timing code;
    # time.time() follows the wall clock and can jump.
    start = time.perf_counter()
    pipe("我觉得不太行！")
    if use_cuda:
        torch.cuda.synchronize()
    end = time.perf_counter()
    times.append(end - start)
# Divide by the number of samples actually collected rather than a literal.
print(sum(times) / len(times))

## 使用GPU进行推理

In [None]:
# Same checkpoint as before, now created with device=0 for GPU inference.
pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese", device=0)

In [None]:
# Show the device of the model for the pipeline created with device=0.
pipe.model.device

In [None]:
import time

import torch

# Measure the average latency of one pipeline call on the GPU pipeline.
times = []
for _ in range(100):
    # Synchronize before and after the call so that each timed interval
    # covers the GPU work of this call only.
    torch.cuda.synchronize()
    # perf_counter() is the high-resolution clock intended for timing code;
    # time.time() follows the wall clock and can jump.
    start = time.perf_counter()
    pipe("我觉得不太行！")
    torch.cuda.synchronize()
    end = time.perf_counter()
    times.append(end - start)
# Divide by the number of samples actually collected rather than a literal.
print(sum(times) / len(times))

## 确定Pipeline参数

In [None]:
# Build an extractive question-answering pipeline from a Chinese checkpoint.
qa_pipe = pipeline("question-answering", model="uer/roberta-base-chinese-extractive-qa")

In [None]:
# Display the pipeline object to see which concrete pipeline class was created.
qa_pipe

In [None]:
# Display the pipeline class itself.
# NOTE(review): presumably shown so its documentation/source can be inspected
# for the call parameters it accepts — confirm intent.
QuestionAnsweringPipeline

In [None]:
# Call the pipeline with a task-specific parameter: max_answer_len=1.
# NOTE(review): presumably this truncates the expected answer on purpose to
# demonstrate the parameter's effect — confirm.
qa_pipe(question="中国的首都是哪里？", context="中国的首都是北京", max_answer_len=1)

# 其他Pipeline示例

In [None]:
# Zero-shot object detection pipeline backed by the OWL-ViT checkpoint.
checkpoint = "google/owlvit-base-patch32"
detector = pipeline(
    task="zero-shot-object-detection",
    model=checkpoint,
)

In [None]:
import requests
from PIL import Image

url = "https://unsplash.com/photos/oj0zeY2Ltk4/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8MTR8fHBpY25pY3xlbnwwfHx8fDE2Nzc0OTE1NDk&force=true&w=640"
# Bound the request time so the cell cannot hang indefinitely, and fail with a
# clear HTTP error instead of handing PIL an error page it cannot decode.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
im = Image.open(response.raw)
im

In [None]:
# Free-text labels the detector should look for in the image.
candidate_labels = ["hat", "sunglasses", "book"]
predictions = detector(im, candidate_labels=candidate_labels)
predictions

In [None]:
from PIL import ImageDraw

# Draw every detection (box plus "label: score" caption) directly onto `im`.
draw = ImageDraw.Draw(im)

for prediction in predictions:
    box = prediction["box"]
    label = prediction["label"]
    score = prediction["score"]
    # Read the coordinates by key rather than unpacking box.values(), so the
    # result does not depend on the order in which the dict was built.
    xmin, ymin, xmax, ymax = (box[key] for key in ("xmin", "ymin", "xmax", "ymax"))
    draw.rectangle((xmin, ymin, xmax, ymax), outline="red", width=1)
    draw.text((xmin, ymin), f"{label}: {round(score,2)}", fill="red")

im

# Pipeline背后的实现

In [None]:
from transformers import *
import torch

In [None]:
# Load the tokenizer and the classification model from the same checkpoint.
model_name = "uer/roberta-base-finetuned-dianping-chinese"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

tokenizer 和 model 传入的是同一个路径，但实际加载的是这个仓库里的不同文件：

- tokenizer 加载的是 tokenizer_config.json 等
- model 加载的是 config.json 等

In [None]:
input_text = "我觉得不太行！"
# Tokenize the sentence; return_tensors="pt" requests PyTorch tensors so the
# result can be fed to the model directly.
inputs = tokenizer(input_text, return_tensors="pt")
inputs

关于 `**` 解包：假设有一个字典

```python
user_data = {
    "name": "张三",
    "age": 25
}
```

那么 `**user_data` 就等同于 `name="张三", age=25`，即把字典中的键值对作为关键字参数传入函数。

In [None]:
# Unpack the tokenizer output into keyword arguments for the model call.
res = model(**inputs)
res

In [None]:
# Normalize the model's raw scores with softmax over the last dimension.
# The variable keeps the name `logits`, but from here on it holds the softmax
# output rather than the raw scores.
logits = torch.softmax(res.logits, dim=-1)
logits

In [None]:
# Index of the largest value, converted to a plain Python int. No `dim` is
# given, so the index refers to the flattened tensor.
pred = logits.argmax().item()
pred

In [None]:
# Show the model config's mapping from class index to label name.
model.config.id2label

In [None]:
# Translate the predicted class index into its label; .get() yields None
# instead of raising if the index is missing from the mapping.
result = model.config.id2label.get(pred)
result