## 准备工作
- 设置模型下载路径
- 启动学术加速

In [None]:
import os
import subprocess

# Keep all Hugging Face caches under one root on the AutoDL data disk.
# HF_HOME previously pointed at '/root/autodl-temp/hf' (typo), which did not
# match the '/root/autodl-tmp/...' path used for HF_HUB_CACHE, so anything
# cached under HF_HOME landed in a different directory than the hub cache.
HF_CACHE_ROOT = '/root/autodl-tmp/hf'
os.environ['HF_HOME'] = HF_CACHE_ROOT
os.environ['HF_HUB_CACHE'] = f'{HF_CACHE_ROOT}/hub'

# Enable AutoDL "academic acceleration": source the turbo script in a bash
# subshell, then copy every proxy-related variable it exports into this
# process so later downloads in the kernel go through the proxy.
# Best-effort: if the script is missing, stdout is empty and nothing is set.
result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    # Only NAME=value lines are usable; split on the first '=' so values
    # that themselves contain '=' stay intact.
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

In [3]:
from transformers import pipeline

### NLP - Text Classification - 文本审核
模型主页：https://huggingface.co/KoalaAI/Text-Moderation

In [4]:
# Content-moderation classifier. Use the canonical "text-classification" task
# name (the section is about text classification, not sentiment analysis) so
# it is consistent with the canonical task names used by the other cells.
pipe = pipeline(task="text-classification", model="KoalaAI/Text-Moderation")

In [5]:
# One sample per moderation category; per the original note these are detected
# as, in order: hate, violence, sexual content, self-harm.
text_list = [
    # hate
    "I hate all Yankees!",
    # violence
    "If you don't turn off your phone in a movie theater, I swear I'll take it and shovel it into your fucking ass!",
    # sexual content
    "hey babe, wanna come in and have a good night? We have loads of good girls waiting...",
    # self-harm
    "I swear if you come any closer I'll just jump!",
]

# Classify the whole batch in one call and show the result as the cell output.
moderation_results = pipe(text_list)
moderation_results

[{'label': 'H', 'score': 0.6020551323890686},
 {'label': 'V', 'score': 0.3086363971233368},
 {'label': 'S', 'score': 0.370002806186676},
 {'label': 'SH', 'score': 0.31591054797172546}]

### NLP - Token Classification - 医学领域命名实体分类
模型主页：https://huggingface.co/Clinical-AI-Apollo/Medical-NER

In [6]:
# Medical named-entity recognition; aggregation_strategy='simple' is passed so
# the pipeline returns grouped entities (see 'entity_group' in the output).
pipe = pipeline(
    task="token-classification",
    model="Clinical-AI-Apollo/Medical-NER",
    aggregation_strategy='simple',
)

# The bare string below is never used by the code; it only carries the Chinese
# translation of the English input for readers.
"""
22岁的男孩曾经罹患鼻中隔偏曲，这使他的呼吸变得困难，后来他通过我们的微创手术康复了正常的呼吸，但是还是需要日常使用糖皮质激素来缓解鼻炎。
"""

# Adjacent literals are concatenated into exactly the original single-line text.
clinical_note = (
    "The 22-year-old male patient previously suffered from deviated nasal septum, "
    "which caused difficulty in breathing. "
    "He underwent our minimally invasive surgery to restore normal breathing. "
    "However, he still requires daily use of corticosteroids to alleviate rhinitis."
)
result = pipe(clinical_note)
result

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity_group': 'AGE',
  'score': 0.93260413,
  'word': '22-year-old',
  'start': 3,
  'end': 15},
 {'entity_group': 'SEX',
  'score': 0.76922107,
  'word': 'male',
  'start': 15,
  'end': 20},
 {'entity_group': 'DISEASE_DISORDER',
  'score': 0.52708817,
  'word': 'deviated',
  'start': 53,
  'end': 62},
 {'entity_group': 'SIGN_SYMPTOM',
  'score': 0.33194998,
  'word': 'nasal',
  'start': 62,
  'end': 68},
 {'entity_group': 'DISEASE_DISORDER',
  'score': 0.43514776,
  'word': 'septum',
  'start': 68,
  'end': 75},
 {'entity_group': 'SIGN_SYMPTOM',
  'score': 0.65501827,
  'word': 'difficulty in breathing',
  'start': 89,
  'end': 113},
 {'entity_group': 'DETAILED_DESCRIPTION',
  'score': 0.6294477,
  'word': 'minimally invasive',
  'start': 131,
  'end': 150},
 {'entity_group': 'THERAPEUTIC_PROCEDURE',
  'score': 0.54569775,
  'word': 'surgery',
  'start': 150,
  'end': 158},
 {'entity_group': 'SIGN_SYMPTOM',
  'score': 0.39747745,
  'word': 'breathing',
  'start': 176,
  'end': 186

### NLP - Question Answering - 基于上下文的问答

模型主页：https://huggingface.co/deepset/roberta-base-squad2

In [7]:
# Question-answering pipeline used by the next cell.
question_answerer = pipeline(
    task="question-answering",
    model='deepset/roberta-base-squad2',
)

In [8]:
# Ask one question against a one-sentence context.
qa_inputs = {
    "question": "What is the best programming language?",
    "context": "The best programming language is PHP",
}
preds = question_answerer(**qa_inputs)

# Report the rounded confidence, the answer span offsets, and the answer text.
score = round(preds['score'], 4)
print(f"score: {score}, start: {preds['start']}, end: {preds['end']}, answer: {preds['answer']}")

score: 0.924, start: 33, end: 36, answer: PHP


### NLP - Text Summarization - 文本总结

模型主页：https://huggingface.co/facebook/bart-large-cnn

In [9]:
# Summarization pipeline; min_length / max_length are passed at construction
# time to bound the length of the generated summary.
summarizer_options = {"min_length": 8, "max_length": 32}
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn", **summarizer_options)

In [10]:
# Text to summarize. Whitespace inside the literal (indentation, trailing
# spaces, leading/trailing newlines) is kept exactly as in the original call.
paper_conclusion = """
    In this work, we presented the Transformer, the first sequence transduction model based entirely on attention, 
    replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention. 
    For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers. 
    On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art. 
    In the former task our best model outperforms even all previously reported ensembles.
    """

# The last expression is the cell output: a list with one summary dict.
summarizer(paper_conclusion)


[{'summary_text': 'The Transformer is the first sequence transduction model based entirely on attention. It replaces the recurrent layers most commonly used in encoder-decoder'}]

### Audio - Audio Classification - 音频分类
模型主页：https://huggingface.co/MIT/ast-finetuned-audioset-10-10-0.4593

#### 前置依赖包安装

建议在命令行安装必要的音频数据处理包: ffmpeg

```bash
# Run the steps in order and stop at the first failure: '&&' chains commands,
# whereas a single '&' would background 'apt update', and a bare trailing '\'
# would glue the following line onto the same command as extra arguments.
apt update && apt upgrade && \
apt install -y ffmpeg && \
pip install ffmpeg ffmpeg-python
```

In [None]:
# 安装依赖
!pip install datasets
!pip install soundfile
!pip install librosa

!pip install kenlm
!pip install pyctcdecode

In [13]:
# Audio-classification pipeline used by the loop further down.
classifier = pipeline(
    task="audio-classification",
    model="MIT/ast-finetuned-audioset-10-10-0.4593",
)



In [14]:
# Download a small demo dataset and order it by "id" so that indexing the
# first few rows is deterministic across runs.
from datasets import load_dataset

dataset = load_dataset(
    "hf-internal-testing/librispeech_asr_demo",
    "clean",
    split="validation",
    trust_remote_code=True,
).sort("id")

In [15]:
# Classify the first three samples and print the scores rounded to 4 decimals.
for i in range(3):
    # Named `waveform` rather than `input` so the builtin input() is not
    # shadowed for the rest of the kernel session.
    # NOTE(review): only the raw array is passed, without its sampling rate —
    # assumes the dataset's rate matches what the model expects; confirm.
    waveform = dataset[i]['audio']['array']
    preds = classifier(waveform)
    preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
    print(preds)

[{'score': 0.8412, 'label': 'Speech'}, {'score': 0.0248, 'label': 'Speech synthesizer'}, {'score': 0.0196, 'label': 'Narration, monologue'}, {'score': 0.0102, 'label': 'Male speech, man speaking'}, {'score': 0.0069, 'label': 'Gasp'}]
[{'score': 0.8907, 'label': 'Speech'}, {'score': 0.0132, 'label': 'Speech synthesizer'}, {'score': 0.0105, 'label': 'Narration, monologue'}, {'score': 0.0082, 'label': 'Male speech, man speaking'}, {'score': 0.0079, 'label': 'Single-lens reflex camera'}]
[{'score': 0.7512, 'label': 'Speech'}, {'score': 0.1611, 'label': 'Speech synthesizer'}, {'score': 0.0499, 'label': 'Narration, monologue'}, {'score': 0.0085, 'label': 'Male speech, man speaking'}, {'score': 0.0027, 'label': 'Inside, small room'}]


### Audio - Automatic speech recognition（ASR）- 自动语音识别

模型主页：https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english

In [None]:
# Pick the checkpoint explicitly through the `model` argument.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-english",
)

In [17]:
# Transcribe the first three samples of the dataset loaded earlier.
for i in range(3):
    # Named `waveform` rather than `input` so the builtin input() is not
    # shadowed for the rest of the kernel session.
    # NOTE(review): only the raw array is passed, without its sampling rate —
    # assumes the dataset's rate matches what the model expects; confirm.
    waveform = dataset[i]['audio']['array']
    preds = transcriber(waveform)
    print(preds)

{'text': 'mr quilter is the apostle of the middle classes and we are glad to welcome his gospel'}
{'text': "nor is mr quilter's manner less interesting than his matter"}
{'text': 'he tells us that at this festive season of the year with christmas and roast beef looming before us symbolies drawn from eating and its results occur most readily to the mind'}


### Computer Vision - Image Classification - 图像分类

模型主页：https://huggingface.co/microsoft/resnet-50

![bc](data/Border_Collie_600.jpg)

In [18]:
# Image-classification pipeline; note this rebinds `classifier`, which earlier
# held the audio classifier.
classifier = pipeline(
    task="image-classification",
    model='microsoft/resnet-50',
)

In [19]:
# Classify the sample image, round each score to 4 decimals, and print one
# prediction per line.
raw_preds = classifier('data/Border_Collie_600.jpg')
preds = [
    {"score": round(item["score"], 4), "label": item["label"]}
    for item in raw_preds
]
print("\n".join(str(entry) for entry in preds))

{'score': 0.7364, 'label': 'Border collie'}
{'score': 0.1358, 'label': 'collie'}
{'score': 0.1031, 'label': 'Shetland sheepdog, Shetland sheep dog, Shetland'}
{'score': 0.0022, 'label': 'Cardigan, Cardigan Welsh corgi'}
{'score': 0.0014, 'label': 'Norwegian elkhound, elkhound'}


### Computer Vision - Object Detection - 目标检测

模型主页：https://huggingface.co/microsoft/table-transformer-detection
这是一个专门用来做表格检测的模型，高端用法在这里：https://github.com/NielsRogge/Transformers-Tutorials/blob/master/Table%20Transformer/Inference_with_Table_Transformer_(TATR)_for_parsing_tables.ipynb

In [None]:
# Object-detection pipeline backed by the table-detection checkpoint.
detector = pipeline(
    task="object-detection",
    model='microsoft/table-transformer-detection',
)

In [21]:
# Run detection on the sample screenshot; the bare name on the last line makes
# the list of detections (score, label, box) the cell output.
table_image = 'data/example_excel_file.png'
preds = detector(table_image)
preds

[{'score': 0.6732780337333679,
  'label': 'table',
  'box': {'xmin': 13, 'ymin': 71, 'xmax': 641, 'ymax': 415}}]