# 以 Transformers 套件進行情緒分析 (Sentiment analysis)

In [3]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.36.2-py3-none-any.whl.metadata (126 kB)
     ------------------------------------ 126.8/126.8 kB 497.9 kB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)
  Downloading huggingface_hub-0.20.1-py3-none-any.whl.metadata (12 kB)
Collecting tokenizers<0.19,>=0.14 (from transformers)
  Downloading tokenizers-0.15.0-cp39-none-win_amd64.whl.metadata (6.8 kB)
Collecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.1-cp39-none-win_amd64.whl.metadata (3.8 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.19.3->transformers)
  Downloading fsspec-2023.12.2-py3-none-any.whl.metadata (6.8 kB)
Downloading transformers-4.36.2-py3-none-any.whl (8.2 MB)
   ---------------------------------------- 8.2/8.2 MB 2.0 MB/s eta 0:00:00
Downloading huggingface_hub-0.20.1-py3-none-any.whl (330 kB)
   ---------------------------------------- 330.1/330.1 kB 3.4 MB/s eta 0:00:00
Downloading safetensors-0


[notice] A new release of pip is available: 23.3.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
# Load the required packages
from transformers import pipeline

Transformers documentation: <https://huggingface.co/docs/transformers/index>

# 情緒分析 (Sentiment analysis)

## Load model

In [6]:
# Build the sentiment-analysis pipeline used by the cells that follow.
classifier = pipeline(
    # The task the pipeline performs.
    task='sentiment-analysis',
    # The model checkpoint to load. Per its name: DistilBERT base, uncased,
    # fine-tuned on SST-2 (English).
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

## 單句分析

In [7]:
# Classify one sentence per call and print each raw result.
samples = (
    # Positive example.
    'We are very happy to show you the 🤗 Transformers library.',
    # Negative example.
    'I hate this movie.',
    # A negated sentence can also be classified correctly.
    'the movie is not bad.',
    'I have to work',
)
for sentence in samples:
    print(classifier(sentence))

[{'label': 'POSITIVE', 'score': 0.9997795224189758}]
[{'label': 'NEGATIVE', 'score': 0.9996869564056396}]
[{'label': 'POSITIVE', 'score': 0.999536395072937}]
[{'label': 'POSITIVE', 'score': 0.5919761657714844}]


## 斷句分析

In [8]:
# 一次測試多筆
# Classify several sentences in a single call.
sentences = [
    "We are very happy.",
    "We hope you don't hate it.",
]
results = classifier(sentences)

# Report each prediction with its score rounded to four decimal places.
for result in results:
    label, score = result['label'], round(result['score'], 4)
    print(f"label: {label}, with score: {score}")

label: POSITIVE, with score: 0.9999
label: NEGATIVE, with score: 0.5309


## Load model 2

In [9]:
# Load a multilingual model; supports English, French, Dutch, German, Italian
# and Spanish.
# NOTE: this rebinds `classifier`, so any cell executed after this one uses this
# model instead of the pipeline created earlier in the notebook.
classifier = pipeline(
    task='sentiment-analysis',
    model='nlptown/bert-base-multilingual-uncased-sentiment',
)

config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [6]:
# Spanish examples; the English meaning of each sentence is noted above it.
spanish_sentences = (
    # Negative: "I hate this movie."
    'Odio esta pelicula.',
    # "The movie is not bad."
    'la pelicula no esta mal.',
)
for sentence in spanish_sentences:
    print(classifier(sentence))

[{'label': '1 star', 'score': 0.4615827798843384}]
[{'label': '3 stars', 'score': 0.6274546384811401}]


In [7]:
# French examples; the English meaning of each sentence is noted above it.
french_sentences = (
    # Negative: "I hate this movie."
    'Je déteste ce film.',
    # "The movie is not bad."
    "le film n'est pas mal.",
)
for sentence in french_sentences:
    print(classifier(sentence))

[{'label': '1 star', 'score': 0.631117582321167}]
[{'label': '3 stars', 'score': 0.5710768103599548}]
