In [1]:
!pip install datasets
!pip install transformers
!pip install sklearn

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [2]:
import numpy as np
import torch
from datasets import load_dataset
from sklearn.metrics import classification_report
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset

# Load the GoEmotions dataset
dataset = load_dataset("go_emotions")

# GoEmotions emotion labels, listed in the dataset's class-id order
candidate_labels = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring",
    "confusion", "curiosity", "desire", "disappointment", "disapproval",
    "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
    "joy", "love", "nervousness", "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral"
]

# The evaluation step matches integer class ids against positions in
# `candidate_labels`, so fail fast if this hand-written list ever drifts from
# the label names stored in the dataset itself.
dataset_label_names = dataset["test"].features["labels"].feature.names
assert candidate_labels == dataset_label_names, (
    "candidate_labels is out of sync with the go_emotions label names"
)

# Use the GPU when one is available; otherwise fall back to the CPU instead of
# crashing on a runtime without CUDA (inference will be much slower there).
device = "cuda" if torch.cuda.is_available() else "cpu"

# Zero-shot classifier built on an NLI model
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/9.40k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/350k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/347k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [3]:
def classify_sample(examples, threshold=0.5, batch_size=500):
    """Zero-shot classify a batch of texts and keep the labels scoring above ``threshold``.

    Args:
        examples: Batch mapping whose ``"text"`` entry holds the input strings.
        threshold: A label is kept only when its score is strictly greater than
            this value. Defaults to 0.5.
        batch_size: Batch size forwarded to the ``classifier`` pipeline call.
            Defaults to 500.

    Returns:
        A dict with a single key, ``"predicted_labels"``, holding one list of
        label names per input text.
    """
    results = classifier(
        examples["text"],
        candidate_labels=candidate_labels,
        multi_label=True,
        batch_size=batch_size,
    )
    # The pipeline is documented to return either one dict or a list of dicts;
    # normalise to a list so the comprehension below never iterates dict keys.
    if isinstance(results, dict):
        results = [results]
    predicted_labels = [
        [label for label, score in zip(result["labels"], result["scores"]) if score > threshold]
        for result in results
    ]
    return {"predicted_labels": predicted_labels}

# Run the zero-shot classifier over the whole test split, handing
# `classify_sample` 500 rows at a time.
test_split = dataset["test"]
classified_samples = test_split.map(
    classify_sample,
    batch_size=500,
    batched=True,
)


Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [4]:
# Encode ground truth and predictions as multi-hot vectors
def binarize_labels(sample):
    """Build 0/1 indicator vectors, one slot per entry of ``candidate_labels``.

    ``sample["labels"]`` is matched by position index and
    ``sample["predicted_labels"]`` by label name.
    """
    true_ids = set(sample["labels"])
    predicted_names = set(sample["predicted_labels"])
    return {
        "binary_ground_truth": [int(idx in true_ids) for idx in range(len(candidate_labels))],
        "binary_predictions": [int(name in predicted_names) for name in candidate_labels],
    }

# Attach the multi-hot target and prediction vectors to every classified row
binary_data = classified_samples.map(binarize_labels)

# Stack the per-row vectors into (n_samples, n_labels) indicator matrices
ground_truth, predictions = (
    np.array([row[column] for row in binary_data])
    for column in ("binary_ground_truth", "binary_predictions")
)

# Per-label precision / recall / F1; labels with no predictions score 0
report = classification_report(
    ground_truth,
    predictions,
    zero_division=0,
    target_names=candidate_labels,
)

print(report)


Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

                precision    recall  f1-score   support

    admiration       0.29      0.76      0.41       504
     amusement       0.24      0.81      0.37       264
         anger       0.16      0.54      0.24       198
     annoyance       0.11      0.68      0.19       320
      approval       0.11      0.45      0.17       351
        caring       0.04      0.64      0.07       135
     confusion       0.05      0.92      0.10       153
     curiosity       0.19      0.69      0.30       284
        desire       0.07      0.71      0.12        83
disappointment       0.06      0.77      0.12       151
   disapproval       0.09      0.79      0.16       267
       disgust       0.08      0.67      0.15       123
 embarrassment       0.02      0.54      0.04        37
    excitement       0.11      0.76      0.20       103
          fear       0.15      0.65      0.25        78
     gratitude       0.54      0.82      0.65       352
         grief       0.00      0.33      0.01  