In [None]:
!pip install transformers==4.45.2 setfit accelerate datasets sentence-transformers protobuf wandb

In [None]:
import json
import os
import time
from getpass import getpass

import numpy as np
import pandas as pd
import torch
from datasets import Dataset, load_dataset, DatasetDict
from setfit import SetFitModel, Trainer, TrainingArguments, sample_dataset
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader

### Keys

We need a Hugging Face access token to upload the fine-tuned model to the Hugging Face Hub, and a Weights & Biases (W&B) API key to record the training metrics in W&B.

In [None]:
# Use each credential from the environment when it is already set; otherwise
# prompt for it without echoing the input. This keeps secrets out of the saved
# notebook and avoids overwriting an existing value with an empty string.
for credential_name in ("HF_TOKEN", "WANDB_API_KEY"):
    if not os.environ.get(credential_name):
        os.environ[credential_name] = getpass(f"Enter {credential_name}: ")

### Function to calculate F1 score

In [None]:
def calculate_f1_score(y_true, y_pred):
    """
    Calculates micro and macro F1-scores given the predicted and actual labels

    Parameters
    ==========
    y_true (array-like): Actual labels
    y_pred (array-like): Predicted labels

    Returns
    =======
    dict: A dictionary containing micro f1 and macro f1 scores, under the
        keys "micro f1" and "macro f1".
    """
    # Generate a classification report to compute detailed metrics
    clf_dict = classification_report(
        y_true,
        y_pred,
        zero_division=0,
        output_dict=True
    )

    # The report only contains a "micro avg" entry for multi-label input (or
    # when a subset of labels is requested). For plain multi-class input it
    # reports "accuracy" instead, which is the same number as the micro F1 in
    # that setting, so fall back to it rather than raising a KeyError.
    if "micro avg" in clf_dict:
        micro_f1 = clf_dict["micro avg"]["f1-score"]
    else:
        micro_f1 = clf_dict["accuracy"]

    return {
        "micro f1": micro_f1,
        "macro f1": clf_dict["macro avg"]["f1-score"]
    }

## Dataset creation

SetFit employs contrastive learning to finetune embedding models. This training approach involves creating positive and negative pairs of sentences. A sentence pair will be positive if both of the sentences are of the same class, and negative otherwise. For example, in the case of binary “positive”-“negative” sentiment analysis, ("The movie was awesome", "I loved it") is a positive pair, and ("The movie was awesome", "It was quite disappointing") is a negative pair.

Let's assume there are 3 sentences in the dataset, each with a different label. While generating contrastive pairs, we can use pairs such as (sentence A, sentence B) and (sentence A, sentence C) as negative pairs, because the two sentences in each pair have different labels.

In [None]:
# Download the dataset repository from the Hugging Face Hub
dataset = load_dataset(
    path="bhujith10/multi_class_classification_dataset",
)

In [None]:
# Bind each split to its own name; the validation split is stored under "val"
train_dataset, eval_dataset, test_dataset = (
    dataset[split_name] for split_name in ("train", "val", "test")
)

In [None]:
# SetFit generates positive and negative pairs of sentences for contrastive
# training. The number of possible pairs grows quadratically with the number of
# sentences, so only a small sample of each split is used to generate pairs.
#
# Shuffle *before* selecting so that the subset is a random sample rather than
# the first rows of the split, and fix the seed so a re-run draws the same rows.
SAMPLE_SEED = 42
TRAIN_SAMPLE_SIZE = 150
EVAL_SAMPLE_SIZE = 50

# min(...) keeps the selection valid when a split has fewer rows than requested
tmp_train_dataset = train_dataset.shuffle(seed=SAMPLE_SEED).select(
    range(min(TRAIN_SAMPLE_SIZE, len(train_dataset)))
)
tmp_eval_dataset = eval_dataset.shuffle(seed=SAMPLE_SEED).select(
    range(min(EVAL_SAMPLE_SIZE, len(eval_dataset)))
)

## SetFit

In [None]:
# Names of the target classes; they are attached to the model and their count
# sets the number of outputs of the classification head
labels = ['Computer Science', 'Physics', 'Mathematics', 'Statistics', 'Quantitative Biology', 'Quantitative Finance']

# Pretrained checkpoint that is loaded as the SetFit model
checkpoint = "google-bert/bert-large-uncased"

# Load a SetFit model from the Hub with a differentiable head that has one
# output per label and uses the one-vs-rest multi-target strategy
model = SetFitModel.from_pretrained(
    checkpoint,
    multi_target_strategy="one-vs-rest",
    use_differentiable_head=True,
    head_params={"out_features": len(labels)},
    labels=labels
)

# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of raising on machines without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Evaluate and checkpoint once per epoch so that the best checkpoint can be
# restored when training finishes
args = TrainingArguments(
    batch_size=4,
    num_epochs=2,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tmp_train_dataset,
    eval_dataset=tmp_eval_dataset,
    metric="accuracy",
    # Dataset column name -> column name the trainer expects
    column_mapping={"text": "text", "labels": "label"}
)

# Finetune the model
trainer.train()

In [None]:
# Upload the fine-tuned model to its repository on the Hugging Face Hub
trainer.model.push_to_hub(
    repo_id="bhujith10/bert-large-uncased-setfit_finetuned",
)

## Inference

In [None]:
# Reload the fine-tuned model from the Hub for inference. The repository id is
# the one the training section pushes to, so the evaluation below measures the
# model that this notebook fine-tuned.
model = SetFitModel.from_pretrained(
    "bhujith10/bert-large-uncased-setfit_finetuned",
    labels=['Computer Science', 'Physics', 'Mathematics', 'Statistics', 'Quantitative Biology', 'Quantitative Finance'],
)

In [None]:
# DataLoader for batching the test split
batch_size = 4
dataloader = DataLoader(test_dataset, batch_size=batch_size)

predicted_labels = []
actual_labels = [sample['labels'] for sample in test_dataset]

# Generate predictions in batches and time the whole pass.
# perf_counter() is a monotonic clock meant for measuring intervals, whereas
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
with torch.no_grad():  # inference only: no gradients are needed
    for inputs in dataloader:
        predictions = model.predict(inputs['text'])
        # Move the batch to host memory and store it as plain Python lists
        predicted_labels.extend(predictions.detach().cpu().numpy().tolist())
end_time = time.perf_counter()

# Elapsed inference time in seconds
print(end_time - start_time)

In [None]:
# Micro and macro F1 of the predictions on the test split
calculate_f1_score(
    y_true=actual_labels,
    y_pred=predicted_labels,
)