# Multi-Label Text Classification with Transformers
This repository contains code for fine-tuning transformer-based models for multi-label text classification using the `sem_eval_2018_task_1` dataset. The goal is to classify tweets into multiple emotion categories such as anger, anticipation, disgust, fear, joy, love, optimism, pessimism, sadness, surprise, and trust.

## Dataset
The dataset used is `sem_eval_2018_task_1`, specifically the `subtask5.english` subset. It contains tweets annotated with multiple emotion labels. The dataset is divided into training, validation, and test sets.

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
from datasets import load_dataset
import pandas as pd

# Load the dataset and inspect the columns
dataset = load_dataset("sem_eval_2018_task_1", "subtask5.english", trust_remote_code=True)

# Checking the available columns to identify the label column
print("Columns in dataset:", dataset['train'].column_names)

# Assuming "label" is the column with multi-labels, adjust if needed
label_column = "anticipation"  # Set the actual label column if it's different after inspecting the dataset columns
num_labels = 11  # Update this based on the specific number of classes in the dataset

# Evaluation metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    preds = (pred.predictions > 0.5).astype(int)  # For multi-label classification
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='micro')
    return {'accuracy': accuracy, 'f1': f1, 'precision': precision, 'recall': recall}

# Function to fine-tune and evaluate a model
def fine_tune_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

    # Tokenization
    def tokenize_function(examples):
        return tokenizer(examples['text'], padding='max_length', truncation=True)

    tokenized_datasets = dataset.map(tokenize_function, batched=True)

    # Ensure correct label format and column name
    tokenized_datasets = tokenized_datasets.rename_column(label_column, "labels")
    tokenized_datasets.set_format("torch")

    training_args = TrainingArguments(
        output_dir=f'./results_{model_name}',
        evaluation_strategy="epoch",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        logging_dir='./logs',
        save_strategy="epoch",
        load_best_model_at_end=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['validation'],
        compute_metrics=compute_metrics,
    )

    # Train and evaluate
    trainer.train()
    eval_result = trainer.evaluate()
    return eval_result

# Define models
model_names = ["bert-base-uncased", "bert-large-uncased", "bert-base-cased", "bert-base-multilingual-cased"]

# Store results in a dictionary
results = {}

# Fine-tune each model and store results
for model_name in model_names:
    results[model_name] = fine_tune_model(model_name)

# Tabulate results
results_df = pd.DataFrame(results).T
print(results_df)



  from .autonotebook import tqdm as notebook_tqdm


Columns in dataset: ['ID', 'Tweet', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust']


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map:   0%|          | 0/6838 [00:00<?, ? examples/s]


KeyError: 'text'

In [2]:
from datasets import load_dataset
dataset = load_dataset("sem_eval_2018_task_1", "subtask5.english")

In [3]:
dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'Tweet', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust'],
        num_rows: 6838
    })
    test: Dataset({
        features: ['ID', 'Tweet', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust'],
        num_rows: 3259
    })
    validation: Dataset({
        features: ['ID', 'Tweet', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust'],
        num_rows: 886
    })
})

In [4]:
example = dataset['train'][0]
example

{'ID': '2017-En-21441',
 'Tweet': "“Worry is a down payment on a problem you may never have'. \xa0Joyce Meyer.  #motivation #leadership #worry",
 'anger': False,
 'anticipation': True,
 'disgust': False,
 'fear': False,
 'joy': False,
 'love': False,
 'optimism': True,
 'pessimism': False,
 'sadness': False,
 'surprise': False,
 'trust': True}

In [5]:
labels = [label for label in dataset['train'].features.keys() if label not in ['ID', 'Tweet']]
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}
labels

['anger',
 'anticipation',
 'disgust',
 'fear',
 'joy',
 'love',
 'optimism',
 'pessimism',
 'sadness',
 'surprise',
 'trust']

In [6]:
pip install --upgrade transformers




In [7]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base")


In [8]:
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased",
                                                           problem_type="multi_label_classification",
                                                           num_labels=len(labels),
                                                           id2label=id2label,
                                                           label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
from transformers import AutoModel

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModel.from_pretrained("roberta-base")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
