In [23]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = load_dataset("emotion", split="train[:2000]")
test_dataset = load_dataset("emotion", split="test[:100]")




In [13]:
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 2000
})

In [17]:
print(dataset[0])
print(dataset[20])

{'text': 'i didnt feel humiliated', 'label': 0}
{'text': 'i feel irritated and rejected without anyone doing anything or saying anything', 'label': 3}


In [20]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
print(tokenizer(dataset[0]['text']))


{'input_ids': [101, 1045, 2134, 2102, 2514, 26608, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}


In [None]:
def preprocess(examples):
    return tokenizer(examples["text"], truncation=True, padding=True)

tokenized_train = dataset.map(preprocess, batched=True)
tokenized_test = test_dataset.map(preprocess, batched=True)




In [24]:

base_model = AutoModel.from_pretrained("distilbert-base-uncased").to(device)


model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", 
    num_labels=6  
).to(device)



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
base_model

DistilBertModel(
  (embeddings): Embeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (layer): ModuleList(
      (0-5): 6 x TransformerBlock(
        (attention): DistilBertSdpaAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (q_lin): Linear(in_features=768, out_features=768, bias=True)
          (k_lin): Linear(in_features=768, out_features=768, bias=True)
          (v_lin): Linear(in_features=768, out_features=768, bias=True)
          (out_lin): Linear(in_features=768, out_features=768, bias=True)
        )
        (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (ffn): FFN(
          (dropout): Dropout(p=0.1, inplace=False)
          (lin1): Linear(in_features=768, out_features=3072, bias=True)
          (lin2): L

In [26]:
model

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [27]:
def compute_metrics(pred):
    labels = pred.label_ids  
    preds = pred.predictions.argmax(-1)  
    
    accuracy = (preds == labels).mean()
   
    unique_labels = np.unique(labels)
    precision = 0
    recall = 0
    f1 = 0
    for label in unique_labels:
        true_positives = ((preds == label) & (labels == label)).sum()
        pred_positives = (preds == label).sum()
        actual_positives = (labels == label).sum()
        
        p = true_positives / pred_positives if pred_positives > 0 else 0
        r = true_positives / actual_positives if actual_positives > 0 else 0
        f = 2 * p * r / (p + r) if (p + r) > 0 else 0
        
        weight = actual_positives / len(labels)  
        precision += p * weight
        recall += r * weight
        f1 += f * weight
    
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }




In [28]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=16,
    eval_strategy="epoch",
    fp16=True if torch.cuda.is_available() else False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    compute_metrics=compute_metrics  
)


trainer.train()

results = trainer.evaluate()
print(f"Results: {results}")




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.543267,0.33,0.312405,0.33,0.186728


Results: {'eval_loss': 1.5432666540145874, 'eval_accuracy': 0.33, 'eval_precision': 0.31240549828178693, 'eval_recall': 0.33, 'eval_f1': 0.18672767857142858, 'eval_runtime': 0.0739, 'eval_samples_per_second': 1352.325, 'eval_steps_per_second': 175.802, 'epoch': 1.0}


In [29]:
text = "I'm really happy today!"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model(**inputs)
predicted_class = outputs.logits.argmax().item()
print(f"Text: '{text}' → Emotion: {predicted_class}")

Text: 'I'm really happy today!' → Emotion: 1
