<a href="https://colab.research.google.com/github/Ishrak-DataScience/EmotionDetection/blob/main/FuckIshrak.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load the dataset (adjust file path as needed)
df = pd.read_csv('/content/eng.csv')

# Define a custom dataset
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            add_special_tokens=True,
            return_token_type_ids=False,
            pad_to_max_length=True,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'text': text,
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Preprocess data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 128
labels = df[['Anger', 'Fear', 'Joy', 'Sadness', 'Surprise']].idxmax(axis=1).factorize()[0]

train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'], labels, test_size=0.2, random_state=42
)

train_dataset = EmotionDataset(train_texts.to_numpy(), train_labels, tokenizer, max_len)
val_dataset = EmotionDataset(val_texts.to_numpy(), val_labels, tokenizer, max_len)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Load the model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define optimizer and loss function
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()

# Training loop
def train_epoch(model, data_loader, criterion, optimizer, device):
    model.train()
    total_loss = 0

    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        logits = outputs.logits

        total_loss += loss.item()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    return total_loss / len(data_loader)

# Validation loop
def eval_model(model, data_loader, device):
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            predictions.extend(torch.argmax(logits, axis=1).cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return classification_report(true_labels, predictions, target_names=['Anger', 'Fear', 'Joy', 'Sadness', 'Surprise'])

# Train the model
epochs = 4
for epoch in range(epochs):
    train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss}")

    val_report = eval_model(model, val_loader, device)
    print(f"Validation Report:\n{val_report}")

# Save the model
torch.save(model.state_dict(), 'emotion_model.pth')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4, Train Loss: 1.2408387682420745


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Validation Report:
              precision    recall  f1-score   support

       Anger       0.61      0.65      0.63       112
        Fear       0.50      0.02      0.03       114
         Joy       0.59      0.94      0.72       270
     Sadness       0.00      0.00      0.00        34
    Surprise       0.00      0.00      0.00        24

    accuracy                           0.59       554
   macro avg       0.34      0.32      0.28       554
weighted avg       0.51      0.59      0.49       554

Epoch 2/4, Train Loss: 0.880434302974948


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Validation Report:
              precision    recall  f1-score   support

       Anger       0.60      0.72      0.66       112
        Fear       0.55      0.47      0.51       114
         Joy       0.72      0.83      0.77       270
     Sadness       0.45      0.15      0.22        34
    Surprise       0.00      0.00      0.00        24

    accuracy                           0.66       554
   macro avg       0.46      0.43      0.43       554
weighted avg       0.61      0.66      0.63       554

Epoch 3/4, Train Loss: 0.49442935279376216
Validation Report:
              precision    recall  f1-score   support

       Anger       0.64      0.62      0.63       112
        Fear       0.50      0.54      0.52       114
         Joy       0.74      0.79      0.77       270
     Sadness       0.30      0.29      0.30        34
    Surprise       0.33      0.04      0.07        24

    accuracy                           0.64       554
   macro avg       0.51      0.46      0.46       



Epoch 4/4, Train Loss: 0.24599560563244838
Validation Report:
              precision    recall  f1-score   support

       Anger       0.67      0.67      0.67       112
        Fear       0.49      0.60      0.54       114
         Joy       0.77      0.78      0.77       270
     Sadness       0.36      0.24      0.29        34
    Surprise       0.33      0.12      0.18        24

    accuracy                           0.66       554
   macro avg       0.53      0.48      0.49       554
weighted avg       0.65      0.66      0.65       554

