In [1]:
pip install transformers torch pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import RobertaTokenizer, RobertaForSequenceClassification, AdamW
import torch.nn as nn

# Read the train and test splits from their local CSV exports
train_csv_path = r'C:\Users\RAJKUMAR\Desktop\Intel\sentimental\new\go_emotions_train.csv'
test_csv_path = r'C:\Users\RAJKUMAR\Desktop\Intel\sentimental\new\go_emotions_test.csv'
train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

# Convert the label string to a list of integers
def parse_labels(label_str):
    """Parse a serialized label list such as ``"[2, 17]"`` into ``[2, 17]``.

    Args:
        label_str: Either a string of comma-separated integers, optionally
            wrapped in square brackets and padded with whitespace, or an
            already-parsed list/tuple of labels. The latter is returned as a
            list of ints, so re-running the parsing cell is harmless.

    Returns:
        list[int]: The labels in their original order. Empty input such as
        ``"[]"`` or ``""`` yields an empty list.

    Raises:
        ValueError: If a non-empty token is not a valid integer.
    """
    if isinstance(label_str, (list, tuple)):
        return [int(label) for label in label_str]

    inner = label_str.strip().strip('[]')
    # Skip empty tokens: "".split(',') yields [''] and int('') would raise.
    return [int(token) for token in inner.split(',') if token.strip()]

# Swap the raw label strings of the training frame for parsed integer lists
raw_label_column = train_data['labels']
train_data['labels'] = raw_label_column.apply(parse_labels)

# Create binary (multi-hot) labels covering every emotion class
max_label_index = 28  # Number of label classes (largest label index + 1); adjust for your dataset
def create_binary_labels(labels):
    """Build a multi-hot vector marking which label indices are present.

    Args:
        labels: Iterable of integer label indices, each expected to lie in
            ``[0, max_label_index)``.

    Returns:
        numpy.ndarray: Float array of length ``max_label_index`` holding 1.0 at
        every index listed in ``labels`` and 0.0 elsewhere.

    Raises:
        IndexError: If a label falls outside ``[0, max_label_index)``. Negative
            values are rejected explicitly because NumPy indexing would
            otherwise wrap them around and silently flag the wrong class.
    """
    binary_label = np.zeros(max_label_index)
    for label in labels:
        if not 0 <= label < max_label_index:
            raise IndexError(
                f"label {label} is out of range for {max_label_index} classes"
            )
        binary_label[label] = 1
    return binary_label

# Attach a multi-hot target vector to every training row
label_lists = train_data['labels']
train_data['binary_labels'] = label_lists.apply(create_binary_labels)

# Model inputs: the texts as a plain Python list, the targets gathered into one array
X_train = train_data['text'].tolist()
y_train = np.array(train_data['binary_labels'].tolist())

# Tokenization
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Tokenize input texts
def tokenize_texts(texts, tokenizer, max_len=100):
    """Run ``tokenizer`` over ``texts`` and return its output.

    Args:
        texts: Text input forwarded unchanged as the tokenizer's first argument.
        tokenizer: Callable tokenizer; invoked with padding and truncation
            enabled and ``return_tensors="pt"``.
        max_len: Value passed to the tokenizer as ``max_length`` (default 100).

    Returns:
        Whatever ``tokenizer`` returns for this call.
    """
    encode_options = dict(
        padding=True,
        truncation=True,
        max_length=max_len,
        return_tensors="pt",
    )
    return tokenizer(texts, **encode_options)

# Encode every training text up front
train_encodings = tokenize_texts(texts=X_train, tokenizer=tokenizer)

# Create Dataset class for PyTorch
class EmotionDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with per-sample labels."""

    def __init__(self, encodings, labels):
        """Keep the encodings mapping and the index-aligned label collection."""
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of samples, taken from the label collection."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one sample: every encoding field at ``idx`` plus float32 ``labels``."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.float32)
        return sample

# Wrap encodings and targets in a dataset, then serve it in shuffled batches of 16
train_dataset = EmotionDataset(encodings=train_encodings, labels=y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

# Define the model: pretrained RoBERTa with a sequence-classification head
# emitting one logit per emotion class, placed on the GPU when one is available
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=max_label_index)
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')

# Define loss function and optimizer
# BCEWithLogitsLoss is applied to the raw logits, scoring every class
# independently (multi-label targets).
criterion = nn.BCEWithLogitsLoss()
# Use PyTorch's AdamW: the `transformers.AdamW` implementation is deprecated
# and no longer ships with recent transformers releases. `eps` and
# `weight_decay` are pinned to the defaults of the transformers version so the
# optimisation hyper-parameters stay the same.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)

# Training loop
epochs = 3  # Adjust the number of epochs as necessary

for epoch in range(epochs):
    model.train()  # Training mode (the model contains dropout layers)
    total_loss = 0
    for batch in train_loader:
        # Move the tensors needed for this step onto the model's device
        target_device = model.device
        input_ids = batch['input_ids'].to(target_device)
        attention_mask = batch['attention_mask'].to(target_device)
        labels = batch['labels'].to(target_device)

        # Forward pass and loss on the raw logits, starting from cleared gradients
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask)
        loss = criterion(outputs.logits, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-batch loss for the epoch
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader)}")

# Function to predict emotions
def predict_emotions(text, model, tokenizer, threshold=0.5):
    """Return the emotion names whose predicted probability reaches ``threshold``.

    Args:
        text: Input text to classify. Only the first row of the model output is
            used, so pass a single text.
        model: Sequence-classification model producing one logit per emotion.
            It is switched to evaluation mode as a side effect.
        tokenizer: Tokenizer matching ``model``.
        threshold: Minimum sigmoid probability for an emotion to be reported.

    Returns:
        list[str]: Names of the emotions at or above ``threshold``, in label-id
        order; empty if none qualifies.
    """
    # Label names in the official GoEmotions id order, where 'neutral' is the
    # LAST class (id 27) rather than sitting at its alphabetical position.
    # An alphabetical list would report ids 20-27 under the wrong names.
    # NOTE(review): assumes the training CSV keeps the official GoEmotions ids;
    # confirm if the labels were re-encoded.
    emotions = [
        'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire',
        'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
        'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse',
        'sadness', 'surprise', 'neutral'
    ]

    model.eval()  # Set model to evaluation mode
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=100).to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Independent per-class probabilities (multi-label); first batch row only
    probabilities = torch.sigmoid(outputs.logits).cpu().numpy()[0]

    return [name for name, prob in zip(emotions, probabilities) if prob >= threshold]

# Try the fine-tuned model on one example sentence
input_text = "Why did you do that? I'm really angry and confused."
predicted_emotions = predict_emotions(text=input_text, model=model, tokenizer=tokenizer)
print(f'Input: {input_text}')
print(f'Predicted emotions: {predicted_emotions}')

# Save the model and tokenizer into the same directory so they can be reloaded together
save_directory = 'D:/Intel/sentimental/new/saved_roberta_model'
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Epoch 1/3, Loss: 0.12029866509650153
Epoch 2/3, Loss: 0.08517252965421912
Epoch 3/3, Loss: 0.07586475366392971
Input: Why did you do that? I'm really angry and confused.
Predicted emotions: ['anger']


('D:/Intel/sentimental/new/saved_roberta_model\\tokenizer_config.json',
 'D:/Intel/sentimental/new/saved_roberta_model\\special_tokens_map.json',
 'D:/Intel/sentimental/new/saved_roberta_model\\vocab.json',
 'D:/Intel/sentimental/new/saved_roberta_model\\merges.txt',
 'D:/Intel/sentimental/new/saved_roberta_model\\added_tokens.json')

In [2]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch

# Restore the fine-tuned model and its tokenizer from the directory they were saved to
save_directory = 'D:/Intel/sentimental/new/saved_roberta_model'
model = RobertaForSequenceClassification.from_pretrained(save_directory)
tokenizer = RobertaTokenizer.from_pretrained(save_directory)

# Prefer the GPU when one is present, otherwise stay on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [2]:
def predict_emotions(text, model, tokenizer, threshold=0.5):
    """Return the emotion names whose predicted probability reaches ``threshold``.

    Args:
        text: Input text to classify. Only the first row of the model output is
            used, so pass a single text.
        model: Sequence-classification model producing one logit per emotion.
            It is switched to evaluation mode as a side effect.
        tokenizer: Tokenizer matching ``model``.
        threshold: Minimum sigmoid probability for an emotion to be reported.

    Returns:
        list[str]: Names of the emotions at or above ``threshold``, in label-id
        order; empty if none qualifies.
    """
    # Label names in the official GoEmotions id order, where 'neutral' is the
    # LAST class (id 27) rather than sitting at its alphabetical position.
    # An alphabetical list would report ids 20-27 under the wrong names.
    # NOTE(review): assumes the model was trained on the official GoEmotions
    # ids; confirm if the labels were re-encoded.
    emotions = [
        'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire',
        'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
        'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse',
        'sadness', 'surprise', 'neutral'
    ]

    model.eval()  # Set model to evaluation mode
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=100).to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Independent per-class probabilities (multi-label); first batch row only
    probabilities = torch.sigmoid(outputs.logits).cpu().numpy()[0]

    return [name for name, prob in zip(emotions, probabilities) if prob >= threshold]

In [3]:
# One sample sentence to run through the reloaded model
input_text = "I'm really excited and happy about the new project!"

# Emotions the model assigns to the sentence (default 0.5 threshold)
predicted_emotions = predict_emotions(text=input_text, model=model, tokenizer=tokenizer)

# Show the sentence next to its predicted emotions
print(f'Input: {input_text}')
print(f'Predicted emotions: {predicted_emotions}')

Input: I'm really excited and happy about the new project!
Predicted emotions: ['excitement']


  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [4]:
# Several sample sentences, each classified on its own
input_texts = [
    "I'm really excited and happy about the new project!",
    "I'm feeling disappointed with how things turned out.",
    "This is such a confusing and annoying situation.",
    "I really admire your dedication and hard work.",
]

for text in input_texts:
    predicted_emotions = predict_emotions(text=text, model=model, tokenizer=tokenizer)
    # The trailing newline leaves a blank line between consecutive results
    print(f'Input: {text}')
    print(f'Predicted emotions: {predicted_emotions}\n')

Input: I'm really excited and happy about the new project!
Predicted emotions: ['excitement']

Input: I'm feeling disappointed with how things turned out.
Predicted emotions: ['disappointment']

Input: This is such a confusing and annoying situation.
Predicted emotions: ['confusion']

Input: I really admire your dedication and hard work.
Predicted emotions: ['admiration']

