In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import AdamW
from tqdm import tqdm

# Load datasets
train_data = pd.read_csv(r'D:\Intel\sentimental\goemotions_encoded_train.csv')
val_data = pd.read_csv(r'D:\Intel\sentimental\goemotions_encoded_validation.csv')
test_data = pd.read_csv(r'D:\Intel\sentimental\goemotions_encoded_test.csv')

# Preprocessing
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

class CustomDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        return {
            'input_ids': torch.tensor(tokenizer.encode(text, max_length=100, truncation=True, padding='max_length')),
            'attention_mask': torch.tensor([1] * len(tokenizer.encode(text, max_length=100, truncation=True, padding='max_length'))),
            'labels': torch.tensor(label, dtype=torch.float)
        }

# Prepare labels (the remaining columns are the labels)
mlb = MultiLabelBinarizer()
y_train = mlb.fit_transform(train_data.drop(columns=['text']).values)
y_val = mlb.transform(val_data.drop(columns=['text']).values)
y_test = mlb.transform(test_data.drop(columns=['text']).values)

# Create datasets and dataloaders
train_dataset = CustomDataset(train_data['text'].values, y_train)
val_dataset = CustomDataset(val_data['text'].values, y_val)
test_dataset = CustomDataset(test_data['text'].values, y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Load model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=y_train.shape[1])
model.to(device)

# Training the model
optimizer = AdamW(model.parameters(), lr=1e-5)

for epoch in range(5):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}')

# Evaluate the model
model.eval()
total_accuracy = 0
with torch.no_grad():
    for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        predictions = torch.sigmoid(outputs.logits)  # Sigmoid for multi-label
        total_accuracy += ((predictions > 0.5) == labels).float().sum().item()

val_accuracy = total_accuracy / len(val_dataset)
print(f'Validation accuracy: {val_accuracy:.2f}')

# Evaluate on test set
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
model.eval()
total_test_accuracy = 0
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        predictions = torch.sigmoid(outputs.logits)
        total_test_accuracy += ((predictions > 0.5) == labels).float().sum().item()
test_accuracy = total_test_accuracy / len(test_dataset)
print(f'Test accuracy: {test_accuracy:.2f}')

  from .autonotebook import tqdm as notebook_tqdm
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████| 679/679 [2:23:55<00:00, 12.72s/it]  


Epoch 1, Loss: 0.01727800669655374


100%|██████████| 679/679 [2:18:17<00:00, 12.22s/it]  


Epoch 2, Loss: 0.0007954373385994497


100%|██████████| 679/679 [2:17:56<00:00, 12.19s/it]  


Epoch 3, Loss: 0.00035152179068153304


100%|██████████| 679/679 [2:17:47<00:00, 12.18s/it]  


Epoch 4, Loss: 0.00019806015780312182


100%|██████████| 679/679 [2:17:45<00:00, 12.17s/it]  


Epoch 5, Loss: 0.00012414911306248526
Validation accuracy: 2.00
Test accuracy: 2.00


In [9]:
# Specify the directory to save the model and tokenizer
save_directory = r'D:/Intel/sentimental/saved_roberta_model'

# Save the model
model.save_pretrained(save_directory)

# Save the tokenizer
tokenizer.save_pretrained(save_directory)

print(f'Model and tokenizer saved to {save_directory}')

Model and tokenizer saved to D:/Intel/sentimental/saved_roberta_model


In [1]:
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch

# Load the model and tokenizer
model_directory = r'D:/Intel/sentimental/saved_roberta_model'
loaded_model = RobertaForSequenceClassification.from_pretrained(model_directory)
loaded_tokenizer = RobertaTokenizer.from_pretrained(model_directory)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loaded_model.to(device)

# Function to predict emotions from text input
def predict_emotions(text, model, tokenizer, threshold=0.5):
    model.eval()  # Set model to evaluation mode
    
    # Tokenize input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=100).to(device)
    
    # Forward pass through the model
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Convert logits to probabilities using sigmoid
    logits = outputs.logits
    probabilities = torch.sigmoid(logits)
    
    # Emotion labels based on your dataset
    emotions = [
        'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire',
        'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
        'joy', 'love', 'nervousness', 'neutral', 'optimism', 'pride', 'realization', 'relief', 'remorse', 
        'sadness', 'surprise'
    ]
    
    # Apply threshold to decide which emotions are present
    predicted_labels = (probabilities > threshold).cpu().numpy()[0]
    predicted_emotions = [emotions[i] for i in range(len(predicted_labels)) if predicted_labels[i] == 1]
    
    return predicted_emotions

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import numpy as np

# Load the model and tokenizer
model_directory = r'D:/Intel/sentimental/saved_roberta_model'
loaded_model = RobertaForSequenceClassification.from_pretrained(model_directory).to(device)
loaded_tokenizer = RobertaTokenizer.from_pretrained(model_directory)

# Function to predict emotions
def predict_emotions(text, model, tokenizer, threshold=0.5):
    model.eval()  # Set model to evaluation mode
    
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=100).to(device)
    
    with torch.no_grad():
        outputs = model(**inputs)
    
    logits = outputs.logits
    probabilities = torch.sigmoid(logits).cpu().numpy()[0]
    
    # List of emotions in the dataset
    emotions = [
        'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire',
        'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
        'joy', 'love', 'nervousness', 'neutral', 'optimism', 'pride', 'realization', 'relief', 'remorse', 
        'sadness', 'surprise'
    ]
    
    # Get predicted labels
    predicted_labels = (probabilities >= threshold).astype(int)
    
    predicted_emotions = [emotions[i] for i in range(len(predicted_labels)) if predicted_labels[i] == 1]
    
    return predicted_emotions

# Example input and output
input_text = "Why did you do that? I'm really angry and confused."
predicted_emotions = predict_emotions(input_text, loaded_model, loaded_tokenizer)
print(f'Input: {input_text}')
print(f'Predicted emotions: {predicted_emotions}')

Input: Why did you do that? I'm really angry and confused.
Predicted emotions: ['admiration', 'amusement']


In [11]:
# Test multiple sentences
sentences = [
    "I'm so happy and excited to see you.",
    "Why did you do that? I'm really angry and confused.",
    "I can't believe this is happening. I'm so disappointed.",
    "Wow! That was hilarious! I'm laughing so hard.",
    "I'm proud of what we achieved together."
]

for sentence in sentences:
    emotions = predict_emotions(sentence, loaded_model, loaded_tokenizer)
    print(f'Input: {sentence}')
    print(f'Predicted emotions: {emotions}')
    print('-' * 50)

Input: I'm so happy and excited to see you.
Predicted emotions: ['admiration', 'amusement']
--------------------------------------------------
Input: Why did you do that? I'm really angry and confused.
Predicted emotions: ['admiration', 'amusement']
--------------------------------------------------
Input: I can't believe this is happening. I'm so disappointed.
Predicted emotions: ['admiration', 'amusement']
--------------------------------------------------
Input: Wow! That was hilarious! I'm laughing so hard.
Predicted emotions: ['admiration', 'amusement']
--------------------------------------------------
Input: I'm proud of what we achieved together.
Predicted emotions: ['admiration', 'amusement']
--------------------------------------------------


In [1]:
import google.generativeai as genai

api_key = "AIzaSyAGOnKsiRkWyikky3x9q2NRQRPPqaiIZ2I"
genai.configure(api_key=api_key)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# List the models available to your API key
models = genai.models.list_models()

# Print model details
for model in models:
    print(f"Model ID: {model.model_id}, Display Name: {model.display_name}")

AttributeError: module 'google.generativeai' has no attribute 'models'

AttributeError: module 'google.generativeai' has no attribute 'generate_message'

In [3]:
pip install --upgrade google-ai

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement google-ai (from versions: none)
ERROR: No matching distribution found for google-ai


In [1]:
pip install google-generativeai

Note: you may need to restart the kernel to use updated packages.
