In [None]:
!pip install transformers datasets torch torchvision torchaudio sentencepiece kaggle


In [None]:
from google.colab import files
files.upload()  # Upload kaggle.json here


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"aroojkazmi","key":"263d2edef21d7bc9d81cad8a26cb4bb4"}'}

In [None]:
!ls ~/.kaggle


kaggle.json


In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
!kaggle datasets list


ref                                                                    title                                                size  lastUpdated          downloadCount  voteCount  usabilityRating  
---------------------------------------------------------------------  --------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
atharvasoundankar/chocolate-sales                                      Chocolate Sales Data 📊🍫                              63KB  2025-03-07 05:44:03           5103         67  1.0              
abdulmalik1518/mobiles-dataset-2025                                    Mobiles Dataset (2025)                               20KB  2025-02-18 06:50:24          12026        212  1.0              
salahuddinahmedshuvo/ecommerce-consumer-behavior-analysis-data         Ecommerce Consumer Behavior Analysis Data            43KB  2025-03-03 13:09:09           2648         43  0.9411765        
willianoliveiragibin/pixa

In [None]:
!kaggle datasets download -d parulpandey/emotion-dataset


Dataset URL: https://www.kaggle.com/datasets/parulpandey/emotion-dataset
License(s): CC0-1.0
Downloading emotion-dataset.zip to /content
  0% 0.00/715k [00:00<?, ?B/s]
100% 715k/715k [00:00<00:00, 118MB/s]


In [None]:
!unzip /content/emotion-dataset.zip -d /content/emotion_dataset/


Archive:  /content/emotion-dataset.zip
  inflating: /content/emotion_dataset/test.csv  
  inflating: /content/emotion_dataset/training.csv  
  inflating: /content/emotion_dataset/validation.csv  


In [None]:
import pandas as pd

# Load datasets
train_df = pd.read_csv('/content/emotion_dataset/training.csv')
val_df = pd.read_csv('/content/emotion_dataset/validation.csv')
test_df = pd.read_csv('/content/emotion_dataset/test.csv')

# Display first few rows of the training set
print("Training Data:")
print(train_df.head())

# Check column names and dataset information
print("\nDataset Info:")
print(train_df.info())


Training Data:
                                                text  label
0                            i didnt feel humiliated      0
1  i can go from feeling so hopeless to so damned...      0
2   im grabbing a minute to post i feel greedy wrong      3
3  i am ever feeling nostalgic about the fireplac...      2
4                               i am feeling grouchy      3

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16000 entries, 0 to 15999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    16000 non-null  object
 1   label   16000 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 250.1+ KB
None


In [None]:
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer

# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define function for tokenizing text
def tokenize_text(text_list, max_length=128):
    return tokenizer(
        text_list,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )

# Tokenizing the datasets
train_encodings = tokenize_text(train_df['text'].tolist())
val_encodings = tokenize_text(val_df['text'].tolist())
test_encodings = tokenize_text(test_df['text'].tolist())

# Convert labels to tensors
train_labels = torch.tensor(train_df['label'].values)
val_labels = torch.tensor(val_df['label'].values)
test_labels = torch.tensor(test_df['label'].values)

print("Tokenization complete!")
print(f"Train Encodings Shape: {train_encodings['input_ids'].shape}")


Tokenization complete!
Train Encodings Shape: torch.Size([16000, 87])


In [None]:
from torch.utils.data import Dataset, DataLoader

class EmotionDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item

# Creating dataset objects
train_dataset = EmotionDataset(train_encodings, train_labels)
val_dataset = EmotionDataset(val_encodings, val_labels)
test_dataset = EmotionDataset(test_encodings, test_labels)

# Dataloader for batch processing
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

print("Dataset and DataLoader created successfully!")


Dataset and DataLoader created successfully!


In [None]:
import torch
import torch.nn as nn
from transformers import BertModel
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class EmotionClassifier(nn.Module):
    def __init__(self, num_classes):
        super(EmotionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        output = self.dropout(pooled_output)
        return self.fc(output)

# Define model
num_classes = len(set(train_labels.tolist()))  # Get number of unique labels
model = EmotionClassifier(num_classes).to(device)

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

print("Model initialized successfully!")


Using device: cuda
Model initialized successfully!


In [None]:
from torch.utils.data import DataLoader, TensorDataset

train_inputs = train_encodings["input_ids"].clone().detach()
train_labels = train_labels.clone().detach()

# Create TensorDataset
train_dataset = TensorDataset(train_inputs, train_labels)

# Create DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

print("Train DataLoader created successfully!")


Train DataLoader created successfully!


In [None]:
for batch in train_dataloader:
    print(batch)
    break  # Print only the first batch


[tensor([[ 101, 1045, 2514,  ...,    0,    0,    0],
        [ 101, 1045, 2572,  ...,    0,    0,    0],
        [ 101, 1045, 2031,  ...,    0,    0,    0],
        ...,
        [ 101, 1045, 2031,  ...,    0,    0,    0],
        [ 101, 1045, 2113,  ...,    0,    0,    0],
        [ 101, 1045, 2514,  ...,    0,    0,    0]]), tensor([0, 0, 4, 1, 0, 1, 1, 4, 0, 1, 4, 2, 1, 1, 4, 0])]


In [None]:
from transformers import BertTokenizer
import torch

# Initialize tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Sample text data (replace with your dataset)
texts = ["I am happy", "I feel sad", "This is exciting!"]

# Tokenize
encoding = tokenizer(
    texts,
    padding=True,            # Pad to the longest sequence
    truncation=True,         # Truncate if too long
    max_length=50,           # Max token length
    return_tensors="pt"      # Return PyTorch tensors
)

input_ids = encoding["input_ids"]
attention_mask = encoding["attention_mask"]

# Sample labels (Replace with actual labels)
labels = torch.tensor([0, 1, 2])  # Example: 0 = happy, 1 = sad, 2 = excited

print("Tokenization successful!")


Tokenization successful!


In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Create dataset with input_ids, attention_mask, and labels
dataset = TensorDataset(input_ids, attention_mask, labels)

# Create DataLoader
train_dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

print("Train DataLoader created successfully!")


Train DataLoader created successfully!


In [None]:
import torch
from torch.optim import AdamW
import torch.nn as nn

def train_model(model, train_dataloader, epochs=3):
    model.train()
    optimizer = AdamW(model.parameters(), lr=5e-5)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        for batch in train_dataloader:
            inputs, attention_mask, labels = batch

            # Move tensors to device (GPU or CPU)
            inputs, attention_mask, labels = inputs.to(device), attention_mask.to(device), labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(input_ids=inputs, attention_mask=attention_mask)

            # Ensure outputs is a tensor
            if isinstance(outputs, tuple):  # If model returns a tuple, extract the first element
                outputs = outputs[0]

            # Compute loss
            loss = loss_fn(outputs, labels)  # No need for `.logits` anymore
            loss.backward()

            # Optimize
            optimizer.step()

        print(f"Epoch {epoch+1} completed!")

    print("Training finished!")

# Run training
train_model(model, train_dataloader, epochs=3)


Epoch 1/3
Epoch 1 completed!
Epoch 2/3
Epoch 2 completed!
Epoch 3/3
Epoch 3 completed!
Training finished!


In [None]:
import torch

def predict_emotion(model, tokenizer, text, device):
    model.to(device)
    model.eval()

    # Tokenize input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Move input tensors to the same device as the model
    inputs = {key: value.to(device) for key, value in inputs.items() if key != "token_type_ids"}  # Exclude 'token_type_ids'

    with torch.no_grad():
        outputs = model(**inputs)  # Directly get tensor output

    # Ensure outputs is a tensor
    if isinstance(outputs, tuple):
        outputs = outputs[0]  # Get the first element if it's a tuple

    # Convert logits to probabilities and get the predicted label
    probs = torch.nn.functional.softmax(outputs, dim=1)
    predicted_label = torch.argmax(probs, dim=1).item()

    return predicted_label

# Example usage
text = "I feel so great today!"
predicted_emotion = predict_emotion(model, tokenizer, text, device)
print(f"Predicted Emotion: {predicted_emotion}")


Predicted Emotion: 2


In [None]:
def predict_emotion(model, tokenizer, text, device):
    """Predicts the emotion of a given text input."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {key: value.to(device) for key, value in inputs.items() if key != "token_type_ids"}

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits if hasattr(outputs, "logits") else outputs
    probs = torch.nn.functional.softmax(logits, dim=1)
    predicted_label = torch.argmax(probs, dim=1).item()

    # Debug: Print confidence scores
    print(f"🔍 Debug: {probs.cpu().numpy()} → Predicted Label: {predicted_label} ({emotion_labels[predicted_label]})")

    return predicted_label


In [None]:
emotion_labels = {
    0: "Happy 😊",
    1: "Sad 😢",
    2: "Angry 😡",
    3: "Surprised 😲",
    4: "Neutral 😐",
    5: "Anxious 😰"  # If your dataset has an 'Anxious' class, add it
}


In [None]:
def predict_emotion(model, tokenizer, text, device):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.nn.functional.softmax(outputs.logits, dim=1).cpu().numpy()
    predicted_label = probs.argmax(axis=1)[0]

    # 🔍 Debug Output
    print(f"🔍 Debug: {probs} → Predicted Label: {predicted_label} ({emotion_labels.get(predicted_label, 'Unknown')})")

    return predicted_label


In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch.nn.functional as F

def load_model():
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    model = AutoModelForSequenceClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    return model, tokenizer, device

def predict_emotion(model, tokenizer, text, device):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits if hasattr(outputs, 'logits') else outputs
    probs = F.softmax(logits, dim=1)
    predicted_label = torch.argmax(probs, dim=1).item()
    return predicted_label, probs.max().item()

def get_emotion_response(label):
    responses = {
        0: ("Very Negative", "I'm here for you. What's on your mind? 💙"),
        1: ("Negative", "It sounds like something's bothering you. Want to talk about it?"),
        2: ("Neutral", "I hear you. How's your day going?"),
        3: ("Positive", "That’s nice to hear! Keep the good vibes going! 😊"),
        4: ("Very Positive", "Awesome! I'm happy for you! 🎉")
    }
    return responses.get(label, ("Unknown", "I'm not sure how you're feeling. Can you describe it another way?"))

def chatbot(model, tokenizer, device):
    print("🚀 Emotion Chatbot is running! Type 'exit' to quit.")
    goodbye_phrases = {"bye", "goodbye", "exit", "see you", "quit", "end chat"}
    while True:
        user_input = input("You: ").strip().lower()
        if user_input in goodbye_phrases:
            print("🤖 Chatbot: Goodbye! Take care! 👋")
            break
        label, confidence = predict_emotion(model, tokenizer, user_input, device)
        emotion, response = get_emotion_response(label)
        print(f"🔍 Debug: Predicted Emotion: {emotion} (Score: {confidence:.4f})")
        print(f"🤖 Chatbot [{emotion}]: {response}")

if __name__ == "__main__":
    model, tokenizer, device = load_model()
    chatbot(model, tokenizer, device):
    print("🚀 Emotion Chatbot is running! Type 'exit' to quit.")

    farewell_phrases = ["bye", "goodbye", "see you", "thanks for your time", "take care"]

    while True:
        user_input = input("You: ")

        # Convert to lowercase for case-insensitive matching
        lower_input = user_input.lower()

        # Check if the input contains a farewell phrase
        if any(phrase in lower_input for phrase in farewell_phrases):
            print("🤖 Chatbot: It was great talking to you! Take care! 😊")


🚀 Emotion Chatbot is running! Type 'exit' to quit.
You: i am doing software engineering
🔍 Debug: Predicted Emotion: Positive (Score: 0.3245)
🤖 Chatbot [Positive]: That’s nice to hear! Keep the good vibes going! 😊
You: sometime i feel depressed 
🔍 Debug: Predicted Emotion: Negative (Score: 0.4471)
🤖 Chatbot [Negative]: It sounds like something's bothering you. Want to talk about it?
You: i feel i am unlucky 
🔍 Debug: Predicted Emotion: Very Negative (Score: 0.4949)
🤖 Chatbot [Very Negative]: I'm here for you. What's on your mind? 💙
You: i will tell you next thanks for your time bye
🔍 Debug: Predicted Emotion: Very Positive (Score: 0.5435)
🤖 Chatbot [Very Positive]: Awesome! I'm happy for you! 🎉
You: bye
🤖 Chatbot: Goodbye! Take care! 👋
