In [None]:
# Advanced Emotion Detection System with Deep Learning
# Designed for Google Colab - Multi-label Emotion Classification with Intensity Prediction

!pip install transformers datasets torch torchvision torchaudio
!pip install scikit-learn matplotlib seaborn wordcloud
!pip install plotly pandas numpy
!pip install accelerate
!pip install lime

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer, AutoModel,
    get_linear_schedule_with_warmup, AutoConfig
)
from torch.optim import AdamW
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import MultiLabelBinarizer
import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud
import re
import json
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Seed the global PyTorch and NumPy random number generators with a fixed
# value. The synthetic dataset below draws from np.random, so re-running the
# file from a fresh interpreter regenerates the same samples.
torch.manual_seed(42)
np.random.seed(42)

class AdvancedEmotionDataset(Dataset):
    """Map-style dataset pairing raw text with multi-label emotion targets.

    Items are tokenized lazily on access. Each item is a dict with the keys
    ``input_ids``, ``attention_mask``, ``emotions`` and ``intensities``.
    """

    def __init__(self, texts, emotions, intensities, tokenizer, max_length=512):
        # Only references are stored; no tokenization happens here.
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.texts = texts
        self.emotions = emotions
        self.intensities = intensities

    def __len__(self):
        """Number of samples, taken from the length of ``texts``."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return it with its float targets."""
        raw_text = str(self.texts[idx])
        label_row = self.emotions[idx]
        intensity_row = self.intensities[idx]

        # Ask the tokenizer to truncate and pad to ``max_length`` and to hand
        # back PyTorch tensors.
        encoded = self.tokenizer(
            raw_text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        # flatten() collapses the tokenizer's tensors to one dimension per field.
        sample = {
            field: encoded[field].flatten()
            for field in ('input_ids', 'attention_mask')
        }
        sample['emotions'] = torch.FloatTensor(label_row)
        sample['intensities'] = torch.FloatTensor(intensity_row)
        return sample

class MultiTaskEmotionModel(nn.Module):
    """Pretrained encoder with two heads: emotion presence and emotion intensity.

    The encoder's first-token vector is added to a mask-aware mean of a
    self-attention layer's output; the sum feeds a shared MLP followed by one
    head per task.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained``.
        num_emotions: Width of both output heads.
        dropout_rate: Dropout probability used inside the MLP stacks.
    """

    def __init__(self, model_name='distilbert-base-uncased', num_emotions=8, dropout_rate=0.3):
        super(MultiTaskEmotionModel, self).__init__()

        self.bert = AutoModel.from_pretrained(model_name)
        # NOTE: this module is not applied in forward(); it is kept so the
        # attribute stays available to any code that expects it.
        self.dropout = nn.Dropout(dropout_rate)

        # Width of the encoder's hidden states, read from its config.
        bert_hidden_size = self.bert.config.hidden_size

        # Shared feature extraction layers
        self.feature_extractor = nn.Sequential(
            nn.Linear(bert_hidden_size, 512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )

        # Emotion classification head
        self.emotion_classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, num_emotions)
        )

        # Intensity regression head
        self.intensity_regressor = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, num_emotions)
        )

        # Self-attention over the encoder states; its weights are returned
        # from forward() for interpretability.
        self.attention = nn.MultiheadAttention(bert_hidden_size, num_heads=8, batch_first=True)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and both heads.

        Args:
            input_ids: Token ids, batch-first.
            attention_mask: Same leading shape as ``input_ids``; non-zero
                marks real tokens, zero marks padding.

        Returns:
            Tuple ``(emotion_probs, intensity_scores, attention_weights)``.
            The first two have already been passed through a sigmoid (they
            are not logits); the third is whatever ``nn.MultiheadAttention``
            returns as its attention weights.
        """
        hidden_states = self.bert(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state

        # First-token ([CLS]) representation
        cls_output = hidden_states[:, 0, :]

        # Padding positions are excluded as attention *keys* here ...
        attended_output, attention_weights = self.attention(
            hidden_states,
            hidden_states,
            hidden_states,
            key_padding_mask=~attention_mask.bool()
        )

        # ... but they still produce *query* outputs, so a plain mean over the
        # sequence axis would mix them in and make the result depend on how
        # much padding was added. Average over real tokens only.
        token_mask = attention_mask.unsqueeze(-1).to(attended_output.dtype)
        token_counts = token_mask.sum(dim=1).clamp(min=1.0)
        pooled_attended = (attended_output * token_mask).sum(dim=1) / token_counts

        # Combine CLS and attended representations
        combined_features = cls_output + pooled_attended

        features = self.feature_extractor(combined_features)

        emotion_logits = self.emotion_classifier(features)
        emotion_probs = torch.sigmoid(emotion_logits)

        intensity_scores = torch.sigmoid(self.intensity_regressor(features))

        return emotion_probs, intensity_scores, attention_weights

class EmotionAnalyzer:
    """Advanced emotion analysis system"""

    def __init__(self):
        self.emotion_labels = [
            'joy', 'sadness', 'anger', 'fear', 'surprise', 'disgust', 'trust', 'anticipation'
        ]
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")

        # Initialize tokenizer and model
        self.model_name = 'distilbert-base-uncased'
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = None

    def create_synthetic_dataset(self, size=5000):
        """Create a comprehensive synthetic dataset for training"""

        # Emotion-specific text patterns
        emotion_patterns = {
            'joy': [
                "I'm so happy and excited about {}!",
                "This brings me so much joy and happiness.",
                "I feel absolutely wonderful and delighted.",
                "What a fantastic and amazing {}!",
                "I'm thrilled and overjoyed with {}."
            ],
            'sadness': [
                "I feel so sad and heartbroken about {}.",
                "This makes me deeply melancholy and sorrowful.",
                "I'm overwhelmed with grief and sadness.",
                "Such a disappointing and depressing {}.",
                "I feel blue and downhearted about {}."
            ],
            'anger': [
                "I'm absolutely furious and enraged about {}!",
                "This makes me so angry and irritated.",
                "I feel intense rage and frustration.",
                "What an infuriating and maddening {}!",
                "I'm livid and outraged by {}."
            ],
            'fear': [
                "I'm terrified and scared about {}.",
                "This fills me with dread and anxiety.",
                "I feel nervous and apprehensive about {}.",
                "Such a frightening and alarming {}.",
                "I'm worried and fearful of {}."
            ],
            'surprise': [
                "I'm so surprised and amazed by {}!",
                "This is absolutely shocking and unexpected.",
                "What a stunning and astonishing {}!",
                "I'm bewildered and taken aback by {}.",
                "Such an incredible and surprising {}."
            ],
            'disgust': [
                "I find {} absolutely disgusting and revolting.",
                "This makes me feel sick and nauseated.",
                "Such a repulsive and offensive {}.",
                "I'm appalled and disgusted by {}.",
                "This is utterly vile and repugnant."
            ],
            'trust': [
                "I have complete faith and confidence in {}.",
                "This makes me feel secure and trusting.",
                "I deeply believe and trust in {}.",
                "Such a reliable and trustworthy {}.",
                "I feel safe and assured about {}."
            ],
            'anticipation': [
                "I'm eagerly looking forward to {}!",
                "I can't wait and am excited about {}.",
                "This fills me with hope and expectation.",
                "I'm anticipating and preparing for {}.",
                "Such an exciting and promising {}."
            ]
        }

        # Context words for variety
        contexts = [
            "this news", "the outcome", "my future", "this opportunity", "the results",
            "this situation", "the decision", "my experience", "this event", "the change",
            "this journey", "the process", "my life", "this moment", "the discovery"
        ]

        texts = []
        emotion_labels = []
        intensity_scores = []

        for _ in range(size):
            # Randomly select 1-3 emotions for multi-label
            num_emotions = np.random.choice([1, 2, 3], p=[0.6, 0.3, 0.1])
            selected_emotions = np.random.choice(list(emotion_patterns.keys()),
                                               size=num_emotions, replace=False)

            # Create text combining multiple emotions
            text_parts = []
            current_emotions = [0] * len(self.emotion_labels)
            current_intensities = [0.0] * len(self.emotion_labels)

            for emotion in selected_emotions:
                pattern = np.random.choice(emotion_patterns[emotion])
                context = np.random.choice(contexts)
                text_parts.append(pattern.format(context))

                # Set emotion label and intensity
                emotion_idx = self.emotion_labels.index(emotion)
                current_emotions[emotion_idx] = 1
                current_intensities[emotion_idx] = np.random.uniform(0.6, 1.0)

            # Combine text parts
            combined_text = " ".join(text_parts)

            # Add some noise and complexity
            if np.random.random() < 0.3:
                noise_words = ["However,", "But", "Although", "Meanwhile", "Additionally"]
                combined_text = np.random.choice(noise_words) + " " + combined_text

            texts.append(combined_text)
            emotion_labels.append(current_emotions)
            intensity_scores.append(current_intensities)

        return texts, emotion_labels, intensity_scores

    def prepare_data(self, test_size=0.2, val_size=0.1, dataset_size=5000, random_state=42):
        """Build train / validation / test datasets from synthetic samples.

        Args:
            test_size: Share of the data reserved for the test split.
            val_size: Share of the data reserved for the validation split.
            dataset_size: Number of synthetic samples to generate. Defaults to
                5000, the value that was previously hard-coded.
            random_state: Seed passed to both ``train_test_split`` calls.
                Defaults to 42, the value that was previously hard-coded.

        Returns:
            Tuple ``(train_dataset, val_dataset, test_dataset)`` of
            ``AdvancedEmotionDataset`` objects sharing ``self.tokenizer``.
        """
        print("Creating comprehensive synthetic dataset...")
        texts, emotions, intensities = self.create_synthetic_dataset(dataset_size)

        # First split: carve off the combined validation + test portion.
        holdout = test_size + val_size
        (X_train, X_temp,
         y_emotions_train, y_emotions_temp,
         y_intensities_train, y_intensities_temp) = train_test_split(
            texts, emotions, intensities, test_size=holdout, random_state=random_state
        )

        # Second split: divide the held-out portion so that the test split
        # keeps its requested share relative to the validation split.
        (X_val, X_test,
         y_emotions_val, y_emotions_test,
         y_intensities_val, y_intensities_test) = train_test_split(
            X_temp, y_emotions_temp, y_intensities_temp,
            test_size=test_size / holdout, random_state=random_state
        )

        train_dataset = AdvancedEmotionDataset(
            X_train, y_emotions_train, y_intensities_train, self.tokenizer
        )
        val_dataset = AdvancedEmotionDataset(
            X_val, y_emotions_val, y_intensities_val, self.tokenizer
        )
        test_dataset = AdvancedEmotionDataset(
            X_test, y_emotions_test, y_intensities_test, self.tokenizer
        )

        return train_dataset, val_dataset, test_dataset

    def train_model(self, train_dataset, val_dataset, epochs=5, batch_size=16, learning_rate=2e-5):
        """Train a fresh ``MultiTaskEmotionModel`` and plot the loss curves.

        A new model is created and stored on ``self.model``. Each epoch runs
        one pass over ``train_dataset`` (shuffled) followed by a gradient-free
        pass over ``val_dataset``. The per-batch objective is
        ``BCE(emotion_probs, emotions) + 0.5 * MSE(intensity_scores, intensities)``.

        Args:
            train_dataset: Dataset yielding dicts with ``input_ids``,
                ``attention_mask``, ``emotions`` and ``intensities``.
            val_dataset: Dataset of the same form used for validation.
            epochs: Number of passes over the training data.
            batch_size: Batch size for both loaders.
            learning_rate: AdamW learning rate.

        Returns:
            Dict with keys ``'train_loss'`` and ``'val_loss'``, each a list
            holding one average loss per epoch. The validation entry is NaN
            for an epoch in which the validation loader produced no batches.
        """
        # Initialize model
        self.model = MultiTaskEmotionModel(
            model_name=self.model_name,
            num_emotions=len(self.emotion_labels)
        ).to(self.device)

        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        # Linear decay schedule without warm-up, stepped once per batch.
        optimizer = AdamW(self.model.parameters(), lr=learning_rate)
        total_steps = len(train_loader) * epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=total_steps
        )

        # BCELoss (not the logits variant) because the model's forward()
        # returns values that have already been passed through a sigmoid.
        bce_loss = nn.BCELoss()
        mse_loss = nn.MSELoss()

        def compute_loss(batch):
            """Move one batch to the device and return the combined loss."""
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            emotions = batch['emotions'].to(self.device)
            intensities = batch['intensities'].to(self.device)

            emotion_probs, intensity_scores, _ = self.model(input_ids, attention_mask)

            emotion_loss = bce_loss(emotion_probs, emotions)
            intensity_loss = mse_loss(intensity_scores, intensities)
            return emotion_loss + 0.5 * intensity_loss  # Weight intensity loss

        def average(total, batches):
            """Mean loss per batch; NaN instead of dividing by zero."""
            return total / batches if batches else float('nan')

        # Training history
        train_losses = []
        val_losses = []

        print("Starting training...")

        for epoch in range(epochs):
            # Training phase
            self.model.train()
            total_train_loss = 0

            for batch in train_loader:
                optimizer.zero_grad()

                total_loss = compute_loss(batch)

                total_loss.backward()
                # Clip the global gradient norm at 1.0 before the update.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()

                total_train_loss += total_loss.item()

            avg_train_loss = average(total_train_loss, len(train_loader))
            train_losses.append(avg_train_loss)

            # Validation phase
            self.model.eval()
            total_val_loss = 0

            with torch.no_grad():
                for batch in val_loader:
                    total_val_loss += compute_loss(batch).item()

            avg_val_loss = average(total_val_loss, len(val_loader))
            val_losses.append(avg_val_loss)

            print(f'Epoch {epoch+1}/{epochs}:')
            print(f'  Train Loss: {avg_train_loss:.4f}')
            print(f'  Val Loss: {avg_val_loss:.4f}')
            print()

        # Plot training history
        plt.figure(figsize=(12, 4))

        plt.subplot(1, 2, 1)
        plt.plot(train_losses, label='Train Loss', color='blue')
        plt.plot(val_losses, label='Validation Loss', color='red')
        plt.title('Training History')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True)

        plt.subplot(1, 2, 2)
        plt.plot(train_losses, label='Train Loss')
        plt.title('Training Loss Over Time')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.grid(True)

        plt.tight_layout()
        plt.show()

        print("Training completed!")

        return {'train_loss': train_losses, 'val_loss': val_losses}

    def predict_emotions(self, text, return_attention=False):
        """Predict emotions and intensities for given text"""

        if self.model is None:
            raise ValueError("Model not trained yet!")

        self.model.eval()

        # Tokenize input
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=512,
            return_tensors='pt'
        )

        input_ids = encoding['input_ids'].to(self.device)
        attention_mask = encoding['attention_mask'].to(self.device)

        with torch.no_grad():
            emotion_probs, intensity_scores, attention_weights = self.model(input_ids, attention_mask)

        # Convert to numpy
        emotions = emotion_probs.cpu().numpy()[0]
        intensities = intensity_scores.cpu().numpy()[0]

        # Create results dictionary
        results = {
            'text': text,
            'predictions': {}
        }

        for i, emotion in enumerate(self.emotion_labels):
            results['predictions'][emotion] = {
                'probability': float(emotions[i]),
                'intensity': float(intensities[i]),
                'detected': emotions[i] > 0.5
            }

        if return_attention:
            results['attention_weights'] = attention_weights.cpu().numpy()

        return results

    def analyze_emotion_trends(self, texts):
        """Analyze emotion trends across multiple texts"""

        results = []
        for text in texts:
            prediction = self.predict_emotions(text)
            results.append(prediction)

        # Create trend analysis
        emotion_trends = {emotion: [] for emotion in self.emotion_labels}
        intensity_trends = {emotion: [] for emotion in self.emotion_labels}

        for result in results:
            for emotion in self.emotion_labels:
                emotion_trends[emotion].append(result['predictions'][emotion]['probability'])
                intensity_trends[emotion].append(result['predictions'][emotion]['intensity'])

        # Visualize trends
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        # Emotion probabilities over time
        axes[0, 0].set_title('Emotion Probabilities Over Texts')
        for emotion in self.emotion_labels:
            axes[0, 0].plot(emotion_trends[emotion], label=emotion, alpha=0.7)
        axes[0, 0].legend()
        axes[0, 0].set_xlabel('Text Index')
        axes[0, 0].set_ylabel('Probability')
        axes[0, 0].grid(True)

        # Average emotion intensities
        avg_intensities = [np.mean(intensity_trends[emotion]) for emotion in self.emotion_labels]
        axes[0, 1].bar(self.emotion_labels, avg_intensities, color='skyblue')
        axes[0, 1].set_title('Average Emotion Intensities')
        axes[0, 1].set_ylabel('Average Intensity')
        axes[0, 1].tick_params(axis='x', rotation=45)

        # Emotion correlation heatmap
        emotion_matrix = np.array([emotion_trends[emotion] for emotion in self.emotion_labels])
        correlation = np.corrcoef(emotion_matrix)
        im = axes[1, 0].imshow(correlation, cmap='coolwarm', vmin=-1, vmax=1)
        axes[1, 0].set_title('Emotion Correlation Matrix')
        axes[1, 0].set_xticks(range(len(self.emotion_labels)))
        axes[1, 0].set_yticks(range(len(self.emotion_labels)))
        axes[1, 0].set_xticklabels(self.emotion_labels, rotation=45)
        axes[1, 0].set_yticklabels(self.emotion_labels)
        plt.colorbar(im, ax=axes[1, 0])

        # Dominant emotions distribution
        dominant_emotions = []
        for result in results:
            max_emotion = max(result['predictions'].items(),
                            key=lambda x: x[1]['probability'])[0]
            dominant_emotions.append(max_emotion)

        emotion_counts = {emotion: dominant_emotions.count(emotion) for emotion in self.emotion_labels}
        axes[1, 1].pie(emotion_counts.values(), labels=emotion_counts.keys(), autopct='%1.1f%%')
        axes[1, 1].set_title('Dominant Emotions Distribution')

        plt.tight_layout()
        plt.show()

        return results

    def create_emotion_wordcloud(self, texts):
        """Draw one word cloud per emotion from the texts it dominates.

        Each text is assigned to the emotion with the highest predicted
        probability. Emotions with no assigned text get a placeholder message
        instead of a cloud. The figure is a fixed 2x4 grid.
        """
        # Bucket every text under its highest-probability emotion.
        texts_by_emotion = {label: [] for label in self.emotion_labels}
        for text in texts:
            scores = self.predict_emotions(text)['predictions']
            top_label = max(scores.items(), key=lambda pair: pair[1]['probability'])[0]
            texts_by_emotion[top_label].append(text)

        fig, axes = plt.subplots(2, 4, figsize=(20, 10))
        panels = axes.flatten()

        for position, label in enumerate(self.emotion_labels):
            panel = panels[position]
            bucket = texts_by_emotion[label]

            if not bucket:
                # Nothing was assigned to this emotion: show a centred note.
                panel.text(0.5, 0.5, f'No {label} texts', ha='center', va='center', transform=panel.transAxes)
            else:
                cloud = WordCloud(width=400, height=300, background_color='white')
                panel.imshow(cloud.generate(' '.join(bucket)), interpolation='bilinear')

            panel.set_title(f'{label.capitalize()} Words', fontsize=14)
            panel.axis('off')

        plt.tight_layout()
        plt.show()

    def interactive_emotion_analysis(self, texts):
        """Create interactive visualizations using Plotly"""

        # Analyze emotions for all texts
        results = []
        for i, text in enumerate(texts):
            prediction = self.predict_emotions(text)
            for emotion in self.emotion_labels:
                results.append({
                    'text_id': i,
                    'text_preview': text[:50] + '...' if len(text) > 50 else text,
                    'emotion': emotion,
                    'probability': prediction['predictions'][emotion]['probability'],
                    'intensity': prediction['predictions'][emotion]['intensity']
                })

        df = pd.DataFrame(results)

        # Create interactive emotion heatmap
        pivot_prob = df.pivot(index='text_id', columns='emotion', values='probability')
        pivot_intensity = df.pivot(index='text_id', columns='emotion', values='intensity')

        fig1 = px.imshow(pivot_prob.T,
                        title='Emotion Probabilities Across Texts',
                        labels=dict(x="Text ID", y="Emotion", color="Probability"),
                        color_continuous_scale='viridis')
        fig1.show()

        # Create interactive scatter plot
        fig2 = px.scatter(df, x='probability', y='intensity', color='emotion',
                         size='probability', hover_data=['text_preview'],
                         title='Emotion Probability vs Intensity')
        fig2.show()

        # Create radar chart for average emotions
        avg_emotions = df.groupby('emotion')[['probability', 'intensity']].mean().reset_index()

        fig3 = go.Figure()

        fig3.add_trace(go.Scatterpolar(
            r=avg_emotions['probability'],
            theta=avg_emotions['emotion'],
            fill='toself',
            name='Probability'
        ))

        fig3.add_trace(go.Scatterpolar(
            r=avg_emotions['intensity'],
            theta=avg_emotions['emotion'],
            fill='toself',
            name='Intensity'
        ))

        fig3.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1]
                )),
            showlegend=True,
            title="Average Emotion Scores"
        )
        fig3.show()

        return df

# End-to-end demonstration: build data, train, then run every analysis view
def main():
    """Run the full demo and return the trained ``EmotionAnalyzer``."""
    rule = "=" * 50

    def section(title):
        # Blank line, rule, title, rule: the layout used by every later section.
        print("\n" + rule)
        print(title)
        print(rule)

    print("🚀 Advanced Emotion Detection System")
    print(rule)

    analyzer = EmotionAnalyzer()

    # Synthetic data split three ways; the test split is only counted here.
    train_dataset, val_dataset, test_dataset = analyzer.prepare_data()

    print("Dataset prepared:")
    print(f"  Training samples: {len(train_dataset)}")
    print(f"  Validation samples: {len(val_dataset)}")
    print(f"  Test samples: {len(test_dataset)}")
    print()

    analyzer.train_model(train_dataset, val_dataset, epochs=3, batch_size=8)

    # One hand-written example per emotion label.
    sample_texts = [
        "I'm absolutely thrilled about this amazing opportunity! It's going to be fantastic.",
        "I feel so sad and disappointed about what happened. It breaks my heart.",
        "This situation makes me incredibly angry and frustrated. I can't stand it!",
        "I'm really worried and anxious about the upcoming presentation. What if I mess up?",
        "What a surprising turn of events! I never expected this to happen.",
        "This behavior is absolutely disgusting and repulsive. I can't tolerate it.",
        "I have complete trust and confidence in your abilities. You're reliable.",
        "I'm eagerly anticipating the results. I can't wait to see what happens!"
    ]

    section("🔍 Individual Emotion Analysis")

    # Only the first three texts are printed in detail, for brevity.
    for text in sample_texts[:3]:
        predictions = analyzer.predict_emotions(text)['predictions']
        print(f"\nText: {text}")
        print("Detected emotions:")

        for emotion, data in predictions.items():
            if not data['detected']:
                continue
            print(f"  {emotion.capitalize()}: {data['probability']:.3f} (intensity: {data['intensity']:.3f})")

    section("📊 Comprehensive Analysis")

    # The three visual analyses; their return values are not used here.
    analyzer.analyze_emotion_trends(sample_texts)
    analyzer.create_emotion_wordcloud(sample_texts)

    print("\nCreating interactive visualizations...")
    analyzer.interactive_emotion_analysis(sample_texts)

    section("✅ System Demo Complete!")
    print("Key Features Demonstrated:")
    for feature in (
        "• Multi-label emotion classification",
        "• Emotion intensity prediction",
        "• Advanced BERT-based architecture",
        "• Attention mechanism for interpretability",
        "• Comprehensive visualization suite",
        "• Interactive analysis tools",
        "• Word cloud generation",
        "• Trend analysis capabilities",
    ):
        print(feature)

    return analyzer

# Run the demonstration only when this module is the entry point. The analyzer
# returned by main() is bound to a module-level name so it remains available
# after the demo finishes.
if __name__ == "__main__":
    emotion_analyzer = main()