# Emotion Recognition - Quick Start Guide

This notebook demonstrates how to use the Emotion Recognition system for text classification.

## Setup

In [None]:
# Import required libraries
import sys
import os
from pathlib import Path

# Add src to path
# The entry appended is "<parent of the current working directory>/src", so it
# depends on where the kernel was started rather than on this file's location.
# NOTE(review): presumably the notebook is run from a folder directly under the
# project root (e.g. notebooks/) — confirm.
sys.path.append(str(Path.cwd().parent / "src"))

import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Import emotion recognition modules
# These are project-local modules; they are expected to be found through the
# sys.path entry added above, which is why they come after it.
from data_utils import EmotionDataLoader, download_sample_data
from preprocessing import EmotionPreprocessor
from models import create_model, EmotionPredictor
from config import ConfigManager

# Reached only if every import above succeeded
print("✅ Imports successful!")

## 1. Create Sample Data

Let's create some sample emotion data to work with:

In [None]:
# Make sure the raw-data folder exists, then ask the helper for the sample file.
# The value it returns is what gets read back with pandas just below.
data_dir = "../data/raw"
os.makedirs(data_dir, exist_ok=True)
sample_file = download_sample_data(data_dir)
print(f"Created sample data: {sample_file}")

# Read the file and take a first look: overall shape plus the leading rows
df = pd.read_csv(sample_file)
print(f"\nDataset shape: {df.shape}")
print("\nFirst 5 rows:", df.head(), sep="\n")

# Number of rows per emotion label
emotion_counts = df["label"].value_counts()
print("\nEmotion distribution:", emotion_counts, sep="\n")

In [None]:
# Bar chart of samples per emotion, with bars ordered like `emotion_counts`
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x="label", order=emotion_counts.index, ax=ax)
ax.set_title("Emotion Distribution in Sample Dataset")
ax.set_xlabel("Emotion")
ax.set_ylabel("Count")
# Tilt the category names so longer labels do not run into each other
ax.tick_params(axis="x", labelrotation=45)
fig.tight_layout()
plt.show()

## 2. Text Preprocessing

Let's explore the preprocessing capabilities:

In [None]:
# Options handed to the preprocessor for this demonstration
preprocessor_options = dict(
    lowercase=True,
    remove_urls=True,
    remove_mentions=True,
    emoji_handling="convert",
    expand_contractions=True,
)
preprocessor = EmotionPreprocessor(**preprocessor_options)

# Inputs mixing upper case, emoji, a URL, a mention, a hashtag and contractions
test_texts = [
    "I'm SO HAPPY today! 😊 Check out https://example.com @friend",
    "Can't believe this happened... I don't like it 😢",
    "#GreatNews! We won't give up! 🎉"
]

print("Preprocessing Examples:", "=" * 50, sep="\n")

# Show every input next to what the preprocessor returns for it
for example_no, raw_text in enumerate(test_texts, start=1):
    cleaned_text = preprocessor.preprocess(raw_text)
    print(
        f"\nExample {example_no}:",
        f"Original:  {raw_text}",
        f"Processed: {cleaned_text}",
        sep="\n",
    )

## 3. Load Pre-trained Model (Simulation)

Since we don't have a trained model yet, the cells below only load a model configuration and build a model from it, to show what the workflow looks like:

In [None]:
# Fetch the configuration registered under the name "distilbert"
config_manager = ConfigManager()
config = config_manager.load_model_config("distilbert")

# Model section
model_cfg = config.model
print("Model Configuration:")
print(f"Model Type: {model_cfg.type}")
print(f"Number of Classes: {model_cfg.num_classes}")
print(f"Max Length: {model_cfg.max_length}")
print(f"Dropout Rate: {model_cfg.dropout_rate}")

# Training section (looked up only now, after the model section was printed)
training_cfg = config.training
print("\nTraining Configuration:")
print(f"Batch Size: {training_cfg.batch_size}")
print(f"Learning Rate: {training_cfg.learning_rate}")
print(f"Number of Epochs: {training_cfg.num_epochs}")

# Emotion labels listed in the configuration
print("\nSupported Emotions:")
print(config.emotions)

In [None]:
# Build a DistilBERT-type model with six output classes (demonstration only)
model = create_model(model_type="distilbert", num_classes=6, dropout_rate=0.3)
print(f"Created model: {type(model).__name__}")

# Walk the parameters once, recording each tensor's element count together
# with whether it takes part in gradient updates
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, is_trainable in param_sizes if is_trainable)

print(f"Number of parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

## 4. Data Loading and Preparation

In [None]:
# Initialize data loader
data_loader = EmotionDataLoader("../data")

# Load and validate the sample file.
# The frames get their own names instead of rebinding `df`: `df` is the raw
# frame that the earlier cells printed and plotted, and overwriting it here
# would make those cells show different data when they are re-run.
loaded_df = data_loader.load_csv_data(sample_file)
validated_df = data_loader.validate_data(loaded_df)

print(f"Validated dataset: {len(validated_df)} samples")

# Split data into train / validation / test parts (70 / 15 / 15), with a fixed
# random_state so the split is repeatable
train_df, val_df, test_df = data_loader.split_data(
    validated_df,
    train_size=0.7,
    val_size=0.15,
    test_size=0.15,
    random_state=42
)

print("\nData splits:")
print(f"Training: {len(train_df)} samples")
print(f"Validation: {len(val_df)} samples")
print(f"Test: {len(test_df)} samples")

# Show class distribution in training set
train_dist = train_df['label'].value_counts()
n_train = len(train_df)  # loop-invariant, so computed once
print("\nTraining set emotion distribution:")
for emotion, count in train_dist.items():
    # An empty training set yields an empty `train_dist`, so the division
    # below is never reached with n_train == 0.
    percentage = (count / n_train) * 100
    print(f"{emotion}: {count} ({percentage:.1f}%)")

## 5. Training Command Examples

Here are the commands you would use to train models:

In [None]:
# Each entry pairs a "# ..." caption with the shell command it describes
training_recipes = [
    (
        "# Train DistilBERT model with sample data",
        "python scripts/train.py --config distilbert --use-sample-data --experiment-name distilbert_sample",
    ),
    (
        "# Train Twitter RoBERTa model",
        "python scripts/train.py --config twitter_roberta --data-path data/emotions.csv --epochs 8",
    ),
    (
        "# Train BiLSTM model with custom parameters",
        "python scripts/train.py --config bilstm --batch-size 64 --learning-rate 0.001 --epochs 15",
    ),
    (
        "# Train ensemble model",
        "python scripts/train.py --config ensemble --data-path data/emotions.csv --experiment-name ensemble_model",
    ),
]

# Flatten into printable lines, with one empty line between consecutive recipes
commands = []
for caption, invocation in training_recipes:
    if commands:
        commands.append("")
    commands += [caption, invocation]

# Header, underline and all command lines, one per output line
print("Training Command Examples:", "=" * 50, *commands, sep="\n")

## 6. Prediction Examples

Examples of how to make predictions with trained models:

In [None]:
# Caption -> command; dict insertion order is the order they are printed in
prediction_recipes = {
    "# Single text prediction":
        'python scripts/predict.py -m outputs/my_model -t "I love this amazing day!"',
    "# Batch prediction from file":
        "python scripts/predict.py -m outputs/my_model --text-file input_texts.txt -o predictions.json",
    "# CSV file prediction with probabilities":
        "python scripts/predict.py -m outputs/my_model --csv-file data.csv --include-probabilities -o results.csv",
    "# Top-3 predictions":
        'python scripts/predict.py -m outputs/my_model -t "Mixed emotions text" --top-k 3 --include-probabilities',
}

# Put an empty line in front of every recipe, then drop the very first one so
# the blanks only separate recipes
spaced_lines = []
for caption, invocation in prediction_recipes.items():
    spaced_lines.extend(["", caption, invocation])
prediction_commands = spaced_lines[1:]

print("Prediction Command Examples:")
print("=" * 50)
print("\n".join(prediction_commands))

## 7. Evaluation Examples

In [None]:
# Captions ("# ...") alternate with their commands; empty strings are spacer lines
eval_commands = [
    "# Basic evaluation",
    "python scripts/evaluate.py -m outputs/my_model --output-dir evaluation_results",
    "",
    "# Evaluation with error analysis",
    "python scripts/evaluate.py -m outputs/my_model --error-analysis --save-predictions",
    "",
    "# Evaluate on custom test data",
    "python scripts/evaluate.py -m outputs/my_model --data-path test_data.csv --output-dir custom_eval",
    "",
    "# Quick evaluation without plots",
    "python scripts/evaluate.py -m outputs/my_model --no-plots --batch-size 128",
]

# Assemble header, underline and commands, then emit them in a single call
eval_report = ["Evaluation Command Examples:", "=" * 50, *eval_commands]
print("\n".join(eval_report))

## 8. Python API Usage

How to use the emotion recognition system directly in Python:

In [None]:
# The snippet below is only printed, never executed: the calls it shows are
# meant for use once a trained model is available.
api_examples = """
# Quick prediction (simplest approach)
from src import quick_predict

result = quick_predict("I love this amazing day!", "outputs/my_model")
print(result)  # Output: "joy"

# Advanced usage with custom preprocessor
from src import EmotionPreprocessor, EmotionPredictor, create_model

# Create components
preprocessor = EmotionPreprocessor(emoji_handling="convert")
model = create_model("distilbert", num_classes=6)
predictor = EmotionPredictor(
    model=model,
    preprocessor=preprocessor,
    emotion_labels=["sadness", "joy", "love", "anger", "fear", "surprise"]
)

# Make predictions
result = predictor.predict("I'm so excited! 🎉")
print(result)  # Output: emotion prediction

# Batch predictions
texts = ["I love this!", "This is frustrating", "What a surprise!"]
results = predictor.predict_batch(texts, return_probabilities=True)
for text, result in zip(texts, results):
    print(f"Text: {text}")
    print(f"Emotion: {result['emotion']} (confidence: {result['probability']:.3f})")
"""

# Header, underline, then the snippet itself
print("Python API Usage Examples:", "=" * 30, api_examples, sep="\n")

## 9. Configuration Customization

In [None]:
# Printed as text only: an example of writing a custom configuration to YAML
# and pointing the training script at it.
config_examples = """
# Create custom configuration
custom_config = {
    "model": {
        "type": "distilbert",
        "num_classes": 6,
        "max_length": 256,  # Longer sequences
        "dropout_rate": 0.2  # Lower dropout
    },
    "training": {
        "batch_size": 16,   # Smaller batches
        "learning_rate": 1e-5,  # Lower learning rate
        "num_epochs": 15,   # More epochs
        "early_stopping": {
            "enabled": True,
            "patience": 5
        }
    },
    "preprocessing": {
        "lowercase": True,
        "emoji_handling": "keep",  # Keep emojis
        "remove_urls": False       # Keep URLs
    }
}

# Save custom config
import yaml
with open("my_custom_config.yaml", "w") as f:
    yaml.dump(custom_config, f)

# Use custom config
# python scripts/train.py --config-file my_custom_config.yaml
"""

# Header, underline, then the snippet itself
print("Configuration Customization Examples:", "=" * 40, config_examples, sep="\n")

## 10. Next Steps

To actually train and use models:

1. **Prepare your data**: Create a CSV file with 'text' and 'label' columns
2. **Choose a model**: Select from distilbert, twitter_roberta, bilstm, or ensemble
3. **Train the model**: Use the training script with your data
4. **Evaluate performance**: Use the evaluation script to assess model quality
5. **Make predictions**: Use the trained model for emotion classification

### Training Command:
```bash
python scripts/train.py --config distilbert --data-path your_data.csv --experiment-name my_experiment
```

### Prediction Command:
```bash
python scripts/predict.py -m outputs/my_experiment -t "Your text here"
```

Happy emotion recognition! 😊