In [None]:
!pip install torch transformers pandas scikit-learn

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
# At the start of your Colab notebook
!pip install torch transformers pandas scikit-learn tqdm

# Check GPU: report CUDA availability and, only when a device exists, its name.
# Querying the device name without a CUDA device raises, so it is guarded.
import torch
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device detected; the notebook will fall back to CPU.")

True
Tesla T4


In [None]:
!pip install --upgrade transformers


Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m78.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.51.1
    Uninstalling transformers-4.51.1:
      Successfully uninstalled transformers-4.51.1
Successfully installed transformers-4.51.3


In [None]:
import ast

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import RobertaTokenizer, RobertaForSequenceClassification, get_linear_schedule_with_warmup

class EnhancedEmotionCauseDataset(Dataset):
    """Torch dataset that appends the text of annotated causes to each utterance.

    Every item is built from one dataframe row: its ``statement`` is followed by
    the statements of the utterances listed in the row's ``causing_pairs`` column
    (matched by ``conversation_id`` inside the same ``scene_id``), joined with
    ``" [CAUSE] "``, and the result is tokenized to a fixed length.

    NOTE(review): ``"[CAUSE]"`` is not registered with the tokenizer anywhere in
    this notebook, so it is tokenized as ordinary text. Also, ``predict_emotion``
    in this notebook encodes only the statement, so training inputs contain cause
    text that inference inputs do not -- confirm this is intended.

    Args:
        dataframe: pandas DataFrame with the columns ``statement``, ``emotion``,
            ``scene_id``, ``conversation_id`` and ``causing_pairs``.
        tokenizer: object exposing ``encode_plus`` (a RoBERTa tokenizer in this
            notebook).
        max_len: length every encoded sequence is padded/truncated to.
        emotion_encoder: fitted encoder whose ``transform`` maps the ``emotion``
            column to integer class ids.
    """

    def __init__(self, dataframe, tokenizer, max_len, emotion_encoder):
        self.dataframe = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.emotion_encoder = emotion_encoder
        self.emotions = self.emotion_encoder.transform(dataframe['emotion'])

        # (scene_id, conversation_id) -> statement, built once so that item
        # access does not rescan the whole frame. ``setdefault`` keeps the first
        # row for a duplicated key, matching a "first matching row" lookup.
        self._statement_lookup = {}
        for scene_id, conv_id, text in zip(
            dataframe['scene_id'], dataframe['conversation_id'], dataframe['statement']
        ):
            self._statement_lookup.setdefault((scene_id, conv_id), text)

    def __len__(self):
        return len(self.dataframe)

    @staticmethod
    def _parse_cause_pairs(raw):
        """Convert a raw ``causing_pairs`` cell into a list of conversation ids.

        Strings are parsed with ``ast.literal_eval`` (never ``eval``), so only
        Python literals are accepted and no code embedded in the CSV can run.
        Missing values (``None``/NaN/blank string) mean "no causes".

        Raises:
            ValueError / SyntaxError: if a string cell is not a valid literal.
            ValueError: if the value is neither a list/tuple/set nor missing.
        """
        if isinstance(raw, str):
            text = raw.strip()
            raw = ast.literal_eval(text) if text else []
        if isinstance(raw, (list, tuple, set)):
            return list(raw)
        # NaN is the only float that is not equal to itself.
        if raw is None or (isinstance(raw, float) and raw != raw):
            return []
        raise ValueError(f"Unsupported causing_pairs value: {raw!r}")

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``emotion_label`` for row ``idx``."""
        row = self.dataframe.iloc[idx]
        statement = row['statement']
        emotion = self.emotions[idx]

        cause_statements = []
        for cause_conv_id in self._parse_cause_pairs(row['causing_pairs']):
            key = (row['scene_id'], cause_conv_id)
            try:
                if key in self._statement_lookup:
                    cause_statements.append(self._statement_lookup[key])
            except TypeError:
                # An unhashable id can never match a conversation_id; skip it.
                continue

        if cause_statements:
            combined_text = " [CAUSE] ".join([statement] + cause_statements)
        else:
            combined_text = statement

        encoding = self.tokenizer.encode_plus(
            combined_text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            # encode_plus returns (1, max_len) tensors; flatten to (max_len,) so
            # the DataLoader can stack items into a batch.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'emotion_label': torch.tensor(emotion, dtype=torch.long)
        }

def train_emotion_cause_model(train_path, dev_path, test_path):
    """Fine-tune ``roberta-base`` as an emotion classifier and save the artifacts.

    Reads the train and dev CSVs, fits a ``LabelEncoder`` on the training
    ``emotion`` column, trains for a fixed number of epochs with a
    class-weighted cross-entropy loss, prints the average train/validation loss
    per epoch, and writes the model and tokenizer to ``./emotion_cause_model``
    and the label classes to ``./emotion_labels.npy``.

    Args:
        train_path: CSV used for training.
        dev_path: CSV used for per-epoch validation.
        test_path: accepted for interface compatibility; it is not read because
            nothing in training uses the test split.

    Returns:
        ``(model, emotion_encoder)``: the model after the final epoch and the
        fitted label encoder.
    """
    # Single source of truth for the epoch count: the LR schedule length and the
    # training loop must agree on it.
    num_epochs = 5
    max_len = 128
    model_name = 'roberta-base'

    train_df = pd.read_csv(train_path)
    dev_df = pd.read_csv(dev_path)

    tokenizer = RobertaTokenizer.from_pretrained(model_name)

    # Encode emotions and compute "balanced" class weights from the training labels.
    emotion_encoder = LabelEncoder()
    emotion_encoder.fit(train_df['emotion'])
    train_labels = emotion_encoder.transform(train_df['emotion'])
    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(train_labels), y=train_labels)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Mixed precision and pinned host memory only make sense with a CUDA device.
    on_cuda = device.type == 'cuda'

    model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=len(emotion_encoder.classes_))
    model.to(device)
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)
    loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights_tensor)

    train_dataset = EnhancedEmotionCauseDataset(train_df, tokenizer, max_len, emotion_encoder)
    dev_dataset = EnhancedEmotionCauseDataset(dev_df, tokenizer, max_len, emotion_encoder)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=on_cuda, num_workers=2)
    dev_loader = DataLoader(dev_dataset, batch_size=32, pin_memory=on_cuda, num_workers=2)

    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
    total_steps = len(train_loader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    # torch.amp.* replaces the deprecated torch.cuda.amp.* entry points.
    scaler = torch.amp.GradScaler('cuda', enabled=on_cuda)

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        for batch in progress_bar:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            emotion_labels = batch['emotion_label'].to(device)

            with torch.amp.autocast(device_type=device.type, enabled=on_cuda):
                outputs = model(input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                loss = loss_fn(logits, emotion_labels)

            scaler.scale(loss).backward()
            # Gradients are still multiplied by the loss scale here; unscale them
            # first so the 1.0 clipping threshold applies to the true gradient norm.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            scale_before = scaler.get_scale()
            scaler.step(optimizer)
            scaler.update()
            # The scaler lowers its scale only when it skipped the optimizer step
            # (inf/NaN gradients); do not advance the LR schedule for such steps.
            if scaler.get_scale() >= scale_before:
                scheduler.step()

            batch_loss = loss.item()
            total_train_loss += batch_loss
            progress_bar.set_postfix({'Training Loss': batch_loss})

        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for batch in tqdm(dev_loader, desc="Validation", unit="batch"):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                emotion_labels = batch['emotion_label'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                loss = loss_fn(logits, emotion_labels)
                total_val_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print(f"Average training loss: {avg_train_loss:.4f}")
        print(f"Average validation loss: {avg_val_loss:.4f}")

    # NOTE(review): the weights saved are those of the last epoch, not of the
    # epoch with the lowest validation loss.
    model.save_pretrained('./emotion_cause_model')
    tokenizer.save_pretrained('./emotion_cause_model')
    np.save('./emotion_labels.npy', emotion_encoder.classes_)

    return model, emotion_encoder

def predict_emotion(statement, model, tokenizer, emotion_encoder, max_len=128):
    """Predict the emotion label of a single statement.

    Args:
        statement: text to classify.
        model: sequence-classification model whose output exposes ``.logits``.
        tokenizer: tokenizer exposing ``encode_plus``.
        emotion_encoder: encoder whose ``inverse_transform`` maps a class index
            back to its label.
        max_len: length the input is padded/truncated to. Defaults to 128, the
            value used for training in this notebook.

    Returns:
        The label of the highest-scoring class.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    encoded = tokenizer.encode_plus(
        statement,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    predicted_emotion_idx = torch.argmax(outputs.logits, dim=1).item()
    return emotion_encoder.inverse_transform([predicted_emotion_idx])[0]

def main():
    """Train the classifier, then print its prediction for one sample sentence."""
    trained_model, label_encoder = train_emotion_cause_model('train.csv', 'dev.csv', 'test.csv')

    # The sample is classified with a freshly loaded base tokenizer.
    sample_text = "I am really excited about this project!"
    base_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

    result = predict_emotion(sample_text, trained_model, base_tokenizer, label_encoder)
    print(f"Predicted Emotion: {result}")


if __name__ == '__main__':
    main()


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Epoch 1/5: 100%|██████████| 312/312 [01:05<00:00,  4.75batch/s, Training Loss=1.72]
Validation: 100%|██████████| 34/34 [00:06<00:00,  5.10batch/s]



Epoch 1/5
Average training loss: 1.9002
Average validation loss: 1.7082


Epoch 2/5: 100%|██████████| 312/312 [01:05<00:00,  4.80batch/s, Training Loss=1.15]
Validation: 100%|██████████| 34/34 [00:06<00:00,  5.23batch/s]



Epoch 2/5
Average training loss: 1.5437
Average validation loss: 1.4004


Epoch 3/5: 100%|██████████| 312/312 [01:04<00:00,  4.87batch/s, Training Loss=0.944]
Validation: 100%|██████████| 34/34 [00:06<00:00,  5.22batch/s]



Epoch 3/5
Average training loss: 1.2770
Average validation loss: 1.2355


Epoch 4/5: 100%|██████████| 312/312 [01:03<00:00,  4.89batch/s, Training Loss=0.729]
Validation: 100%|██████████| 34/34 [00:06<00:00,  5.23batch/s]



Epoch 4/5
Average training loss: 1.1013
Average validation loss: 1.2474


Epoch 5/5: 100%|██████████| 312/312 [01:03<00:00,  4.89batch/s, Training Loss=0.901]
Validation: 100%|██████████| 34/34 [00:06<00:00,  5.22batch/s]



Epoch 5/5
Average training loss: 1.0194
Average validation loss: 1.2609
Predicted Emotion: neutral


In [None]:
import pandas as pd
import numpy as np
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

def load_model_and_tokenizer(model_path, labels_path='./emotion_labels.npy'):
    """Load the fine-tuned model, its tokenizer and the emotion label encoder.

    Args:
        model_path: directory containing the saved model and tokenizer.
        labels_path: ``.npy`` file holding the label classes. Defaults to
            ``./emotion_labels.npy``, the location the training cell of this
            notebook writes to.

    Returns:
        ``(model, tokenizer, emotion_encoder)`` where ``emotion_encoder`` is a
        ``LabelEncoder`` whose ``classes_`` were restored from ``labels_path``.

    Raises:
        ValueError: if the number of stored labels differs from the size of the
            model's classification head.
    """
    tokenizer = RobertaTokenizer.from_pretrained(model_path)
    model = RobertaForSequenceClassification.from_pretrained(model_path)

    # SECURITY: allow_pickle=True unpickles the file, which can execute
    # arbitrary code. Only load label files produced by this notebook.
    # (Presumably needed because the classes are saved as an object array of
    # strings -- TODO confirm.)
    emotion_labels = np.load(labels_path, allow_pickle=True)

    # A label file from a different training run would silently mislabel
    # every prediction, so fail loudly instead.
    if len(emotion_labels) != model.config.num_labels:
        raise ValueError(
            f"{labels_path} holds {len(emotion_labels)} labels but the model at "
            f"{model_path} has {model.config.num_labels} output classes"
        )

    emotion_encoder = LabelEncoder()
    emotion_encoder.classes_ = emotion_labels

    return model, tokenizer, emotion_encoder

def prepare_cause_data(dataframe):
    """Attach, to every row, the statements of the utterances that caused it.

    Each row's ``causing_pairs`` cell lists conversation ids; every id is
    resolved to the ``statement`` of the row with that ``conversation_id`` in
    the same ``scene_id``. Ids with no matching row are skipped.

    Args:
        dataframe: pandas DataFrame with the columns ``scene_id``,
            ``conversation_id``, ``statement`` and ``causing_pairs``.

    Returns:
        The same DataFrame object, with a new ``cause_statements`` column of
        lists. Note that the input frame is modified in place.

    Raises:
        ValueError / SyntaxError: if a string ``causing_pairs`` cell is not a
            valid Python literal, or the value is not a list/tuple/set.
    """
    import ast

    # (scene_id, conversation_id) -> statement
    conv_to_statement = dataframe.set_index(['scene_id', 'conversation_id'])['statement'].to_dict()

    def parse_cause_pairs(raw):
        """Parse one cell into a list of ids without evaluating arbitrary code."""
        if isinstance(raw, str):
            text = raw.strip()
            raw = ast.literal_eval(text) if text else []
        if isinstance(raw, (list, tuple, set)):
            return list(raw)
        # Missing values (None / NaN) mean "no causes".
        if raw is None or (isinstance(raw, float) and raw != raw):
            return []
        raise ValueError(f"Unsupported causing_pairs value: {raw!r}")

    def cause_statements_for(scene_id, raw_pairs):
        """Resolve one row's cause ids with direct dictionary lookups."""
        found = []
        for cause_id in parse_cause_pairs(raw_pairs):
            key = (scene_id, cause_id)
            try:
                if key in conv_to_statement:
                    found.append(conv_to_statement[key])
            except TypeError:
                # An unhashable id cannot equal any conversation_id; skip it.
                continue
        return found

    resolved = [
        cause_statements_for(scene_id, raw_pairs)
        for scene_id, raw_pairs in zip(dataframe['scene_id'], dataframe['causing_pairs'])
    ]
    # Wrap in an object Series so each cell holds one list regardless of the
    # lists' lengths.
    dataframe['cause_statements'] = pd.Series(resolved, index=dataframe.index, dtype=object)

    return dataframe

def predict_emotions_and_causes(test_path, model, tokenizer, emotion_encoder, max_len=128):
    """Predict an emotion for every utterance of a CSV and attach its annotated causes.

    Only the emotion is predicted by the model. The ``causes`` of each record
    are NOT predicted: they are looked up from the row's ``causing_pairs``
    annotation and resolved to statements within the same scene, and each one is
    stored with ``is_valid_cause`` set to ``True``.

    Args:
        test_path: CSV with the columns ``scene_id``, ``conversation_id``,
            ``statement``, ``emotion`` and ``causing_pairs``.
        model: sequence-classification model whose output exposes ``.logits``.
        tokenizer: tokenizer exposing ``encode_plus``.
        emotion_encoder: encoder whose ``inverse_transform`` maps a class index
            back to its label.
        max_len: length each input is padded/truncated to (default 128).

    Returns:
        A list of dicts with the keys ``scene_id``, ``conversation_id``,
        ``statement``, ``true_emotion``, ``predicted_emotion`` and ``causes``,
        ordered by scene and, inside a scene, by ``conversation_id``.

    Raises:
        ValueError / SyntaxError: if a string ``causing_pairs`` cell is not a
            valid Python literal, or the value is not a list/tuple/set.
    """
    import ast

    def parse_cause_pairs(raw):
        """Parse one cell into a list of ids without evaluating arbitrary code."""
        if isinstance(raw, str):
            text = raw.strip()
            raw = ast.literal_eval(text) if text else []
        if isinstance(raw, (list, tuple, set)):
            return list(raw)
        # Missing values (None / NaN, e.g. an empty CSV cell) mean "no causes".
        if raw is None or (isinstance(raw, float) and raw != raw):
            return []
        raise ValueError(f"Unsupported causing_pairs value: {raw!r}")

    test_df = pd.read_csv(test_path)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    predictions = []

    # Work scene by scene so cause ids are only resolved inside their own scene.
    for scene_id, scene_df in test_df.groupby('scene_id'):
        scene_df = scene_df.sort_values('conversation_id')

        for idx, row in scene_df.iterrows():
            encoded = tokenizer.encode_plus(
                row['statement'],
                add_special_tokens=True,
                max_length=max_len,
                padding='max_length',
                truncation=True,
                return_tensors='pt'
            )
            inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

            with torch.no_grad():
                outputs = model(**inputs)

            predicted_emotion_idx = torch.argmax(outputs.logits, dim=1).item()
            predicted_emotion = emotion_encoder.inverse_transform([predicted_emotion_idx])[0]

            cause_details = []
            for cause_conv_id in parse_cause_pairs(row['causing_pairs']):
                cause_row = scene_df[scene_df['conversation_id'] == cause_conv_id]
                if cause_row.empty:
                    continue
                cause_details.append({
                    'cause_conversation_id': cause_conv_id,
                    'cause_statement': cause_row.iloc[0]['statement'],
                    # Always True: the cause comes from the annotation itself.
                    'is_valid_cause': True
                })

            predictions.append({
                'scene_id': row['scene_id'],
                'conversation_id': row['conversation_id'],
                'statement': row['statement'],
                'true_emotion': row['emotion'],
                'predicted_emotion': predicted_emotion,
                'causes': cause_details
            })

    return predictions

def evaluate_model(predictions):
    """Print emotion-classification metrics and a summary of the attached causes.

    Args:
        predictions: list of dicts carrying at least ``scene_id``,
            ``true_emotion``, ``predicted_emotion`` and ``causes`` (a list), as
            returned by ``predict_emotions_and_causes``.

    Returns:
        None; all results are printed.
    """
    true_emotions = [pred['true_emotion'] for pred in predictions]
    predicted_emotions = [pred['predicted_emotion'] for pred in predictions]

    print("Emotion Classification Report:")
    print(classification_report(true_emotions, predicted_emotions))

    emotion_accuracy = accuracy_score(true_emotions, predicted_emotions)
    print(f"\nEmotion Prediction Accuracy: {emotion_accuracy:.4f}")

    # Each prediction is one utterance, and a scene holds many utterances.
    # Scenes must therefore be counted as distinct scene ids; counting
    # utterances against the number of scenes yields percentages above 100%.
    all_scene_ids = {pred['scene_id'] for pred in predictions}
    scene_ids_with_causes = {pred['scene_id'] for pred in predictions if pred['causes']}
    utterances_with_causes = sum(1 for pred in predictions if pred['causes'])

    total_scenes = len(all_scene_ids)
    scenes_with_causes = len(scene_ids_with_causes)
    scene_percentage = (scenes_with_causes / total_scenes) * 100 if total_scenes else 0.0

    print("\nCause Detection Summary:")
    print(f"Total Scenes: {total_scenes}")
    print(f"Scenes with Identified Causes: {scenes_with_causes}")
    print(f"Percentage of Scenes with Causes: {scene_percentage:.2f}%")
    print(f"Utterances with Identified Causes: {utterances_with_causes}")

    total_cause_pairs = sum(len(pred['causes']) for pred in predictions)
    print(f"Total Cause Pairs Identified: {total_cause_pairs}")

def main():
    """Load the saved model, score the test set, save and preview the predictions."""
    model_path = './emotion_cause_model'
    test_path = 'test.csv'

    model, tokenizer, emotion_encoder = load_model_and_tokenizer(model_path)
    predictions = predict_emotions_and_causes(test_path, model, tokenizer, emotion_encoder)
    evaluate_model(predictions)

    # Persist every prediction for offline inspection.
    pd.DataFrame(predictions).to_csv('model_predictions.csv', index=False)

    # Show the first few records for a quick manual check.
    print("\nSample Predictions:")
    for sample in predictions[:5]:
        print("\n---")
        print(f"Statement: {sample['statement']}")
        print(f"True Emotion: {sample['true_emotion']}")
        print(f"Predicted Emotion: {sample['predicted_emotion']}")
        print("Causes:")
        for cause_info in sample['causes']:
            print(f"  - Conversation ID: {cause_info['cause_conversation_id']}")
            print(f"    Statement: {cause_info['cause_statement']}")


if __name__ == '__main__':
    main()

Emotion Classification Report:
              precision    recall  f1-score   support

       anger       0.50      0.34      0.41       333
     disgust       0.53      0.10      0.17        79
        fear       0.17      0.27      0.21        56
         joy       0.64      0.27      0.38       429
     neutral       0.57      0.93      0.71      1121
     sadness       0.47      0.08      0.14       241
    surprise       0.67      0.38      0.49       307

    accuracy                           0.56      2566
   macro avg       0.51      0.34      0.36      2566
weighted avg       0.57      0.56      0.51      2566


Emotion Prediction Accuracy: 0.5592

Cause Detection Summary:
Total Scenes: 261
Scenes with Identified Causes: 1344
Percentage of Scenes with Causes: 514.94%
Total Cause Pairs Identified: 1873

Sample Predictions:

---
Statement: Wow .
True Emotion: surprise
Predicted Emotion: neutral
Causes:

---
Statement: I am , I am really sorry .
True Emotion: neutral
Predicted Em

In [None]:
def evaluate_model(predictions):
    """Print per-emotion accuracy and a per-scene breakdown of the attached causes.

    NOTE: ``predict_emotions_and_causes`` in this notebook stores
    ``is_valid_cause=True`` on every cause it attaches, so with its output the
    "Correctly Identified Causes" figures equal the number of cause pairs by
    construction; they do not measure a model's cause-detection quality.

    Args:
        predictions: list of dicts carrying at least ``scene_id``,
            ``true_emotion``, ``predicted_emotion`` and ``causes`` (a list of
            dicts with an ``is_valid_cause`` key).

    Returns:
        dict with ``emotion_accuracy`` (a percentage, 0-100),
        ``emotion_breakdown`` (true-label counts per emotion) and
        ``cause_detection_accuracy`` (a fraction, 0-1).
    """
    true_emotions = [pred['true_emotion'] for pred in predictions]
    predicted_emotions = [pred['predicted_emotion'] for pred in predictions]

    print("Emotion Classification Report:")
    print(classification_report(true_emotions, predicted_emotions))

    # Per-emotion totals and hits, keyed by the true label.
    print("\nEmotion-wise Prediction Accuracy:")
    emotion_counts = {}
    emotion_correct_counts = {}
    for true, pred in zip(true_emotions, predicted_emotions):
        emotion_counts[true] = emotion_counts.get(true, 0) + 1
        if true == pred:
            emotion_correct_counts[true] = emotion_correct_counts.get(true, 0) + 1

    print("\nEmotion Breakdown:")
    total_correct = 0
    total_predictions = len(true_emotions)
    for emotion, total_emotion_count in emotion_counts.items():
        correct_emotion_count = emotion_correct_counts.get(emotion, 0)
        emotion_accuracy = correct_emotion_count / total_emotion_count * 100

        print(f"{emotion}:")
        print(f"  Total instances: {total_emotion_count}")
        print(f"  Correctly predicted: {correct_emotion_count}")
        print(f"  Accuracy: {emotion_accuracy:.2f}%")

        total_correct += correct_emotion_count

    # Guard against an empty prediction list.
    overall_accuracy = total_correct / total_predictions * 100 if total_predictions else 0.0
    print(f"\nOverall Emotion Prediction Accuracy: {overall_accuracy:.2f}%")

    print("\nCause Detection Analysis:")
    total_scenes = len(set(pred['scene_id'] for pred in predictions))
    utterances_with_causes = 0
    total_cause_pairs = 0
    correctly_identified_causes = 0

    # scene_id -> running totals; a scene appears here once at least one of its
    # utterances carries a cause.
    cause_detection_details = {}

    for pred in predictions:
        causes = pred['causes']
        if not causes:
            continue
        utterances_with_causes += 1
        scene_stats = cause_detection_details.setdefault(
            pred['scene_id'],
            {'total_cause_pairs': 0, 'correctly_identified_causes': 0},
        )
        for cause in causes:
            total_cause_pairs += 1
            scene_stats['total_cause_pairs'] += 1
            if cause['is_valid_cause']:
                correctly_identified_causes += 1
                scene_stats['correctly_identified_causes'] += 1

    # Scenes are counted as distinct scene ids; each prediction is a single
    # utterance, so counting predictions would overstate the number of scenes.
    scenes_with_causes = len(cause_detection_details)

    print(f"Total Scenes: {total_scenes}")
    print(f"Scenes with Cause Pairs: {scenes_with_causes}")
    print(f"Utterances with Cause Pairs: {utterances_with_causes}")
    print(f"Total Cause Pairs: {total_cause_pairs}")
    print(f"Correctly Identified Causes: {correctly_identified_causes}")

    cause_fraction = correctly_identified_causes / total_cause_pairs if total_cause_pairs > 0 else 0
    if total_cause_pairs > 0:
        print(f"Cause Detection Accuracy: {cause_fraction * 100:.2f}%")

    print("\nScene-wise Cause Detection:")
    for scene_id, details in cause_detection_details.items():
        total_pairs = details['total_cause_pairs']
        correct_pairs = details['correctly_identified_causes']
        scene_cause_accuracy = correct_pairs / total_pairs * 100 if total_pairs > 0 else 0

        print(f"Scene {scene_id}:")
        print(f"  Total Cause Pairs: {total_pairs}")
        print(f"  Correctly Identified Causes: {correct_pairs}")
        print(f"  Scene Cause Detection Accuracy: {scene_cause_accuracy:.2f}%")

    return {
        'emotion_accuracy': overall_accuracy,
        'emotion_breakdown': emotion_counts,
        'cause_detection_accuracy': cause_fraction
    }

def main():
    """Load the saved model, evaluate it on the test set and save the predictions."""
    model_path = './emotion_cause_model'
    test_path = 'test.csv'

    model, tokenizer, emotion_encoder = load_model_and_tokenizer(model_path)
    predictions = predict_emotions_and_causes(test_path, model, tokenizer, emotion_encoder)

    # The metrics are printed by evaluate_model; its summary dict is kept in a
    # local for interactive use.
    evaluation_results = evaluate_model(predictions)

    # Persist every prediction for offline inspection.
    pd.DataFrame(predictions).to_csv('model_predictions.csv', index=False)


if __name__ == '__main__':
    main()

Emotion Classification Report:
              precision    recall  f1-score   support

       anger       0.50      0.34      0.41       333
     disgust       0.53      0.10      0.17        79
        fear       0.17      0.27      0.21        56
         joy       0.64      0.27      0.38       429
     neutral       0.57      0.93      0.71      1121
     sadness       0.47      0.08      0.14       241
    surprise       0.67      0.38      0.49       307

    accuracy                           0.56      2566
   macro avg       0.51      0.34      0.36      2566
weighted avg       0.57      0.56      0.51      2566


Emotion-wise Prediction Accuracy:

Emotion Breakdown:
surprise:
  Total instances: 307
  Correctly predicted: 117
  Accuracy: 38.11%
neutral:
  Total instances: 1121
  Correctly predicted: 1046
  Accuracy: 93.31%
joy:
  Total instances: 429
  Correctly predicted: 116
  Accuracy: 27.04%
anger:
  Total instances: 333
  Correctly predicted: 114
  Accuracy: 34.23%
sadness:

In [None]:
def evaluate_model(predictions):
    """Print class-balance-aware emotion metrics and a summary of the attached causes.

    Prints the true-label distribution, the classification report, balanced
    accuracy and per-emotion accuracy with a 95% normal-approximation
    confidence interval, and saves a confusion-matrix heatmap to
    ``emotion_confusion_matrix.png``.

    The cause figures only count attached causes whose ``cause_statement`` is a
    non-blank string; they do not validate the causes against a model.

    Args:
        predictions: list of dicts carrying at least ``scene_id``,
            ``true_emotion``, ``predicted_emotion`` and ``causes`` (a list of
            dicts with a ``cause_statement`` key).

    Returns:
        dict with ``balanced_accuracy``, ``total_instances`` and
        ``cause_detection_potential_accuracy`` (a fraction, 0-1).
    """
    from collections import Counter

    import matplotlib.pyplot as plt
    import seaborn as sns
    from scipy import stats
    from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix

    true_emotions = [pred['true_emotion'] for pred in predictions]
    predicted_emotions = [pred['predicted_emotion'] for pred in predictions]

    # Class distribution of the true labels.
    emotion_distribution = Counter(true_emotions)
    print("Emotion Distribution:")
    for emotion, count in emotion_distribution.items():
        print(f"{emotion}: {count} instances ({count/len(true_emotions)*100:.2f}%)")

    print("\nEmotion Classification Report:")
    print(classification_report(true_emotions, predicted_emotions))

    balanced_acc = balanced_accuracy_score(true_emotions, predicted_emotions)
    print(f"\nBalanced Accuracy: {balanced_acc:.4f}")

    # The matrix covers every label seen in either list, so the tick labels
    # must be built from the same union. Deriving them from the true labels
    # alone mislabels the axes when a label is only ever predicted.
    labels = sorted(set(true_emotions) | set(predicted_emotions))
    cm = confusion_matrix(true_emotions, predicted_emotions, labels=labels)
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_title('Confusion Matrix of Emotion Predictions')
    ax.set_xlabel('Predicted Emotion')
    ax.set_ylabel('True Emotion')
    fig.tight_layout()
    fig.savefig('emotion_confusion_matrix.png')
    plt.close(fig)

    print("\nEmotion-wise Accuracy with 95% Confidence Intervals:")
    correct_by_emotion = Counter(
        true for true, pred in zip(true_emotions, predicted_emotions) if true == pred
    )
    for emotion in sorted(emotion_distribution):
        total_emotion_count = emotion_distribution[emotion]
        correct_count = correct_by_emotion.get(emotion, 0)

        p = correct_count / total_emotion_count
        std_err = np.sqrt((p * (1 - p)) / total_emotion_count)
        if std_err > 0:
            ci_low, ci_high = stats.norm.interval(0.95, loc=p, scale=std_err)
        else:
            # p is exactly 0 or 1: the normal approximation has zero width and
            # a zero scale is not a valid distribution parameter.
            ci_low = ci_high = p
        # An accuracy cannot leave [0, 1], but the approximation can.
        ci_low, ci_high = max(0.0, ci_low), min(1.0, ci_high)

        print(f"{emotion}:")
        print(f"  Total instances: {total_emotion_count}")
        print(f"  Correctly predicted: {correct_count}")
        print(f"  Accuracy: {p*100:.2f}%")
        print(f"  95% Confidence Interval: [{ci_low*100:.2f}%, {ci_high*100:.2f}%]")

    print("\nCause Detection Analysis:")
    total_scenes = len(set(pred['scene_id'] for pred in predictions))
    scene_ids_with_causes = set()
    utterances_with_causes = 0
    total_cause_pairs = 0
    correctly_identified_causes = 0

    for pred in predictions:
        causes = pred['causes']
        if not causes:
            continue
        utterances_with_causes += 1
        scene_ids_with_causes.add(pred['scene_id'])
        total_cause_pairs += len(causes)

        for cause in causes:
            # Placeholder validation: a cause counts when its statement is a
            # non-blank string. Domain-specific validation still has to be added.
            cause_statement = cause['cause_statement']
            if isinstance(cause_statement, str) and cause_statement.strip():
                correctly_identified_causes += 1

    # Scenes are counted as distinct scene ids; each prediction is a single
    # utterance, so counting predictions would overstate the number of scenes.
    scenes_with_causes = len(scene_ids_with_causes)

    print(f"Total Scenes: {total_scenes}")
    print(f"Scenes with Cause Pairs: {scenes_with_causes}")
    print(f"Utterances with Cause Pairs: {utterances_with_causes}")
    print(f"Total Cause Pairs: {total_cause_pairs}")
    print(f"Possibly Identified Causes: {correctly_identified_causes}")

    cause_fraction = correctly_identified_causes / total_cause_pairs if total_cause_pairs > 0 else 0
    if total_cause_pairs > 0:
        print(f"Cause Detection Potential Accuracy: {cause_fraction * 100:.2f}%")

    return {
        'balanced_accuracy': balanced_acc,
        'total_instances': len(true_emotions),
        'cause_detection_potential_accuracy': cause_fraction
    }

def main():
    """Load the saved model, evaluate it on the test set and save the predictions."""
    model_path = './emotion_cause_model'
    test_path = 'test.csv'

    model, tokenizer, emotion_encoder = load_model_and_tokenizer(model_path)
    predictions = predict_emotions_and_causes(test_path, model, tokenizer, emotion_encoder)

    # The metrics are printed by evaluate_model; its summary dict is kept in a
    # local for interactive use.
    evaluation_results = evaluate_model(predictions)

    # Persist every prediction for offline inspection.
    pd.DataFrame(predictions).to_csv('model_predictions.csv', index=False)


if __name__ == '__main__':
    main()

Emotion Distribution:
surprise: 307 instances (11.96%)
neutral: 1121 instances (43.69%)
joy: 429 instances (16.72%)
anger: 333 instances (12.98%)
sadness: 241 instances (9.39%)
disgust: 79 instances (3.08%)
fear: 56 instances (2.18%)

Emotion Classification Report:
              precision    recall  f1-score   support

       anger       0.50      0.34      0.41       333
     disgust       0.53      0.10      0.17        79
        fear       0.17      0.27      0.21        56
         joy       0.64      0.27      0.38       429
     neutral       0.57      0.93      0.71      1121
     sadness       0.47      0.08      0.14       241
    surprise       0.67      0.38      0.49       307

    accuracy                           0.56      2566
   macro avg       0.51      0.34      0.36      2566
weighted avg       0.57      0.56      0.51      2566


Balanced Accuracy: 0.3393

Emotion-wise Accuracy with 95% Confidence Intervals:
anger:
  Total instances: 333
  Correctly predicted: 114

In [None]:
!pip install flask flask-ngrok pyngrok




In [None]:
from flask import Flask, jsonify, request
from pyngrok import ngrok
import threading

app = Flask(__name__)

# NOTE(review): predict() reads `model`, `tokenizer` and `emotion_encoder` as
# notebook globals. The visible cells only create them as locals inside main(),
# so confirm they are assigned at top level (for example from
# load_model_and_tokenizer) before serving; otherwise every request answers 500.


@app.route("/predict", methods=["POST"])
def predict():
    """Classify the emotion of the ``text`` field of a JSON POST body.

    Responses:
        200 ``{"emotion": <label>}`` on success.
        400 ``{"error": "No text provided"}`` when the body is not a JSON
            object or ``text`` is missing, not a string, or blank.
        500 ``{"error": <message>}`` when prediction raises.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so bad input is reported as a 400 below.
    data = request.get_json(silent=True)
    statement = data.get("text", "") if isinstance(data, dict) else ""

    if not isinstance(statement, str) or not statement.strip():
        return jsonify({"error": "No text provided"}), 400

    try:
        emotion = predict_emotion(statement, model, tokenizer, emotion_encoder)
        # str() keeps the payload JSON-serializable whatever string type the
        # label encoder returns.
        return jsonify({"emotion": str(emotion)})
    except Exception as e:
        # Deliberate catch-all at the API boundary. NOTE(review): str(e) exposes
        # internal error text to the client.
        return jsonify({"error": str(e)}), 500

