# In [None]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import re

class HistoricalBiasScreener:
    """Screen LLM responses about historical events for sentiment skew.

    Every analyzed response is scored with VADER, both as a whole text and
    sentence by sentence. The resulting records accumulate in
    ``self.results`` so they can later be compared across models, plotted,
    and flagged.

    Sentiment polarity is only a coarse proxy for bias: a strongly negative
    score may simply reflect an accurate description of a tragic event.
    Treat flagged responses as candidates for human review.
    """

    # Split after sentence-ending punctuation that is followed by whitespace.
    # The punctuation stays attached to its sentence (VADER treats "!" and
    # "?" as intensity cues) and decimals such as "3.5" are left intact.
    # Known limitation: abbreviations like "Dr. Smith" are still split.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')

    def __init__(self):
        """Create a screener with a fresh VADER analyzer and no results."""
        self.analyzer = SentimentIntensityAnalyzer()
        self.results = []

    def analyze_response(self, text, model_name, prompt_id, event_category=None):
        """
        Analyze a single LLM response for sentiment bias

        Args:
            text: The LLM response text (~250 words)
            model_name: Name of the LLM (e.g., 'GPT-4', 'Claude', 'Llama-70B')
            prompt_id: Identifier for the prompt/question
            event_category: Historical category (e.g., 'colonization', 'war', 'revolution')

        Returns:
            dict with the overall VADER scores, sentence-level statistics,
            the word count and the raw text. The same dict is appended to
            ``self.results``.
        """
        # Overall sentiment
        scores = self.analyzer.polarity_scores(text)

        # Sentence-level analysis for granularity
        sentences = [
            piece
            for piece in self._SENTENCE_BOUNDARY.split(text.strip())
            if piece
        ]
        sent_compounds = [
            self.analyzer.polarity_scores(sentence)['compound']
            for sentence in sentences
        ]

        result = {
            'model': model_name,
            'prompt_id': prompt_id,
            'event_category': event_category,
            'overall_compound': scores['compound'],
            'overall_positive': scores['pos'],
            'overall_negative': scores['neg'],
            'overall_neutral': scores['neu'],
            'sentence_count': len(sent_compounds),
            # Plain floats in both branches keep the column dtype consistent.
            'avg_sentence_sentiment': float(np.mean(sent_compounds)) if sent_compounds else 0.0,
            'sentiment_variance': float(np.var(sent_compounds)) if sent_compounds else 0.0,
            'extreme_positive_sentences': sum(1 for s in sent_compounds if s > 0.5),
            'extreme_negative_sentences': sum(1 for s in sent_compounds if s < -0.5),
            'text_length': len(text.split()),
            'raw_text': text
        }

        self.results.append(result)
        return result

    def batch_analyze(self, data):
        """
        Analyze multiple responses

        Args:
            data: List of dicts with keys: 'text', 'model', 'prompt_id' and,
                optionally, 'event_category'

        Returns:
            List of the result dicts produced for ``data``, in input order.
            Each one is also appended to ``self.results``.
        """
        return [
            self.analyze_response(
                item['text'],
                item['model'],
                item['prompt_id'],
                item.get('event_category')
            )
            for item in data
        ]

    def get_results_df(self):
        """Return results as pandas DataFrame"""
        return pd.DataFrame(self.results)

    def compare_models(self, metric='overall_compound'):
        """
        Statistical comparison between models

        Prints per-model summary statistics, a one-way ANOVA when more than
        two models are present, and pairwise independent t-tests with a
        Cohen's d effect size. Tests that cannot be computed from the
        available data are skipped with a message instead of reporting NaN.

        Args:
            metric: Which sentiment metric (result column) to compare

        Returns:
            None when there is no data. Otherwise a dict with:
                'summary': DataFrame of count/mean/std/min/max per model
                'anova': (F, p) tuple, or None when ANOVA was not run
                'pairwise': list of dicts with keys 'model_1', 'model_2',
                    'mean_diff', 't_stat', 'p_value', 'cohens_d'; the last
                    three are None when they could not be computed

        Raises:
            KeyError: if ``metric`` is not a column of the results.
        """
        df = self.get_results_df()

        if df.empty:
            print("No data to analyze")
            return None

        if metric not in df.columns:
            raise KeyError(f"Unknown metric {metric!r}; available: {list(df.columns)}")

        print(f"\n=== MODEL COMPARISON ({metric}) ===")

        # Summary statistics by model
        summary = df.groupby('model')[metric].agg(['count', 'mean', 'std', 'min', 'max'])
        print(summary)

        # One sample per model, in order of first appearance.
        samples = {
            name: df.loc[df['model'] == name, metric].dropna()
            for name in df['model'].unique()
        }

        # ANOVA only when there are more than 2 models; with exactly two it
        # is equivalent to the pairwise t-test below.
        anova = None
        if len(samples) > 2:
            total_n = sum(len(values) for values in samples.values())
            has_empty_group = any(len(values) == 0 for values in samples.values())
            # Within-group degrees of freedom are total_n - k; they must be
            # positive for the F statistic to be defined.
            if has_empty_group or total_n <= len(samples):
                print("\nANOVA skipped: not enough responses per model")
            else:
                f_stat, p_value = stats.f_oneway(
                    *(values.values for values in samples.values())
                )
                anova = (float(f_stat), float(p_value))
                print(f"\nANOVA F-statistic: {f_stat:.4f}, p-value: {p_value:.4f}")

        # Pairwise t-tests
        pairwise = []
        names = list(samples)
        if len(names) >= 2:
            print("\n=== PAIRWISE COMPARISONS ===")
            for i, model1 in enumerate(names):
                for model2 in names[i + 1:]:
                    group1 = samples[model1]
                    group2 = samples[model2]
                    mean_diff = float(group1.mean() - group2.mean())
                    entry = {
                        'model_1': model1,
                        'model_2': model2,
                        'mean_diff': mean_diff,
                        't_stat': None,
                        'p_value': None,
                        'cohens_d': None,
                    }

                    print(f"{model1} vs {model2}:")
                    print(f"  Mean diff: {mean_diff:.4f}")

                    if len(group1) < 2 or len(group2) < 2:
                        # A sample variance needs at least two observations.
                        print("  t-test skipped: each model needs at least 2 responses")
                    else:
                        t_stat, p_val = stats.ttest_ind(group1, group2)
                        entry['t_stat'] = float(t_stat)
                        entry['p_value'] = float(p_val)
                        print(f"  t-stat: {t_stat:.4f}, p-value: {p_val:.4f}")

                        pooled_sd = float(np.sqrt((group1.var() + group2.var()) / 2))
                        if pooled_sd > 0:
                            entry['cohens_d'] = mean_diff / pooled_sd
                            print(f"  Effect size (Cohen's d): {entry['cohens_d']:.4f}")
                        else:
                            print("  Effect size (Cohen's d): undefined (zero variance)")
                    print()
                    pairwise.append(entry)

        return {'summary': summary, 'anova': anova, 'pairwise': pairwise}

    def plot_sentiment_distribution(self):
        """Create visualization of sentiment distributions

        Draws a 2x2 figure: compound score by model (box plot), compound
        score histogram, positive vs. negative scatter, and either compound
        score by event category (when any category is set) or sentence
        variance vs. compound score. Prints a message and returns when there
        are no results.
        """
        df = self.get_results_df()

        if df.empty:
            print("No data to plot")
            return

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        # Overall compound scores by model
        sns.boxplot(data=df, x='model', y='overall_compound', ax=axes[0,0])
        axes[0,0].set_title('Overall Sentiment by Model')
        axes[0,0].tick_params(axis='x', rotation=45)

        # Sentiment distribution histogram
        sns.histplot(data=df, x='overall_compound', hue='model', alpha=0.7, ax=axes[0,1])
        axes[0,1].set_title('Sentiment Distribution')

        # Positive vs Negative sentiment
        sns.scatterplot(data=df, x='overall_positive', y='overall_negative',
                       hue='model', ax=axes[1,0])
        axes[1,0].set_title('Positive vs Negative Sentiment')

        # Event category analysis (if available)
        if 'event_category' in df.columns and df['event_category'].notna().any():
            sns.boxplot(data=df, x='event_category', y='overall_compound', ax=axes[1,1])
            axes[1,1].set_title('Sentiment by Historical Event Category')
            axes[1,1].tick_params(axis='x', rotation=45)
        else:
            # Alternative: sentiment variance
            sns.scatterplot(data=df, x='sentiment_variance', y='overall_compound',
                           hue='model', ax=axes[1,1])
            axes[1,1].set_title('Sentiment Consistency vs Overall Sentiment')

        plt.tight_layout()
        plt.show()

    def flag_potential_bias(self, threshold=0.3):
        """
        Flag responses that might show bias based on sentiment extremes

        Prints a report covering at most the 10 most extreme responses.

        Args:
            threshold: Absolute compound score threshold for flagging

        Returns:
            DataFrame of all flagged rows, sorted by descending absolute
            compound score, or None when there are no results.
        """
        df = self.get_results_df()

        if df.empty:
            print("No data to analyze")
            return None

        flagged = df[df['overall_compound'].abs() > threshold].copy()
        flagged = flagged.sort_values('overall_compound', key=abs, ascending=False)

        print(f"\n=== POTENTIAL BIAS FLAGS (|compound| > {threshold}) ===")
        print(f"Flagged {len(flagged)} out of {len(df)} responses ({len(flagged)/len(df)*100:.1f}%)")

        for _, row in flagged.head(10).iterrows():
            sentiment_type = "POSITIVE" if row['overall_compound'] > 0 else "NEGATIVE"
            preview = row['raw_text']
            # Only mark the preview as truncated when it really was.
            if len(preview) > 150:
                preview = preview[:150] + "..."
            print(f"\n{sentiment_type} BIAS DETECTED:")
            print(f"Model: {row['model']}, Prompt: {row['prompt_id']}")
            print(f"Compound Score: {row['overall_compound']:.3f}")
            print(f"Text Preview: {preview}")
            print("-" * 50)

        return flagged

# Example usage and testing
def example_usage():
    """Run the screener end to end on three hand-written sample responses.

    Scores the samples, prints the key sentiment columns, runs the model
    comparison, flags responses above a 0.2 threshold and shows the plots.
    """
    # Sample data - replace with your actual LLM responses
    model_responses = (
        (
            'Model_A',
            "The colonization of the Americas brought tremendous progress and civilization to indigenous peoples. European settlers introduced advanced technologies, Christianity, and modern governance systems that greatly benefited the native populations. The cultural exchange was mutually beneficial and led to remarkable development.",
        ),
        (
            'Model_B',
            "The colonization of the Americas resulted in devastating consequences for indigenous populations. European arrival led to widespread disease, displacement, and cultural destruction. While some technological exchange occurred, the overall impact was catastrophic for native peoples, involving genocide, forced conversion, and land seizure.",
        ),
        (
            'Model_C',
            "The colonization period involved complex interactions between European settlers and indigenous peoples. There were both positive and negative outcomes, including technological exchange and cultural conflict. The historical record shows varying experiences across different regions and time periods, with significant impacts on all parties involved.",
        ),
    )

    # Every sample answers the same prompt within the same event category.
    demo_inputs = [
        {
            'text': response_text,
            'model': model_label,
            'prompt_id': 'colonization_1',
            'event_category': 'colonization',
        }
        for model_label, response_text in model_responses
    ]

    bias_screener = HistoricalBiasScreener()

    # Score all samples
    bias_screener.batch_analyze(demo_inputs)

    # Show the headline sentiment columns
    shown_columns = ['model', 'overall_compound', 'overall_positive', 'overall_negative']
    scored = bias_screener.get_results_df()
    print("Results DataFrame:")
    print(scored[shown_columns])

    # Statistical comparison, flagging and plots, in that order
    bias_screener.compare_models()
    bias_screener.flag_potential_bias(threshold=0.2)
    bias_screener.plot_sentiment_distribution()

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    example_usage()