# Anti-Echo Chamber - Article Comparison Tool

This notebook compares news articles and finds opposing viewpoints to break echo chambers.

**Key Features:**
- Query similar articles by topic
- Find opposing viewpoints by political stance
- Interactive article comparison
- ChromaDB-based retrieval system

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AHMerrill/anti-echo-2/blob/master/notebooks/anti_echo_chamber.ipynb)


## Setup and Installation


In [None]:
# Install the notebook's third-party dependencies.
# The leading "!" is the IPython/Colab shell escape; "-q" keeps pip's output quiet.
!pip install -q chromadb sentence-transformers transformers huggingface-hub datasets scikit-learn nltk pyyaml

# Set environment variables
import os
import torch

# Quieten noisy libraries. These are set here, ahead of the later cell that
# imports the core library, so they are already in place at that point.
os.environ["CHROMA_TELEMETRY_ENABLED"] = "false"
# ANONYMIZED_TELEMETRY is the opt-out switch described in Chroma's telemetry
# documentation; the variable above is kept as-is for backward compatibility.
os.environ["ANONYMIZED_TELEMETRY"] = "False"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# GPU/CPU Detection and Configuration
def setup_device():
    """Choose the compute device, report it, and return its name.

    Returns "cuda" when ``torch.cuda.is_available()`` is true, otherwise
    "cpu". As a side effect the function prints a short status report and
    writes ``CUDA_VISIBLE_DEVICES`` ("0" for the GPU case, "" for CPU).
    """
    has_gpu = torch.cuda.is_available()

    if not has_gpu:
        print("💻 Using CPU (GPU not available)")
    else:
        print(f"🚀 GPU detected: {torch.cuda.get_device_name(0)}")
        total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"   Memory: {total_gb:.1f} GB")

    chosen = "cuda" if has_gpu else "cpu"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0" if has_gpu else ""

    print(f"✅ Device configured: {chosen}")
    return chosen

# Detect the compute device once; `device` is passed to AntiEchoCore in a later cell.
device = setup_device()

# Manual device override: uncomment exactly one line below if the
# auto-detected choice is not what you want.
# device = "cpu"  # Force CPU usage
# device = "cuda"  # Force GPU usage (if available)


## Download Core Library and Load Data


In [None]:
# Download the core library and configs from GitHub.
# The repository is cloned into a scratch directory, its non-hidden contents
# are copied into the current working directory, and the scratch clone is removed.
!git clone https://github.com/AHMerrill/anti-echo-2.git temp_repo
!cp -r temp_repo/* ./
!rm -rf temp_repo

# Load the core system
# (anti_echo_core is presumably one of the files copied in above - verify)
from anti_echo_core import AntiEchoCore
from datasets import load_dataset
import pandas as pd
import numpy as np

# Initialize the core system with the device chosen in the setup cell.
# `core` is a notebook-level global used by the functions defined in later cells.
print("Initializing Anti-Echo Chamber system...")
core = AntiEchoCore("config/config.yaml", device=device)
print(f"✓ System initialized successfully on {core.device}")

print("📖 Comparison tool ready - no authentication needed (read-only)")


## Load Data from Hugging Face and Recreate ChromaDB


In [None]:
def load_data_and_recreate_chroma(dataset_name="anti-echo-chamber-data"):
    """Load articles from a Hugging Face dataset and rebuild the ChromaDB collections.

    The "train" split of ``dataset_name`` is loaded, the ``news_topic`` and
    ``news_stance`` collections on ``core.chroma_client`` are dropped and
    recreated with cosine distance, and every article is written back through
    ``core.upsert_to_chroma``.

    Args:
        dataset_name: Dataset identifier handed to ``load_dataset``.
            NOTE(review): this default has no namespace prefix, whereas the
            notebook calls the function with "zanimal/anti-echo-chamber-data";
            confirm the default is resolvable before relying on it.

    Returns:
        The loaded articles as a list of dicts, or ``[]`` when any step
        fails. Failures are printed rather than raised.
    """
    # Fields copied from each dataset row into the record handed to
    # core.upsert_to_chroma. A row missing any of them raises KeyError,
    # which is reported by the handler at the bottom.
    article_fields = (
        "id",
        "title",
        "url",
        "source",
        "published",
        "topics",
        "political_leaning",
        "implied_stance",
        "summary",
        "topic_vectors",
        "stance_embedding",
        "text_length",
    )

    print(f"📥 Loading data from Hugging Face dataset: {dataset_name}")

    try:
        # Load the dataset
        dataset = load_dataset(dataset_name, split="train")
        print(f"✓ Loaded {len(dataset)} articles from Hugging Face")

        # Convert to list of dictionaries
        articles = [dict(item) for item in dataset]
        total = len(articles)

        print(f"📊 Recreating ChromaDB with {total} articles...")

        # Clear existing collections. Each one is dropped independently so
        # that a missing "news_topic" cannot prevent "news_stance" from being
        # removed (which would make the create_collection call below fail).
        for collection_name in ("news_topic", "news_stance"):
            try:
                core.chroma_client.delete_collection(collection_name)
            except Exception:
                # Best-effort cleanup: most likely the collection simply does
                # not exist yet. KeyboardInterrupt/SystemExit still propagate.
                pass

        # Recreate collections
        core.topic_coll = core.chroma_client.create_collection(
            name="news_topic",
            metadata={"hnsw:space": "cosine"},
        )
        core.stance_coll = core.chroma_client.create_collection(
            name="news_stance",
            metadata={"hnsw:space": "cosine"},
        )

        # Recreate ChromaDB from HF data
        for i, article in enumerate(articles):
            if i % 50 == 0:
                print(f"  Processing {i}/{total} articles...")

            # Reconstruct the processed article format
            processed_article = {field: article[field] for field in article_fields}

            # Upsert to ChromaDB
            core.upsert_to_chroma(processed_article)

        print(f"✅ ChromaDB recreated successfully with {total} articles")
        return articles

    except Exception as e:
        # Notebook-level boundary: report the problem and hand back an empty
        # list so later cells can still run.
        print(f"❌ Error loading data: {e}")
        print("Make sure the dataset exists and you have access to it")
        return []

# Load the data.
# Change the dataset name here to point the notebook at a different Hugging Face dataset.
dataset_name = "zanimal/anti-echo-chamber-data"  # Default dataset
# Interactive alternative: prompt for a name and fall back to the default on empty input.
# dataset_name = input("Enter dataset name (or press Enter for default): ").strip() or "zanimal/anti-echo-chamber-data"

# `articles` holds the loaded rows, or [] if loading failed (the loader prints the error).
articles = load_data_and_recreate_chroma(dataset_name)


## Article Comparison Functions


In [None]:
def find_similar_articles(query_text, n_results=5):
    """Print and return the articles the core system considers topically similar.

    The lookup itself is delegated to ``core.query_similar_articles``; this
    function only echoes the query (first 100 characters) and lists title,
    source, political leaning, implied stance, topics and URL for each hit.

    Args:
        query_text: Text to search with.
        n_results: Number of results requested from the core system.

    Returns:
        Whatever ``core.query_similar_articles`` returned, unchanged.
    """
    print(f"🔍 Finding articles with similar topics to: '{query_text[:100]}...'")

    matches = core.query_similar_articles(query_text, n_results)

    # Nothing to list: say so and hand back the (empty) result as-is.
    if not matches:
        print("No articles with topic overlap found")
        return matches

    print(f"\n📰 Found {len(matches)} articles with topic overlap:")
    for rank, hit in enumerate(matches, start=1):
        print(f"\n{rank}. {hit['title']}")
        print(f"   Source: {hit['source']}")
        print(f"   Political Leaning: {hit['political_leaning']}")
        print(f"   Implied Stance: {hit['implied_stance']}")
        print(f"   Topics: {', '.join(hit['topics'])}")
        print(f"   URL: {hit['url']}")

    return matches

def find_opposing_articles(query_text, n_results=5):
    """Print and return the articles the core system ranks as opposing in stance.

    The lookup is delegated to ``core.query_opposing_stance``; this function
    echoes the query (first 100 characters) and lists title, source,
    political leaning, implied stance, summary and URL for each hit.

    Args:
        query_text: Text to search with.
        n_results: Number of results requested from the core system.

    Returns:
        Whatever ``core.query_opposing_stance`` returned, unchanged.
    """
    print(f"⚖️ Finding opposing viewpoints to: '{query_text[:100]}...'")
    print("   (Ranked by stance dissimilarity - most opposing first)")

    counterpoints = core.query_opposing_stance(query_text, n_results)

    # Nothing to list: say so and hand back the (empty) result as-is.
    if not counterpoints:
        print("No opposing articles found")
        return counterpoints

    print(f"\n📰 Found {len(counterpoints)} opposing articles (most dissimilar first):")
    for rank, hit in enumerate(counterpoints, start=1):
        print(f"\n{rank}. {hit['title']}")
        print(f"   Source: {hit['source']}")
        print(f"   Political Leaning: {hit['political_leaning']}")
        print(f"   Implied Stance: {hit['implied_stance']}")
        print(f"   Summary: {hit['summary']}")
        print(f"   URL: {hit['url']}")

    return counterpoints

def compare_articles(article1_text, article2_text):
    """Process two article texts and print a side-by-side comparison.

    Both texts are run through ``core.process_article``. For each result the
    political leaning, implied stance, topics and summary are printed,
    followed by the cosine similarity of the two stance embeddings and a note
    on whether the political leanings differ.

    Args:
        article1_text: Raw text of the first article.
        article2_text: Raw text of the second article.

    Returns:
        ``(article1, article2)`` as returned by ``core.process_article``, or
        ``None`` when either article could not be processed.
    """

    def _show(header, processed):
        # Print the per-article section of the report.
        print(header)
        print(f"   Political Leaning: {processed['political_leaning']}")
        print(f"   Implied Stance: {processed['implied_stance']}")
        print(f"   Topics: {', '.join(processed['topics'])}")
        print(f"   Summary: {processed['summary']}")

    print("📊 Article Comparison")
    print("=" * 50)

    # Process both articles before printing anything about either of them.
    first = core.process_article({"text": article1_text, "title": "Article 1"})
    second = core.process_article({"text": article2_text, "title": "Article 2"})

    if not (first and second):
        print("Error processing articles")
        return None

    _show("\n📰 Article 1:", first)
    _show("\n📰 Article 2:", second)

    # Stance similarity: cosine similarity between the two stance embeddings.
    from sklearn.metrics.pairwise import cosine_similarity
    pairwise = cosine_similarity(
        [first['stance_embedding']],
        [second['stance_embedding']],
    )
    similarity = pairwise[0][0]

    print(f"\n🔍 Stance Similarity: {similarity:.3f}")

    if first['political_leaning'] != second['political_leaning']:
        verdict = "⚖️ Different political leanings - potential opposing viewpoints!"
    else:
        verdict = "🤝 Similar political leanings"
    print(verdict)

    return first, second


## Example Usage


In [None]:
# Example 1: Find similar articles for a short topic query.
# `similar` keeps the returned list so it can be inspected in later cells.
query = "climate change policy"
similar = find_similar_articles(query, n_results=3)

# Visual separator between the examples' printed output.
print("\n" + "="*60)

# Example 2: Find opposing viewpoints for the same query.
opposing = find_opposing_articles(query, n_results=3)

print("\n" + "="*60)

# Example 3: Compare two specific articles.
# The two sample texts take opposite positions on government climate regulation.
article1_text = "The government should implement strict climate regulations to reduce carbon emissions."
article2_text = "Free market solutions are better than government regulations for addressing climate change."

# The return value is not captured here; the comparison is printed by the function.
compare_articles(article1_text, article2_text)
