In [1]:
!git clone https://github.com/Sharp-4rth/temporal_belief_analysis.git

Cloning into 'temporal_belief_analysis'...
remote: Enumerating objects: 523, done.[K
remote: Counting objects: 100% (134/134), done.[K
remote: Compressing objects: 100% (101/101), done.[K
remote: Total 523 (delta 77), reused 54 (delta 28), pack-reused 389 (from 1)[K
Receiving objects: 100% (523/523), 3.40 MiB | 5.60 MiB/s, done.
Resolving deltas: 100% (325/325), done.


In [4]:
# Need to restart after:
!pip install convokit[llm]
!pip install convokit



In [1]:
import sys
import os
os.chdir('/content/temporal_belief_analysis/notebooks')
print("Changed working directory to:", os.getcwd())

# Absolute path to src directory
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if src_path not in sys.path:
    sys.path.insert(0, src_path)

Changed working directory to: /content/temporal_belief_analysis/notebooks


In [2]:
import time
!pip install gdown
import zipfile
import nltk
from nltk.corpus import stopwords
from convokit import Corpus, download
import convokit
from temporal_belief.core.timeline_building import TimelineBuilder
from temporal_belief.core.change_detection import ChangeDetector
from temporal_belief.core.window_extraction import WindowExtractor
from temporal_belief.core.op_path_pairing import OpPathPairer
from temporal_belief.data.preprocessors import ChangeDetectorPreprocessor
from temporal_belief.data.preprocessors import PairPreprocessor
from temporal_belief.core.interplay import Interplay
nltk.download('stopwords')

An error occurred: Unsloth currently only works on NVIDIA GPUs and Intel GPUs.


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [3]:
# Download and unzip with python (Dataloading):
# !gdown "https://drive.google.com/file/d/1N0U_jUJlOYjdaju2FaU8p87uB22YBxJ0/view?usp=sharing" -O "/content/temporal_belief_analysis/pd_corpus_with_stances100000_chronological.zip" --fuzzy
# !gdown "https://drive.google.com/file/d/1DLFY6JLMZqNjwvNRZmhlV4-rnoQP_eyH/view?usp=sharing" -O "/content/temporal_belief_analysis/merged_corpus_checkpoint_5.zip" --fuzzy
# !gdown "https://drive.google.com/file/d/1nWaj5N8nsG7u5homv_kAh4CLPDv01M_Z/view?usp=sharing" -O "/content/temporal_belief_analysis/pd_corpus_with_topics.zip" --fuzzy
!gdown "https://drive.google.com/file/d/1AIrstrzE259fcVyxJQW4-RwvAkoUyK1x/view?usp=sharing" -O "/content/temporal_belief_analysis/pd_corpus_with_stances_fine_tuned.zip" --fuzzy

# zipfile.ZipFile("/content/temporal_belief_analysis/pd_corpus_with_stances100000_chronological.zip").extractall("/content/temporal_belief_analysis")
# zipfile.ZipFile("/content/temporal_belief_analysis/merged_corpus_checkpoint_5.zip").extractall("/content/temporal_belief_analysis")
# zipfile.ZipFile("/content/temporal_belief_analysis/pd_corpus_with_topics.zip").extractall("/content/temporal_belief_analysis")
zipfile.ZipFile("/content/temporal_belief_analysis/pd_corpus_with_stances_fine_tuned.zip").extractall("/content/temporal_belief_analysis")

Downloading...
From (original): https://drive.google.com/uc?id=1AIrstrzE259fcVyxJQW4-RwvAkoUyK1x
From (redirected): https://drive.google.com/uc?id=1AIrstrzE259fcVyxJQW4-RwvAkoUyK1x&confirm=t&uuid=ce36a2e2-67b6-4844-8257-d59516a7a1c3
To: /content/temporal_belief_analysis/pd_corpus_with_stances_fine_tuned.zip
100% 1.07G/1.07G [00:11<00:00, 94.2MB/s]


In [4]:
CORPUS_PATH = "/content/temporal_belief_analysis/pd_corpus_with_stances_fine_tuned"

In [5]:
from typing import Dict, Any
from collections import defaultdict
import logging

MERGED_TOPIC = {
    # Economy
    'economic policy': 'Economy & Tax',
    'taxation and government spending': 'Economy & Tax',

    # Healthcare
    'healthcare policy': 'Healthcare',

    # Civil rights / justice / education / voting
    'civil rights and social issues': 'Civil Rights, Justice & Education',
    'criminal justice and policing': 'Civil Rights, Justice & Education',
    'voting rights and elections': 'Civil Rights, Justice & Education',
    'education policy': 'Civil Rights, Justice & Education',

    # Hot-button singles
    'gun rights and control': 'Guns',
    'abortion and reproductive rights': 'Abortion',
    'immigration policy': 'Immigration',
    'climate change and energy policy': 'Climate & Energy',

    # Foreign / defense
    'foreign policy and defense': 'Foreign & Defense',

    # Meta / process / actors
    'political figures and campaigns': 'Process & Actors (Meta)',
    'congressional politics': 'Process & Actors (Meta)',
    'electoral politics': 'Process & Actors (Meta)',
    'political parties and ideology': 'Process & Actors (Meta)',
    'media and political commentary': 'Process & Actors (Meta)',
}


class TimelineBuilder:
    """Simple timeline builder for user belief tracking.

    Builds structure: {user_id: {topic: {utterance_id: stance}}}
    """

    def __init__(self, corpus, min_posts_per_topic: int = 0, min_topics_per_user: int = 0):
        self.corpus = corpus
        self.min_posts_per_topic = min_posts_per_topic
        self.min_topics_per_user = min_topics_per_user
        self.logger = logging.getLogger(__name__)

    def build_timelines(self, include_all=True) -> Dict[str, Dict[str, Dict[str, str]]]:
        """Build user timelines from corpus with stance metadata.

        Returns:
            {user_id: {topic: {utterance_id: stance}}}
        """
        # Group by user and topic
        user_topic_posts = defaultdict(lambda: defaultdict(list))

        for utterance in self.corpus.iter_utterances():
            # Skip if no stance metadata on utterance
            if include_all == False:
                if not utterance.meta or 'detected_stance' not in utterance.meta:
                    continue

            # Get topic from conversation metadata
            conversation = utterance.get_conversation()
            if not conversation or not conversation.meta or 'detected_topic' not in conversation.meta:
                continue

            if not utterance.timestamp:
                continue

            user_id = utterance.speaker.id
            old_topic = conversation.meta['detected_topic']
            topic = MERGED_TOPIC.get(old_topic, old_topic)
            stance = utterance.meta.get('detected_stance', 'Unknown')

            user_topic_posts[user_id][topic].append({
                'utterance_id': utterance.id,
                'timestamp': utterance.timestamp,
                'stance': stance
            })

        # Filter and sort
        timelines = {}
        for user_id, topic_posts in user_topic_posts.items():
            user_timeline = {}

            for topic, posts in topic_posts.items():
                if len(posts) >= self.min_posts_per_topic:
                    # Sort chronologically
                    posts.sort(key=lambda x: x['timestamp'])

                    # Create topic timeline
                    topic_timeline = {}
                    for post in posts:
                        topic_timeline[post['utterance_id']] = post['stance']

                    user_timeline[topic] = topic_timeline

            # Only include users with enough topics
            if len(user_timeline) >= self.min_topics_per_user:
                timelines[user_id] = user_timeline

        self.logger.info(f"Built timelines for {len(timelines)} users")
        return timelines

In [6]:
import numpy as np
from collections import Counter
import logging
from typing import Dict, List, Tuple, Any, Optional

class ChangeDetector:
    """CUSUM-based change detection for political stance shifts.

    Focuses on detecting changes between 'left-leaning' and 'right-leaning' positions,
    ignoring neutral stances. Uses cumulative sum control charts to identify
    significant shifts in political orientation over time.
    """

    def __init__(self, threshold=3.0, drift=0.5, min_change_separation=5):
        """Initialize CUSUM detector with control parameters.

        Args:
            threshold: Detection threshold for CUSUM statistic (higher = less sensitive)
            drift: Reference drift value for change detection (typically 0.5-1.0)
            min_change_separation: Minimum posts between detected changes
        """
        self.threshold = threshold
        self.drift = drift
        self.min_change_separation = min_change_separation

        # Map stances to numeric values for CUSUM
        self.stance_values = {
            'left-leaning': -1.0,
            'neutral': 0.0,        # Will be filtered out
            'right-leaning': 1.0
        }

        self.all_change_points = []
        self.all_no_change_points = []

        # Logging setup
        self.logger = logging.getLogger(__name__)

    def _to_probs(self, item):
        """Convert various input formats to probability tuple (pL, pN, pR)."""
        if isinstance(item, str):
            if item == 'left-leaning':  return (1.0, 0.0, 0.0)
            if item == 'neutral':       return (0.0, 1.0, 0.0)
            if item == 'right-leaning': return (0.0, 0.0, 1.0)
            return (0.0, 1.0, 0.0)
        if isinstance(item, dict):
            return (float(item.get('pL', 0.0)), float(item.get('pN', 0.0)), float(item.get('pR', 0.0)))
        if isinstance(item, (list, tuple)) and len(item) == 3:
            pL, pN, pR = item
            return (float(pL), float(pN), float(pR))
        return (0.0, 1.0, 0.0)

    def _get_political_signal(self, prob_tuple, conf_threshold=0.6):
        """Extract political signal from probability tuple, ignoring neutral.

        Args:
            prob_tuple: (pL, pN, pR) probability tuple
            conf_threshold: Minimum confidence to consider stance reliable

        Returns:
            Float value: -1.0 (left), +1.0 (right), or None (neutral/uncertain)
        """
        pL, pN, pR = prob_tuple

        # Only consider if we have sufficient confidence in left or right
        if pL >= conf_threshold:
            return -1.0  # left-leaning
        elif pR >= conf_threshold:
            return 1.0   # right-leaning
        else:
            return None  # neutral or uncertain - ignore for CUSUM

    def detect_cusum_changes(self, topic_timeline, conf_threshold=0.6):
        """Detect political stance changes using CUSUM algorithm.

        Args:
            topic_timeline: List of (utterance_id, stance_data) tuples
            conf_threshold: Minimum confidence for reliable stance detection

        Returns:
            Dictionary with change_points and no_change_points lists
        """
        if not topic_timeline:
            return {'change_points': [], 'no_change_points': []}

        # Extract political signals, filtering out neutral/uncertain
        signals = []
        valid_utterances = []

        for utt_id, stance_data in topic_timeline:
            prob_tuple = self._to_probs(stance_data)
            signal = self._get_political_signal(prob_tuple, conf_threshold)

            if signal is not None:
                signals.append(signal)
                valid_utterances.append(utt_id)

        if len(signals) < 3:
            self.logger.warning(f"Insufficient political signals for CUSUM: {len(signals)}")
            return {'change_points': [], 'no_change_points': [utt_id for utt_id, _ in topic_timeline]}

        # CUSUM change detection
        change_indices = self._cusum_detect_changes(signals)

        # Convert indices back to utterance IDs
        change_points = [valid_utterances[idx] for idx in change_indices if idx < len(valid_utterances)]

        # All other utterances are no-change points
        change_set = set(change_points)
        no_change_points = [utt_id for utt_id, _ in topic_timeline if utt_id not in change_set]

        # Store for aggregate statistics
        self.all_change_points.extend(change_points)
        self.all_no_change_points.extend(no_change_points)

        return {
            'change_points': change_points,
            'no_change_points': no_change_points
        }

    def _cusum_detect_changes(self, signals):
        """Core CUSUM algorithm for detecting mean shifts in political stance.

        Args:
            signals: List of political stance values (-1.0 or +1.0)

        Returns:
            List of indices where significant changes were detected
        """
        if len(signals) < 2:
            return []

        signals = np.array(signals)
        n = len(signals)
        change_points = []

        # Calculate overall mean for reference
        overall_mean = np.mean(signals)

        # Initialize CUSUM statistics
        cusum_pos = 0.0  # Positive CUSUM (detecting upward shifts)
        cusum_neg = 0.0  # Negative CUSUM (detecting downward shifts)

        for i in range(1, n):
            # Calculate deviations from reference mean
            deviation = signals[i] - overall_mean

            # Update CUSUM statistics
            cusum_pos = max(0, cusum_pos + deviation - self.drift)
            cusum_neg = max(0, cusum_neg - deviation - self.drift)

            # Check for threshold crossings
            change_detected = False

            if cusum_pos > self.threshold:
                # Positive shift detected (towards right-leaning)
                change_points.append(i)
                cusum_pos = 0.0  # Reset after detection
                change_detected = True
                self.logger.debug(f"CUSUM: Positive shift detected at index {i}")

            elif cusum_neg > self.threshold:
                # Negative shift detected (towards left-leaning)
                change_points.append(i)
                cusum_neg = 0.0  # Reset after detection
                change_detected = True
                self.logger.debug(f"CUSUM: Negative shift detected at index {i}")

            # Enforce minimum separation between changes
            if change_detected and len(change_points) > 1:
                if i - change_points[-2] < self.min_change_separation:
                    change_points.pop()  # Remove this change point
                    self.logger.debug(f"CUSUM: Removed change point at {i} due to minimum separation")

        return change_points

    def detect_cusum_changes_advanced(self, topic_timeline, conf_threshold=0.6,
                                    adaptive_threshold=True):
        """Advanced CUSUM with adaptive thresholding and confidence weighting.

        Args:
            topic_timeline: List of (utterance_id, stance_data) tuples
            conf_threshold: Minimum confidence for reliable stance detection
            adaptive_threshold: Whether to adapt threshold based on signal variance

        Returns:
            Dictionary with change_points and no_change_points lists
        """
        if not topic_timeline:
            return {'change_points': [], 'no_change_points': []}

        # Extract weighted political signals
        signals = []
        confidences = []
        valid_utterances = []

        for utt_id, stance_data in topic_timeline:
            prob_tuple = self._to_probs(stance_data)
            signal = self._get_political_signal(prob_tuple, conf_threshold)

            if signal is not None:
                signals.append(signal)
                # Extract confidence from stance_data if available
                confidence = self._extract_confidence(stance_data)
                confidences.append(confidence)
                valid_utterances.append(utt_id)

        if len(signals) < 3:
            return {'change_points': [], 'no_change_points': [utt_id for utt_id, _ in topic_timeline]}

        # Adaptive threshold based on signal variance
        threshold = self.threshold
        if adaptive_threshold:
            signal_std = np.std(signals)
            threshold = max(self.threshold, 2.0 * signal_std)
            self.logger.debug(f"CUSUM: Adaptive threshold set to {threshold:.2f}")

        # Confidence-weighted CUSUM
        change_indices = self._cusum_detect_changes_weighted(signals, confidences, threshold)

        change_points = [valid_utterances[idx] for idx in change_indices if idx < len(valid_utterances)]
        change_set = set(change_points)
        no_change_points = [utt_id for utt_id, _ in topic_timeline if utt_id not in change_set]

        self.all_change_points.extend(change_points)
        self.all_no_change_points.extend(no_change_points)

        return {
            'change_points': change_points,
            'no_change_points': no_change_points
        }

    def _cusum_detect_changes_weighted(self, signals, confidences, threshold):
        """CUSUM with confidence weighting for more reliable change detection."""
        signals = np.array(signals)
        confidences = np.array(confidences)
        n = len(signals)
        change_points = []

        # Confidence-weighted mean
        weighted_mean = np.average(signals, weights=confidences)

        # Initialize CUSUM with confidence weighting
        cusum_pos = 0.0
        cusum_neg = 0.0

        for i in range(1, n):
            # Weight deviation by confidence
            deviation = (signals[i] - weighted_mean) * confidences[i]

            # Update CUSUM statistics
            cusum_pos = max(0, cusum_pos + deviation - self.drift)
            cusum_neg = max(0, cusum_neg - deviation - self.drift)

            # Detection with separation enforcement
            if cusum_pos > threshold or cusum_neg > threshold:
                if not change_points or i - change_points[-1] >= self.min_change_separation:
                    change_points.append(i)
                    cusum_pos = 0.0
                    cusum_neg = 0.0

                    direction = "right" if cusum_pos > cusum_neg else "left"
                    self.logger.debug(f"CUSUM: {direction} shift detected at index {i}, confidence={confidences[i]:.2f}")

        return change_points

    def _extract_confidence(self, stance_data):
        """Extract confidence score from stance data."""
        if isinstance(stance_data, dict):
            return stance_data.get('confidence', 1.0)
        elif isinstance(stance_data, (list, tuple)) and len(stance_data) == 3:
            # Use max probability as confidence
            return max(stance_data)
        else:
            return 1.0  # Default confidence

    def _get_political_signal(self, prob_tuple, conf_threshold=0.6):
        """Extract political signal, ignoring neutral positions."""
        pL, pN, pR = prob_tuple

        # Only consider confident left/right positions
        if pL >= conf_threshold:
            return -1.0  # left-leaning
        elif pR >= conf_threshold:
            return 1.0   # right-leaning
        else:
            return None  # neutral/uncertain - ignore

    def get_two_groups(self, timelines, method='cusum', conf_threshold=0.6,
                      advanced=True, **kwargs):
        """
        Group users into with/without changes using CUSUM detection.

        Args:
            timelines: Dictionary of {user_id: {topic: timeline}} data
            method: Detection method ('cusum' or 'cusum_advanced')
            conf_threshold: Minimum confidence for reliable stance detection
            advanced: Whether to use confidence-weighted CUSUM
            **kwargs: Additional parameters (threshold, drift, etc.)

        Returns:
            Dictionary with 'with_changes' and 'no_changes' user groups
        """
        with_changes = {}
        no_changes = {}

        # Update detector parameters from kwargs
        if 'threshold' in kwargs:
            self.threshold = kwargs['threshold']
        if 'drift' in kwargs:
            self.drift = kwargs['drift']
        if 'min_change_separation' in kwargs:
            self.min_change_separation = kwargs['min_change_separation']

        # Select detection method
        if advanced:
            detect_func = lambda tl: self.detect_cusum_changes_advanced(
                tl, conf_threshold=conf_threshold, **kwargs
            )
        else:
            detect_func = lambda tl: self.detect_cusum_changes(
                tl, conf_threshold=conf_threshold
            )

        self.logger.info(f"Starting CUSUM change detection with threshold={self.threshold}, "
                        f"drift={self.drift}, advanced={advanced}")

        for user_id, topic_timelines in timelines.items():
            user_has_changes = False

            for topic_name, topic_timeline in topic_timelines.items():
                # Convert to list format expected by detection methods
                topic_timeline_list = list(topic_timeline.items())

                # Run CUSUM change detection
                changes = detect_func(topic_timeline_list)

                if changes['change_points']:
                    user_has_changes = True
                    if user_id not in with_changes:
                        with_changes[user_id] = {}

                    # Store change points with their stance data
                    with_changes[user_id][topic_name] = {
                        utt_id: topic_timeline[utt_id]
                        for utt_id in changes['change_points']
                    }

            # Users without any detected changes
            if not user_has_changes:
                no_changes[user_id] = topic_timelines

        # Log summary statistics
        self.logger.info(f"CUSUM Results: {len(with_changes)} users with changes, "
                        f"{len(no_changes)} users without changes")
        self.logger.info(f"Total change points detected: {len(self.all_change_points)}")

        return {
            'with_changes': with_changes,
            'no_changes': no_changes,
            'summary': {
                'users_with_changes': len(with_changes),
                'users_without_changes': len(no_changes),
                'total_change_points': len(self.all_change_points),
                'detection_parameters': {
                    'threshold': self.threshold,
                    'drift': self.drift,
                    'min_separation': self.min_change_separation,
                    'conf_threshold': conf_threshold
                }
            }
        }

    def analyze_change_patterns(self, with_changes_data):
        """Analyze patterns in detected political stance changes.

        Args:
            with_changes_data: Users with detected changes from get_two_groups()

        Returns:
            Dictionary containing change pattern analysis
        """
        all_changes = []

        for user_id, topics in with_changes_data.items():
            for topic_name, change_points in topics.items():
                for utt_id, stance_data in change_points.items():
                    prob_tuple = self._to_probs(stance_data)
                    signal = self._get_political_signal(prob_tuple)

                    if signal is not None:
                        all_changes.append({
                            'user_id': user_id,
                            'topic': topic_name,
                            'utterance_id': utt_id,
                            'direction': 'left_shift' if signal < 0 else 'right_shift',
                            'magnitude': abs(signal),
                            'confidence': self._extract_confidence(stance_data)
                        })

        if not all_changes:
            return {'total_changes': 0}

        # Analyze patterns
        change_directions = [c['direction'] for c in all_changes]
        change_magnitudes = [c['magnitude'] for c in all_changes]
        change_confidences = [c['confidence'] for c in all_changes]

        direction_counts = Counter(change_directions)

        return {
            'total_changes': len(all_changes),
            'direction_distribution': dict(direction_counts),
            'average_magnitude': np.mean(change_magnitudes),
            'average_confidence': np.mean(change_confidences),
            'left_shifts': direction_counts.get('left_shift', 0),
            'right_shifts': direction_counts.get('right_shift', 0),
            'most_common_direction': direction_counts.most_common(1)[0] if direction_counts else None
        }

    def tune_cusum_parameters(self, validation_timeline, known_changes=None):
        """Tune CUSUM parameters for optimal performance on validation data.

        Args:
            validation_timeline: Timeline with known change points for tuning
            known_changes: List of known change points for comparison

        Returns:
            Dictionary with optimal parameters and performance metrics
        """
        # Parameter grid for tuning
        threshold_values = [1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0]
        drift_values = [0.3, 0.5, 0.7, 1.0]

        best_params = None
        best_score = -1.0
        results = []

        for threshold in threshold_values:
            for drift in drift_values:
                # Temporarily set parameters
                original_threshold = self.threshold
                original_drift = self.drift

                self.threshold = threshold
                self.drift = drift

                # Test detection
                detected = self.detect_cusum_changes(validation_timeline)

                # Calculate performance metrics
                if known_changes:
                    precision, recall, f1 = self._calculate_detection_metrics(
                        detected['change_points'], known_changes
                    )
                    score = f1
                else:
                    # Use change detection rate as proxy metric
                    score = len(detected['change_points']) / max(1, len(validation_timeline))

                results.append({
                    'threshold': threshold,
                    'drift': drift,
                    'score': score,
                    'change_points': len(detected['change_points'])
                })

                if score > best_score:
                    best_score = score
                    best_params = {'threshold': threshold, 'drift': drift}

                # Restore original parameters
                self.threshold = original_threshold
                self.drift = original_drift

        # Set best parameters
        if best_params:
            self.threshold = best_params['threshold']
            self.drift = best_params['drift']

        self.logger.info(f"CUSUM tuning complete. Best params: {best_params}, Score: {best_score:.3f}")

        return {
            'best_parameters': best_params,
            'best_score': best_score,
            'all_results': results
        }

    def _calculate_detection_metrics(self, detected_changes, known_changes):
        """Calculate precision, recall, and F1 for change detection."""
        detected_set = set(detected_changes)
        known_set = set(known_changes)

        true_positives = len(detected_set & known_set)
        false_positives = len(detected_set - known_set)
        false_negatives = len(known_set - detected_set)

        precision = true_positives / max(1, true_positives + false_positives)
        recall = true_positives / max(1, true_positives + false_negatives)
        f1 = 2 * precision * recall / max(1, precision + recall)

        return precision, recall, f1

    def get_change_statistics(self):
        """Get aggregate statistics across all processed timelines."""
        total_points = len(self.all_change_points) + len(self.all_no_change_points)
        change_rate = len(self.all_change_points) / max(1, total_points)

        return {
            'total_change_points': len(self.all_change_points),
            'total_no_change_points': len(self.all_no_change_points),
            'overall_change_rate': change_rate,
            'detection_parameters': {
                'threshold': self.threshold,
                'drift': self.drift,
                'min_separation': self.min_change_separation
            }
        }

# # Example usage and testing
# if __name__ == "__main__":
#     # Setup logging
#     logging.basicConfig(level=logging.INFO)

#     # Initialize CUSUM detector
#     detector = CUSUMChangeDetector(threshold=3.0, drift=0.5, min_change_separation=3)

#     # Example timeline data
#     example_timeline = [
#         ('post_1', {'pL': 0.8, 'pN': 0.1, 'pR': 0.1, 'confidence': 0.9}),  # left
#         ('post_2', {'pL': 0.7, 'pN': 0.2, 'pR': 0.1, 'confidence': 0.8}),  # left
#         ('post_3', {'pL': 0.6, 'pN': 0.3, 'pR': 0.1, 'confidence': 0.7}),  # left
#         ('post_4', {'pL': 0.2, 'pN': 0.3, 'pR': 0.5, 'confidence': 0.6}),  # uncertain
#         ('post_5', {'pL': 0.1, 'pN': 0.2, 'pR': 0.7, 'confidence': 0.8}),  # right - CHANGE!
#         ('post_6', {'pL': 0.1, 'pN': 0.1, 'pR': 0.8, 'confidence': 0.9}),  # right
#         ('post_7', {'pL': 0.1, 'pN': 0.2, 'pR': 0.7, 'confidence': 0.8}),  # right
#     ]

#     # Test CUSUM detection
#     print("Testing CUSUM Change Detection")
#     print("=" * 40)

#     # Basic CUSUM
#     result_basic = detector.detect_cusum_changes(example_timeline)
#     print(f"Basic CUSUM - Changes: {result_basic['change_points']}")

#     # Advanced CUSUM
#     result_advanced = detector.detect_cusum_changes_advanced(example_timeline)
#     print(f"Advanced CUSUM - Changes: {result_advanced['change_points']}")

#     # Example with multiple users
#     example_timelines = {
#         'user_001': {'politics': dict(example_timeline)},
#         'user_002': {'politics': {
#             'post_a': 'left-leaning',
#             'post_b': 'left-leaning',
#             'post_c': 'neutral',
#             'post_d': 'right-leaning'  # Simple change
#         }}
#     }

#     # Test group classification
#     groups = detector.get_two_groups(example_timelines, conf_threshold=0.6, advanced=True)
#     print(f"\nGroup Analysis:")
#     print(f"Users with changes: {list(groups['with_changes'].keys())}")
#     print(f"Users without changes: {list(groups['no_changes'].keys())}")

#     # Change pattern analysis
#     if groups['with_changes']:
#         patterns = detector.analyze_change_patterns(groups['with_changes'])
#         print(f"\nChange Patterns:")
#         print(f"Total changes detected: {patterns['total_changes']}")
#         print(f"Direction distribution: {patterns['direction_distribution']}")
#         print(f"Average confidence: {patterns['average_confidence']:.3f}")

#     # Overall statistics
#     stats = detector.get_change_statistics()
#     print(f"\nOverall Statistics:")
#     print(f"Change rate: {stats['overall_change_rate']:.3f}")
#     print(f"Detection threshold: {stats['detection_parameters']['threshold']}")

In [7]:
class WindowExtractor:
    """ Find the conversations around the change point """

    def __init__(self, corpus, timelines):
        self.corpus = corpus
        self.timelines = timelines
        self.user_conversations_cache = {}  # Add cache

    def build_global_user_conversations_index(self):
        """Build sorted conversations for ALL users upfront"""
        print("Building global user conversations index...")
        user_conversations = {}

        convos = list(corpus.iter_conversations())
        for convo in convos:
            # Get all speakers in this conversation
            speakers = {utt.speaker.id for utt in convo.iter_utterances()}

            # Add this conversation to each speaker's list
            for speaker_id in speakers:
                if speaker_id not in user_conversations:
                    user_conversations[speaker_id] = []
                user_conversations[speaker_id].append(convo)

        # Sort each user's conversations once
        for speaker_id in user_conversations:
            user_conversations[speaker_id].sort(
                key=lambda convo: min(utt.timestamp for utt in convo.iter_utterances())
            )

        print(f"Index built for {len(user_conversations)} users!")

        self.user_conversations_cache = user_conversations

    def get_user_conversations_chronological_old(self, corpus, speaker_id):
        """Get all conversations for a user in chronological order."""

        # Check cache first
        if speaker_id in self.user_conversations_cache:
            return self.user_conversations_cache[speaker_id]

        # Get all conversations where the speaker participated
        user_conversations = [convo for convo in corpus.iter_conversations()
                              if speaker_id in [utt.speaker.id for utt in convo.iter_utterances()]]

        # Sort conversations by their earliest timestamp
        user_conversations.sort(key=lambda convo: min(utt.timestamp for utt in convo.iter_utterances()))

        # Cache the result
        self.user_conversations_cache[speaker_id] = user_conversations

        return user_conversations

    def get_user_conversations_chronological(self, corpus, speaker_id):
        return self.user_conversations_cache.get(speaker_id, [])

    def get_conversations_around_change_point(self, corpus, change_point, test=False, window=10):
        # Get first change (probably only one I need)
        utterance = corpus.get_utterance(change_point)

        # Find the convo this utterance belongs to:
        conversation = utterance.get_conversation()

        # Put all user's convos in a list
        speaker_id = utterance.speaker.id
        if test is True:
            user_conversations = self.get_user_conversations_chronological_old(corpus, speaker_id)
        else:
            user_conversations = self.get_user_conversations_chronological(corpus, speaker_id)
            print(f"Cache: {user_conversations}")

        candidate_convos = []
        # find the index of the convo, and return the convo id of the 3 prior convos
        for i, convo in enumerate(user_conversations):
            if conversation.id == user_conversations[i].id:
                # Check if there are at least two conversations before the current one
                # To this:
                if i >= window:
                    # Get the 'window' number of conversations before the current one
                    candidate_convos.extend(user_conversations[i-10:i])
                else:
                    # If there are fewer than 10 conversations before, get all of them
                    candidate_convos.extend(user_conversations[:i])

                # Append the current conversation with the change point
                candidate_convos.append(conversation)
                break  # Found the conversation, no need to continue the loop

        return candidate_convos

In [8]:
corpus = Corpus(filename=CORPUS_PATH)

No configuration file found at /root/.convokit/config.yml; writing with contents: 
# Default Backend Parameters
db_host: localhost:27017
data_directory: ~/.convokit/saved-corpora
model_directory: ~/.convokit/saved-models
default_backend: mem


In [9]:
timeline_builder = TimelineBuilder(corpus)
timelines = timeline_builder.build_timelines()

In [None]:
# User to test:
user_id = "HardCoreModerate"
topic = "economic policy"

In [10]:
change_detector = ChangeDetector()

In [None]:
change_detector = ChangeDetector()
topic_timeline = timelines[user_id][topic]
topic_timeline_list = list(topic_timeline.items())
change_points = change_detector.detect_cusum_changes(topic_timeline_list)['change_points']
# print(change_points)

KeyError: 'economic policy'

In [11]:
window_extractor = WindowExtractor(corpus, timelines)
candidate_convos = window_extractor.get_conversations_around_change_point(change_point=change_points[0], corpus=corpus, test=True)
print(f"Candidate convos: {[convo for convo in candidate_convos]}")

NameError: name 'change_points' is not defined

In [12]:
window_extractor.build_global_user_conversations_index()

Building global user conversations index...


KeyboardInterrupt: 

In [13]:

class OpPathPairer:
    """ Pair OP utterances with a path of responses by a user/challenger"""
    def __init__(self, corpus, timelines):
        self.corpus = corpus
        self.timelines = timelines

    def _trim_paths(self, op_utterance):
        try:
            conversation = op_utterance.get_conversation()
        except Exception as e:
            print(f"Can't access convo from utterance, error{e}")

        paths = conversation.get_root_to_leaf_paths()

        trimmed_paths = []
        for path in paths:
            if op_utterance in path:
                # Find where op_utterance is in this path
                op_index = path.index(op_utterance)
                # Slice from that index onwards
                trimmed_path = path[op_index + 1:]
                trimmed_paths.append(trimmed_path)

        return trimmed_paths

    def _filter_paths(self, trimmed_paths, op_speaker_id):
        """Filter paths to create rooted path-units, excluding OP utterances"""
        filtered_paths = {}

        for path_index, path in enumerate(trimmed_paths):
            for utt in path:
                # Skip if this utterance is from the OP
                if utt.speaker.id == op_speaker_id:
                    continue

                key = f"{utt.speaker.id}_path_{path_index}"
                if key not in filtered_paths:
                    filtered_paths[key] = []
                filtered_paths[key].append(utt)

        return filtered_paths

    def extract_rooted_paths(self, op_utterance):
        trimmed_path = self._trim_paths(op_utterance)
        # Pass the OP's speaker ID to filter method
        filtered_path = self._filter_paths(trimmed_path, op_utterance.speaker.id)

        return filtered_path

    # Find the op_utterances from a convo and add them to a list
    def extract_op_utterances_from_convo(self, candidate_convo, user_id):
        paths = candidate_convo.get_root_to_leaf_paths()
        op_utterances = []
        for path in paths:
            for utt in path:
                if utt.speaker.id == user_id and utt not in op_utterances:
                    op_utterances.append(utt)
                    break

        return op_utterances

    # Get all op_utterances accross every candidate convo
    def extract_op_utterances_from_all_convos(self, candidate_convos, user_id):
        all_op_utterances = []
        for candidate_convo in candidate_convos:
            op_utterances = self.extract_op_utterances_from_convo(candidate_convo, user_id)
            all_op_utterances.extend(op_utterances)

        return all_op_utterances

    # Get the paths of an op_utterance from the op_utterances list
    def extract_rooted_path_from_candidate_convos(self, candidate_convos, user_id):
        all_op_utterances = self.extract_op_utterances_from_all_convos(candidate_convos, user_id)

        # debug:
        # for op_utt in all_op_utterances:
        #     print(f'my input user_id: {user_id}')
        #     speaker_id = self.corpus.get_utterance(op_utt.id).speaker.id
        #     print(f'Utt_id: {op_utt.id} and user_id: {speaker_id} in the list of all op utterances.')

        all_ops_n_paths = []
        for op_utt in all_op_utterances:
            # So rooted paths is a dict. Should I convert to list?
            rooted_paths = self.extract_rooted_paths(op_utt)

            op_n_paths = (op_utt, rooted_paths)
            all_ops_n_paths.append(op_n_paths)

        return all_ops_n_paths

In [14]:
# This should be a list of tuples, where the second part is the text of the concatenated utterances of a user
op_path_pairer = OpPathPairer(corpus, timelines)
# op_path_pairs = op_path_pairer.extract_rooted_path_from_candidate_convos(candidate_convos, user_id)
pair_preprocessor = PairPreprocessor()
# preprocessed_pairs = pair_preprocessor.concatenate_path_in_all_pairs(op_path_pairs)
# print(preprocessed_pairs)

# for pair in preprocessed_pairs:
#     print(100*'===')
#     op = pair[0]
#     paths = pair[1]
#     print(f"OP: {op.speaker.id}, Text: {op.text}")
#     for id, text in paths.items():
#         print(100*'---')
#         print(f"ID: {id}, Text: {text}")
#     print(100*'§§§§§§')

In [15]:
# get rid of gobal vars and stuff. clean it up.

In [16]:
root_to_leaf_paths = candidate_convos[0].get_root_to_leaf_paths()

for path in root_to_leaf_paths:
    for utt in path:
        print(f"Speaker: {utt.speaker.id}, Text: {utt.text}")

NameError: name 'candidate_convos' is not defined

In [17]:
# Load English stop words
stop_words_set = set(stopwords.words('english'))
persuasion_analyzer = Interplay()

In [18]:
# use the groups
groups = change_detector.get_two_groups(timelines)
groups_tuple = (groups['with_changes'], groups['no_changes'])

In [19]:
!python -m spacy download en_core_web_sm
nlp = spacy.load('en_core_web_sm')

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


NameError: name 'spacy' is not defined

In [20]:
nlp = spacy.load('en_core_web_sm')

NameError: name 'spacy' is not defined

In [21]:
import spacy
from convokit import PolitenessStrategies, Utterance, Speaker

# Load spaCy model (do this once at the top of your script)
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Please install spaCy English model: python -m spacy download en_core_web_sm")
    raise

# Initialize PolitenessStrategies
politeness_analyzer = PolitenessStrategies()

def get_politeness_score(concatenated_path_text):
    """
    Get politeness strategy scores from challenger's concatenated reply chain
    """
    # Create a dummy speaker
    temp_speaker = Speaker(id="temp_speaker")

    # Create utterance with the speaker
    temp_utterance = Utterance(
        id="temp_id",
        speaker=temp_speaker,
        text=concatenated_path_text
    )

    # Transform utterance to get politeness scores
    politeness_analyzer.transform_utterance(temp_utterance, spacy_nlp=nlp)

    # Access the politeness strategies from the utterance's metadata
    politeness_strategies_dict = temp_utterance.meta.get('politeness_strategies', {})

    return {
        'politeness_gratitude': politeness_strategies_dict.get('feature_politeness_==Gratitude==', 0),
        'politeness_apologizing': politeness_strategies_dict.get('feature_politeness_==Apologizing==', 0),
        'politeness_please': politeness_strategies_dict.get('feature_politeness_==Please==', 0),
        'politeness_indirect_greeting': politeness_strategies_dict.get('feature_politeness_==Indirect_(greeting)==', 0),
        'politeness_please_start': politeness_strategies_dict.get('feature_politeness_==Please_start==', 0),
        'politeness_hashedge': politeness_strategies_dict.get('feature_politeness_==HASHEDGE==', 0),
        'politeness_deference': politeness_strategies_dict.get('feature_politeness_==Deference==', 0),
    }

In [22]:
# Feature extraction functions (return features, not scores)
def extract_argument_complexity_features(text):
    words = text.split()
    sentences = [s for s in text.split('.') if s.strip()]
    subordinating = ['because', 'since', 'although', 'while', 'whereas', 'if']

    return {
        'word_count': len(words),
        'unique_words': len(set(words)),
        'sentence_count': len(sentences),
        'subordinating_count': sum(text.lower().count(word) for word in subordinating)
    }

def extract_evidence_features(text):
    import re
    evidence_patterns = [
        r'http[s]?://\S+',
        r'according to',
        r'research shows',
        r'studies indicate',
        r'data suggests',
        r'statistics show',
        r'survey found',
        r'report states'
    ]

    evidence_counts = {}
    for i, pattern in enumerate(evidence_patterns):
        evidence_counts[f'evidence_type_{i}'] = len(re.findall(pattern, text.lower()))

    return evidence_counts

def extract_hedging_features(text):
    hedges = [
        'might', 'could', 'perhaps', 'possibly', 'probably', 'likely',
        'seems', 'appears', 'suggests', 'indicates', 'tends to',
        'generally', 'usually', 'often', 'sometimes', 'may'
    ]

    hedge_counts = {}
    for hedge in hedges:
        hedge_counts[f'hedge_{hedge}'] = text.lower().count(hedge)

    return {
        'hedge_counts': hedge_counts,
        'total_words': len(text.split())
    }

# Scoring functions (take features, return single score)
def calculate_complexity_score(features):
    if features['word_count'] == 0:
        return 0

    lexical_diversity = features['unique_words'] / features['word_count']
    avg_sentence_length = features['word_count'] / max(1, features['sentence_count'])
    subordinating_ratio = features['subordinating_count'] / features['word_count']

    return lexical_diversity + (avg_sentence_length / 100) + subordinating_ratio

def calculate_evidence_score(features):
    return sum(features.values())

def calculate_hedging_score_from_features(features):
    total_hedges = sum(features['hedge_counts'].values())
    return total_hedges / max(1, features['total_words'])

In [30]:
import re
import spacy
from collections import Counter
from convokit import PolitenessStrategies, Utterance, Speaker

class OptimizedFeatureExtractor:
    def __init__(self):
        # Initialize spaCy and politeness analyzer once
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Please install spaCy English model: python -m spacy download en_core_web_sm")
            raise

        self.politeness_analyzer = PolitenessStrategies()
        self.temp_speaker = Speaker(id="temp_speaker")  # Reuse speaker object

        # Pre-compile all regex patterns
        self.evidence_patterns = [
            re.compile(r'http[s]?://\S+'),
            re.compile(r'according to'),
            re.compile(r'research shows'),
            re.compile(r'studies indicate'),
            re.compile(r'data suggests'),
            re.compile(r'statistics show'),
            re.compile(r'survey found'),
            re.compile(r'report states')
        ]

        # Pre-define word lists
        self.subordinating = ['because', 'since', 'although', 'while', 'whereas', 'if']
        self.hedges = [
            'might', 'could', 'perhaps', 'possibly', 'probably', 'likely',
            'seems', 'appears', 'suggests', 'indicates', 'tends to',
            'generally', 'usually', 'often', 'sometimes', 'may'
        ]

        # Create compiled patterns for hedges (for faster matching)
        self.hedge_pattern = re.compile(r'\b(?:' + '|'.join(re.escape(h) for h in self.hedges) + r')\b', re.IGNORECASE)
        self.subordinating_pattern = re.compile(r'\b(?:' + '|'.join(re.escape(s) for s in self.subordinating) + r')\b', re.IGNORECASE)

    def extract_all_features(self, text, extract_politeness=True):
        """Extract all features in a single pass through the text"""
        if not text or not text.strip():
            return self._empty_features()

        # Single text preprocessing
        lower_text = text.lower()
        words = text.split()
        sentences = [s.strip() for s in text.split('.') if s.strip()]

        # Basic counts
        word_count = len(words)
        unique_words = len(set(words))
        sentence_count = len(sentences)

        # Use regex for efficient pattern counting
        subordinating_count = len(self.subordinating_pattern.findall(text))

        # Evidence pattern matching (single pass)
        evidence_counts = {}
        for i, pattern in enumerate(self.evidence_patterns):
            evidence_counts[f'evidence_type_{i}'] = len(pattern.findall(lower_text))

        # Hedge counting (single regex pass)
        hedge_matches = self.hedge_pattern.findall(text)
        hedge_counts = Counter(match.lower() for match in hedge_matches)

        # Ensure all hedges are represented
        hedge_features = {}
        for hedge in self.hedges:
            hedge_features[f'hedge_{hedge}'] = hedge_counts.get(hedge, 0)

        # Politeness features (if requested)
        politeness_features = {}
        if extract_politeness:
            politeness_features = self._extract_politeness_features(text)

        return {
            # Complexity features
            'word_count': word_count,
            'unique_words': unique_words,
            'sentence_count': sentence_count,
            'subordinating_count': subordinating_count,

            # Evidence features
            **evidence_counts,

            # Hedging features
            **hedge_features,
            'total_hedges': sum(hedge_counts.values()),
            'total_words': word_count,

            # Politeness features
            **politeness_features
        }

    def _extract_politeness_features(self, text):
        """Extract politeness features using ConvoKit"""
        try:
            # Create utterance with reused speaker object
            temp_utterance = Utterance(
                id=f"temp_{id(text)}",  # Use object id for uniqueness
                speaker=self.temp_speaker,
                text=text
            )

            # Transform utterance to get politeness scores
            self.politeness_analyzer.transform_utterance(temp_utterance, spacy_nlp=self.nlp)

            # Access the politeness strategies from the utterance's metadata
            politeness_strategies_dict = temp_utterance.meta.get('politeness_strategies', {})

            return {
                'politeness_gratitude': politeness_strategies_dict.get('feature_politeness_==Gratitude==', 0),
                'politeness_apologizing': politeness_strategies_dict.get('feature_politeness_==Apologizing==', 0),
                'politeness_please': politeness_strategies_dict.get('feature_politeness_==Please==', 0),
                'politeness_indirect_greeting': politeness_strategies_dict.get('feature_politeness_==Indirect_(greeting)==', 0),
                'politeness_please_start': politeness_strategies_dict.get('feature_politeness_==Please_start==', 0),
                'politeness_hashedge': politeness_strategies_dict.get('feature_politeness_==HASHEDGE==', 0),
                'politeness_deference': politeness_strategies_dict.get('feature_politeness_==Deference==', 0),
            }
        except Exception as e:
            print(f"Error extracting politeness features: {e}")
            return {
                'politeness_gratitude': 0,
                'politeness_apologizing': 0,
                'politeness_please': 0,
                'politeness_indirect_greeting': 0,
                'politeness_please_start': 0,
                'politeness_hashedge': 0,
                'politeness_deference': 0,
            }

    def _empty_features(self):
        """Return empty features for invalid input"""
        evidence_features = {f'evidence_type_{i}': 0 for i in range(8)}
        hedge_features = {f'hedge_{hedge}': 0 for hedge in self.hedges}
        politeness_features = {
            'politeness_gratitude': 0,
            'politeness_apologizing': 0,
            'politeness_please': 0,
            'politeness_indirect_greeting': 0,
            'politeness_please_start': 0,
            'politeness_hashedge': 0,
            'politeness_deference': 0,
        }

        return {
            'word_count': 0,
            'unique_words': 0,
            'sentence_count': 0,
            'subordinating_count': 0,
            **evidence_features,
            **hedge_features,
            'total_hedges': 0,
            'total_words': 0,
            **politeness_features
        }

In [31]:
def calculate_complexity_score(features):
    if features['word_count'] == 0:
        return 0

    lexical_diversity = features['unique_words'] / features['word_count']
    avg_sentence_length = features['word_count'] / max(1, features['sentence_count'])
    subordinating_ratio = features['subordinating_count'] / features['word_count']

    return lexical_diversity + (avg_sentence_length / 100) + subordinating_ratio

def calculate_evidence_score(features):
    return sum(features[f'evidence_type_{i}'] for i in range(8))

def calculate_hedging_score_from_features(features):
    return features['total_hedges'] / max(1, features['total_words'])

def calculate_politeness_score(features):
    """
    Calculate overall politeness score from individual politeness features
    You can customize this weighting based on your research needs
    """
    politeness_features = [
        features.get('politeness_gratitude', 0),
        features.get('politeness_apologizing', 0),
        features.get('politeness_please', 0),
        features.get('politeness_indirect_greeting', 0),
        features.get('politeness_please_start', 0),
        features.get('politeness_hashedge', 0),
        features.get('politeness_deference', 0)
    ]

    # Simple sum approach (you might want to weight these differently)
    return sum(politeness_features)

def calculate_weighted_politeness_score(features):
    """
    Alternative: weighted politeness score based on research importance
    Adjust weights based on your literature review findings
    """
    weights = {
        'politeness_gratitude': 1.0,
        'politeness_apologizing': 0.8,
        'politeness_please': 1.2,
        'politeness_indirect_greeting': 0.6,
        'politeness_please_start': 1.0,
        'politeness_hashedge': 0.9,
        'politeness_deference': 1.1
    }

    weighted_score = 0
    for feature, weight in weights.items():
        weighted_score += features.get(feature, 0) * weight

    return weighted_score

In [32]:
extractor = OptimizedFeatureExtractor()

In [33]:
from tqdm import tqdm
from convokit import PolitenessStrategies
import re

# Convos has been set to test
# Init
i = 0
group_means = [] # Initialize as a list to append means
group_scores = []
utts_num = 0

# For each group
for group_idx, group in enumerate(tqdm(groups_tuple, desc="Processing groups")):
    # Initialize dictionary for this group's scores
    current_group_scores = {
        'interplay': [],
        'politeness': [],
        'argument_complexity': [],
        'evidence_markers': [],
        'hedging': []
    }

    count = 0
    for user_id, topic_timelines in group.items():
        # Process only 2 users for debugging
        if count < 2:

            user_start_time = time.time()
            user_change_points = 0

            for topic_timeline in topic_timelines.values():

                for change_point in topic_timeline.keys():  # Iterate through change points (keys)
                    utts_num += 1

                    user_change_points += 1

                    # TIME: Window extraction
                    start_time = time.time()
                    try:
                        candidate_convos = window_extractor.get_conversations_around_change_point(
                            change_point=change_point, corpus=corpus, test=True
                        )
                        window_time = time.time() - start_time
                        print(f'⏱️ Window extraction: {window_time:.3f}s')
                    except ValueError as e:
                        print(f"Skipping change point {change_point}: {e}")
                        continue


                    # TIME: Path extraction
                    start_time = time.time()
                    op_path_pairs = []
                    for candidate_convo in candidate_convos:
                        try:
                            op_path_pairs.extend(op_path_pairer.extract_rooted_path_from_candidate_convos(
                                [candidate_convo], user_id
                            ))
                        except ValueError as e:
                            print(f"Skipping conversation {candidate_convo.id}: {e}")
                            continue
                    path_time = time.time() - start_time
                    print(f'⏱️ Path extraction: {path_time:.3f}s')


                    # TIME: Preprocessing
                    start_time = time.time()
                    preprocessed_pairs = pair_preprocessor.concatenate_path_in_all_pairs(op_path_pairs)
                    preprocess_time = time.time() - start_time
                    print(f'⏱️ Preprocessing: {preprocess_time:.3f}s')


                    # TIME: Feature extraction (ENHANCED)
                    start_time = time.time()
                    interplay_features_list = []
                    politeness_features_list = []
                    # NEW: Feature lists for new predictors
                    argument_complexity_features_list = []
                    evidence_features_list = []
                    hedging_features_list = []

                    for op, paths in preprocessed_pairs:
                        for k, concatenated_utts in paths.items():
                            all_features = extractor.extract_all_features(concatenated_utts, extract_politeness=True)
                            # Existing feature extraction
                            interplay_features = persuasion_analyzer.calculate_interplay_features(
                                op.text, concatenated_utts, stop_words_set
                            )
                            interplay_features_list.append(interplay_features)

                            # Fixed politeness feature extraction
                            politeness_features = get_politeness_score(concatenated_utts)
                            politeness_features_list.append(all_features)

                            # NEW: Extract features (not scores) for new predictors
                            complexity_features = extract_argument_complexity_features(concatenated_utts)
                            argument_complexity_features_list.append(all_features)

                            evidence_features = extract_evidence_features(concatenated_utts)
                            evidence_features_list.append(all_features)

                            hedging_features = extract_hedging_features(concatenated_utts)
                            hedging_features_list.append(all_features)

                    feature_time = time.time() - start_time
                    print(f'⏱️ Feature extraction (enhanced): {feature_time:.3f}s')

                    # TIME: Score interplay (existing)
                    start_time = time.time()
                    interplay_scores = []
                    for interplay_features in interplay_features_list:
                        score = persuasion_analyzer.calculate_persuasion_score(interplay_features)
                        interplay_scores.append(score)

                    # Score politeness features (existing)
                    politeness_scores = []
                    for politeness_features in politeness_features_list:
                        politeness_total = sum(politeness_features.values())
                        politeness_scores.append(politeness_total)

                    # NEW: Score the new predictors
                    argument_complexity_scores = []
                    for complexity_features in argument_complexity_features_list:
                        score = calculate_complexity_score(complexity_features)
                        argument_complexity_scores.append(score)

                    evidence_scores = []
                    for evidence_features in evidence_features_list:
                        score = calculate_evidence_score(evidence_features)
                        evidence_scores.append(score)

                    hedging_scores = []
                    for hedging_features in hedging_features_list:
                        score = calculate_hedging_score_from_features(hedging_features)
                        hedging_scores.append(score)

                    scoring_time = time.time() - start_time
                    print(f'⏱️ Scoring: {scoring_time:.3f}s')

                    # Add all scores to current group (NEW STRUCTURE)
                    current_group_scores['interplay'].extend(interplay_scores)
                    current_group_scores['politeness'].extend(politeness_scores)
                    current_group_scores['argument_complexity'].extend(argument_complexity_scores)
                    current_group_scores['evidence_markers'].extend(evidence_scores)
                    current_group_scores['hedging'].extend(hedging_scores)

                    # Print total time for this change point
                    total_time = window_time + path_time + preprocess_time + feature_time + scoring_time
                    print(f'🔥 TOTAL for change point: {total_time:.3f}s\n')
                    break

            # TIME: End timing this user
            user_total_time = time.time() - user_start_time
            print(f'👤 USER {user_id} TOTAL: {user_total_time:.3f}s ({user_change_points} change points)')
            print(f'📊 Average per change point: {user_total_time/max(1, user_change_points):.3f}s\n')

            count += 1

        if count >= 10:
            break

    # Calculate means for each predictor for this group (ENHANCED)
    group_mean = {}
    for predictor_name, scores in current_group_scores.items():
        if scores:  # Check if we have scores
            group_mean[predictor_name] = sum(scores) / len(scores)
        else:
            group_mean[predictor_name] = 0

    # Append this group's means
    group_means.append(group_mean)
    group_scores.append(current_group_scores)

# Print the calculated group means for each predictor (ENHANCED)
print(f'\n=== GROUP COMPARISON ===')
for group_idx, group_mean in enumerate(group_means):
    print(f'\nGroup {group_idx + 1} Means:')
    for predictor, mean_score in group_mean.items():
        print(f'  {predictor}: {mean_score:.4f}')

# Print comparison between groups
if len(group_means) >= 2:
    print(f'\n=== GROUP 1 vs GROUP 2 COMPARISON ===')
    for predictor in group_means[0].keys():
        group1_mean = group_means[0][predictor]
        group2_mean = group_means[1][predictor]
        difference = group1_mean - group2_mean
        percent_diff = (difference / group2_mean * 100) if group2_mean != 0 else 0
        print(f'{predictor}:')
        print(f'  Group 1: {group1_mean:.4f}')
        print(f'  Group 2: {group2_mean:.4f}')
        print(f'  Difference: {difference:.4f} ({percent_diff:+.1f}%)')
        print()

Processing groups:   0%|          | 0/2 [00:00<?, ?it/s]

⏱️ Window extraction: 0.000s
⏱️ Path extraction: 0.015s
⏱️ Preprocessing: 0.000s
⏱️ Feature extraction (enhanced): 0.385s
⏱️ Scoring: 0.000s
🔥 TOTAL for change point: 0.400s

⏱️ Window extraction: 0.001s
⏱️ Path extraction: 0.056s
⏱️ Preprocessing: 0.000s
⏱️ Feature extraction (enhanced): 2.129s
⏱️ Scoring: 0.000s
🔥 TOTAL for change point: 2.187s

⏱️ Window extraction: 0.000s
⏱️ Path extraction: 0.014s
⏱️ Preprocessing: 0.000s
⏱️ Feature extraction (enhanced): 0.934s
⏱️ Scoring: 0.000s
🔥 TOTAL for change point: 0.948s

👤 USER seltaeb4 TOTAL: 3.536s (3 change points)
📊 Average per change point: 1.179s

⏱️ Window extraction: 0.000s
⏱️ Path extraction: 0.043s
⏱️ Preprocessing: 0.003s
⏱️ Feature extraction (enhanced): 27.104s
⏱️ Scoring: 0.002s
🔥 TOTAL for change point: 27.153s

⏱️ Window extraction: 0.000s
⏱️ Path extraction: 0.224s
⏱️ Preprocessing: 0.017s


Processing groups:   0%|          | 0/2 [00:58<?, ?it/s]


KeyboardInterrupt: 

In [25]:
# ========== NEW: STATISTICAL ANALYSIS SECTION ==========

def perform_statistical_tests(group1_scores, group2_scores, predictor_name, alpha=0.05):
    """
    Perform comprehensive statistical tests between two groups for a given predictor.

    Args:
        group1_scores: List of scores for group 1
        group2_scores: List of scores for group 2
        predictor_name: Name of the predictor being tested
        alpha: Significance level (default 0.05)

    Returns:
        dict: Results of all statistical tests
    """
    if not group1_scores or not group2_scores:
        return {
            'valid': False,
            'reason': 'Empty score lists'
        }

    # Convert to numpy arrays
    g1 = np.array(group1_scores)
    g2 = np.array(group2_scores)

    # Basic descriptive statistics
    results = {
        'valid': True,
        'predictor': predictor_name,
        'group1_n': len(g1),
        'group2_n': len(g2),
        'group1_mean': np.mean(g1),
        'group2_mean': np.mean(g2),
        'group1_std': np.std(g1, ddof=1),
        'group2_std': np.std(g2, ddof=1),
        'group1_median': np.median(g1),
        'group2_median': np.median(g2),
        'mean_difference': np.mean(g1) - np.mean(g2),
    }

    # Effect size (Cohen's d)
    pooled_std = np.sqrt(((len(g1) - 1) * results['group1_std']**2 +
                         (len(g2) - 1) * results['group2_std']**2) /
                        (len(g1) + len(g2) - 2))
    results['cohens_d'] = results['mean_difference'] / pooled_std if pooled_std > 0 else 0

    # Interpret effect size
    abs_d = abs(results['cohens_d'])
    if abs_d < 0.2:
        effect_size_interpretation = "negligible"
    elif abs_d < 0.5:
        effect_size_interpretation = "small"
    elif abs_d < 0.8:
        effect_size_interpretation = "medium"
    else:
        effect_size_interpretation = "large"
    results['effect_size_interpretation'] = effect_size_interpretation

    # Test for equal variances (Levene's test)
    try:
        levene_stat, levene_p = levene(g1, g2)
        results['levene_statistic'] = levene_stat
        results['levene_p_value'] = levene_p
        results['equal_variances'] = levene_p > alpha
    except Exception as e:
        results['levene_error'] = str(e)
        results['equal_variances'] = True  # Assume equal variances if test fails

    # Independent samples t-test
    try:
        # Use equal_var parameter based on Levene's test
        equal_var = results.get('equal_variances', True)
        t_stat, t_p = ttest_ind(g1, g2, equal_var=equal_var)
        results['t_statistic'] = t_stat
        results['t_p_value'] = t_p
        results['t_significant'] = t_p < alpha

        # Calculate degrees of freedom
        if equal_var:
            results['t_df'] = len(g1) + len(g2) - 2
        else:
            # Welch's t-test degrees of freedom
            s1_sq, s2_sq = results['group1_std']**2, results['group2_std']**2
            n1, n2 = len(g1), len(g2)
            results['t_df'] = ((s1_sq/n1 + s2_sq/n2)**2) / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))

    except Exception as e:
        results['t_test_error'] = str(e)

    # Mann-Whitney U test (non-parametric alternative)
    try:
        u_stat, u_p = mannwhitneyu(g1, g2, alternative='two-sided')
        results['mannwhitney_u_statistic'] = u_stat
        results['mannwhitney_p_value'] = u_p
        results['mannwhitney_significant'] = u_p < alpha
    except Exception as e:
        results['mannwhitney_error'] = str(e)

    # 95% Confidence interval for the difference in means
    try:
        # Pooled standard error
        n1, n2 = len(g1), len(g2)
        pooled_se = pooled_std * np.sqrt(1/n1 + 1/n2)

        # Critical t-value
        df = results.get('t_df', n1 + n2 - 2)
        t_critical = stats.t.ppf(1 - alpha/2, df)

        # Confidence interval
        margin_of_error = t_critical * pooled_se
        results['ci_lower'] = results['mean_difference'] - margin_of_error
        results['ci_upper'] = results['mean_difference'] + margin_of_error
    except Exception as e:
        results['ci_error'] = str(e)

    return results

def format_statistical_results(results):
    """Format statistical results for readable output."""
    if not results['valid']:
        return f"❌ {results['predictor']}: {results['reason']}"

    output = f"\n📊 {results['predictor'].upper().replace('_', ' ')} ANALYSIS:\n"
    output += f"{'='*50}\n"

    # Descriptive statistics
    output += f"Sample sizes: Group 1: n={results['group1_n']}, Group 2: n={results['group2_n']}\n"
    output += f"Group 1: M={results['group1_mean']:.4f} (SD={results['group1_std']:.4f}), Mdn={results['group1_median']:.4f}\n"
    output += f"Group 2: M={results['group2_mean']:.4f} (SD={results['group2_std']:.4f}), Mdn={results['group2_median']:.4f}\n"
    output += f"Mean difference: {results['mean_difference']:.4f}\n"

    # Effect size
    output += f"Effect size (Cohen's d): {results['cohens_d']:.4f} ({results['effect_size_interpretation']})\n"

    # Variance equality test
    if 'levene_p_value' in results:
        equal_var_str = "✅ Equal" if results['equal_variances'] else "❌ Unequal"
        output += f"Levene's test: F={results['levene_statistic']:.4f}, p={results['levene_p_value']:.4f} ({equal_var_str} variances)\n"

    # t-test results
    if 't_p_value' in results:
        significance = "✅ SIGNIFICANT" if results['t_significant'] else "❌ Not significant"
        test_type = "Welch's t-test" if not results.get('equal_variances', True) else "Student's t-test"
        output += f"{test_type}: t({results['t_df']:.1f})={results['t_statistic']:.4f}, p={results['t_p_value']:.4f} {significance}\n"

    # Mann-Whitney U test
    if 'mannwhitney_p_value' in results:
        significance = "✅ SIGNIFICANT" if results['mannwhitney_significant'] else "❌ Not significant"
        output += f"Mann-Whitney U: U={results['mannwhitney_u_statistic']:.1f}, p={results['mannwhitney_p_value']:.4f} {significance}\n"

    # Confidence interval
    if 'ci_lower' in results:
        output += f"95% CI for difference: [{results['ci_lower']:.4f}, {results['ci_upper']:.4f}]\n"

    return output

# Print the calculated group means for each predictor (ENHANCED)
print(f'\n=== GROUP COMPARISON ===')
for group_idx, group_mean in enumerate(group_means):
    print(f'\nGroup {group_idx + 1} Means:')
    for predictor, mean_score in group_mean.items():
        print(f'  {predictor}: {mean_score:.4f}')

# Print comparison between groups
if len(group_means) >= 2:
    print(f'\n=== GROUP 1 vs GROUP 2 COMPARISON ===')
    for predictor in group_means[0].keys():
        group1_mean = group_means[0][predictor]
        group2_mean = group_means[1][predictor]
        difference = group1_mean - group2_mean
        percent_diff = (difference / group2_mean * 100) if group2_mean != 0 else 0
        print(f'{predictor}:')
        print(f'  Group 1: {group1_mean:.4f}')
        print(f'  Group 2: {group2_mean:.4f}')
        print(f'  Difference: {difference:.4f} ({percent_diff:+.1f}%)')
        print()

# ========== NEW: COMPREHENSIVE STATISTICAL ANALYSIS ==========

if len(group_scores) >= 2:
    print(f'\n🔬 STATISTICAL SIGNIFICANCE TESTING')
    print(f'=' * 60)

    # Store all test results for summary
    all_test_results = []
    significant_predictors = []

    # Test each predictor
    for predictor in group_scores[0].keys():
        group1_scores = group_scores[0][predictor]
        group2_scores = group_scores[1][predictor]

        # Perform statistical tests
        test_results = perform_statistical_tests(group1_scores, group2_scores, predictor)
        all_test_results.append(test_results)

        # Print formatted results
        print(format_statistical_results(test_results))

        # Track significant predictors
        if test_results.get('t_significant', False):
            significant_predictors.append(predictor)

    # Multiple comparison correction (Bonferroni)
    print(f'\n🎯 MULTIPLE COMPARISON CORRECTION')
    print(f'=' * 40)
    n_tests = len([r for r in all_test_results if r['valid']])
    bonferroni_alpha = 0.05 / n_tests if n_tests > 0 else 0.05
    print(f"Number of tests: {n_tests}")
    print(f"Bonferroni-corrected α: {bonferroni_alpha:.4f}")

    bonferroni_significant = []
    for result in all_test_results:
        if result['valid'] and 't_p_value' in result:
            is_significant = result['t_p_value'] < bonferroni_alpha
            status = "✅ SIGNIFICANT" if is_significant else "❌ Not significant"
            print(f"{result['predictor']}: p={result['t_p_value']:.4f} {status}")
            if is_significant:
                bonferroni_significant.append(result['predictor'])

    # Summary of findings
    print(f'\n📋 SUMMARY OF FINDINGS')
    print(f'=' * 30)
    print(f"Total predictors tested: {n_tests}")
    print(f"Significant at α=0.05: {len(significant_predictors)} ({len(significant_predictors)/n_tests*100:.1f}%)")
    print(f"Significant after Bonferroni correction: {len(bonferroni_significant)} ({len(bonferroni_significant)/n_tests*100:.1f}%)")

    if significant_predictors:
        print(f"\nSignificant predictors (uncorrected): {', '.join(significant_predictors)}")
    if bonferroni_significant:
        print(f"Significant predictors (Bonferroni): {', '.join(bonferroni_significant)}")

    # Effect size summary
    print(f'\n📏 EFFECT SIZES SUMMARY')
    print(f'=' * 25)
    for result in all_test_results:
        if result['valid']:
            direction = "Group 1 > Group 2" if result['mean_difference'] > 0 else "Group 2 > Group 1"
            print(f"{result['predictor']}: d={result['cohens_d']:.3f} ({result['effect_size_interpretation']}, {direction})")

else:
    print("\n⚠️  Need at least 2 groups for statistical comparison")

print(f'\n🏁 Analysis completed. Total utterances processed: {utts_num}')


=== GROUP COMPARISON ===

Group 1 Means:
  interplay: 0.8318
  politeness: 0.6993
  argument_complexity: 0.8970
  evidence_markers: 0.9308
  hedging: 0.0044

Group 2 Means:
  interplay: 0.7555
  politeness: 0.4190
  argument_complexity: 1.0112
  evidence_markers: 0.4095
  hedging: 0.0037

=== GROUP 1 vs GROUP 2 COMPARISON ===
interplay:
  Group 1: 0.8318
  Group 2: 0.7555
  Difference: 0.0763 (+10.1%)

politeness:
  Group 1: 0.6993
  Group 2: 0.4190
  Difference: 0.2803 (+66.9%)

argument_complexity:
  Group 1: 0.8970
  Group 2: 1.0112
  Difference: -0.1142 (-11.3%)

evidence_markers:
  Group 1: 0.9308
  Group 2: 0.4095
  Difference: 0.5213 (+127.3%)

hedging:
  Group 1: 0.0044
  Group 2: 0.0037
  Difference: 0.0007 (+19.5%)


🔬 STATISTICAL SIGNIFICANCE TESTING

📊 INTERPLAY ANALYSIS:
Sample sizes: Group 1: n=1041, Group 2: n=105
Group 1: M=0.8318 (SD=0.0823), Mdn=0.8208
Group 2: M=0.7555 (SD=0.0779), Mdn=0.7694
Mean difference: 0.0763
Effect size (Cohen's d): 0.9311 (large)


📊 POLITE