In [6]:
# For runpod-jupyter or local (run twice)
# Puts the project's src/ directory at the front of sys.path so that the
# temporal_belief package can be imported from this notebook.
import os
import sys

# Optional: switch to the notebooks directory first (same as Jupyter)
# os.chdir('/workspace/temporal_belief_analysis/notebooks')
# print("Changed working directory to:", os.getcwd())

# src/ is resolved as a sibling of the current working directory
parent_dir = os.path.dirname(os.getcwd())
src_path = os.path.join(parent_dir, 'src')
already_importable = src_path in sys.path
if not already_importable:
    sys.path.insert(0, src_path)

from temporal_belief.core.timeline_building import TimelineBuilder

ImportError: cannot import name 'STANCE_LABELS' from 'temporal_belief.utils.config' (/Users/leonidas/GitHub/temporal_belief_analysis/src/temporal_belief/utils/config.py)

In [3]:
# Run twice
# import unsloth
# import unsloth_zoo
from convokit import Corpus, download
import convokit

In [4]:
# Load a corpus:
# The path is derived from the current user's home directory instead of a
# machine-specific absolute path, so the cell also works for other users
# whose saved corpora live in the same ~/.convokit/saved-corpora location.
import os

CORPUS_PATH = os.path.expanduser(
    "~/.convokit/saved-corpora/pd_corpus_with_stances1000_chronological"
)
corpus = Corpus(filename=CORPUS_PATH)

In [6]:
import numpy as np
from scipy.stats import ttest_ind, mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection
from collections import Counter
import logging

class BeliefChangeDetector:
    """Detect stance changes in a chronologically ordered stance timeline.

    Two strategies are offered:

    * ``detect_simple_stance_changes`` flags every position where the stance
      label differs from the previous one.
    * ``detect_changes_with_significance`` compares the ``window_size`` posts
      before each candidate position with the ``window_size`` posts from that
      position onwards using a two-sample test.

    Stance labels are mapped onto an ordinal scale from -2 to 2 via
    ``self.stance_values``; labels missing from that mapping are treated as 0.
    """

    def __init__(self, window_size=3, significance_level=0.05):
        """
        Args:
            window_size: Number of posts in each of the two windows compared
                         by the sliding-window test.
            significance_level: p-value threshold (alpha) below which a
                                window comparison is reported as a change.
        """
        self.window_size = window_size
        self.alpha = significance_level
        self.stance_values = {
            'strongly_against': -2, 'moderately_against': -1,
            'neutral': 0, 'moderately_favor': 1, 'strongly_favor': 2
        }

    def detect_simple_stance_changes(self, topic_timeline):
        """
        Simple method: detect any change from previous stance for a single topic.

        Args:
            topic_timeline: Dictionary with utterance_ids as keys and stance strings as values
                           e.g., {'utterance_123': 'moderately_favor', 'utterance_456': 'neutral', ...}
                           NOTE: This should already be chronologically sorted from TimelineBuilder

        Returns:
            List of dicts, one per position whose stance differs from the
            previous one, with keys 'position', 'current_utterance_id',
            'previous_utterance_id', 'from_stance', 'to_stance',
            'change_type' and 'change_magnitude'. Empty when the timeline has
            fewer than two entries.
        """
        if len(topic_timeline) < 2:
            return []

        changes = []
        timeline_items = list(topic_timeline.items())  # Convert to list of (utterance_id, stance) pairs

        for i in range(1, len(timeline_items)):
            current_utterance_id, current_stance = timeline_items[i]
            previous_utterance_id, previous_stance = timeline_items[i - 1]

            # Check if stance changed
            if current_stance != previous_stance:
                changes.append({
                    'position': i,
                    'current_utterance_id': current_utterance_id,
                    'previous_utterance_id': previous_utterance_id,
                    'from_stance': previous_stance,
                    'to_stance': current_stance,
                    'change_type': self._classify_change_direction(previous_stance, current_stance),
                    'change_magnitude': self._calculate_simple_magnitude(previous_stance, current_stance)
                })

        return changes

    def _classify_change_direction(self, from_stance, to_stance):
        """Classify the direction of a stance change.

        Returns 'more_favorable' or 'less_favorable' depending on how the
        numeric stance value moved, and 'neutral_shift' when both labels map
        to the same numeric value (possible when a label is unknown and
        therefore treated as 0).
        """
        from_value = self.stance_values.get(from_stance, 0)
        to_value = self.stance_values.get(to_stance, 0)

        if to_value > from_value:
            return 'more_favorable'
        elif to_value < from_value:
            return 'less_favorable'
        else:
            return 'neutral_shift'

    def _calculate_simple_magnitude(self, from_stance, to_stance):
        """Return the absolute distance between two stances on the -2..2 scale."""
        from_value = self.stance_values.get(from_stance, 0)
        to_value = self.stance_values.get(to_stance, 0)
        return abs(to_value - from_value)

    def detect_changes_with_significance(self, user_timeline):
        """Detect changes with statistical significance testing.

        Every position that has a full window on both sides is a candidate
        change point. The window before the position is compared with the
        window starting at it via ``two_sample_test``.

        Args:
            user_timeline: Dictionary of {utterance_id: stance string}; its
                           iteration order is used as the chronological order.

        Returns:
            Tuple ``(changes, p_values)``:
              * ``changes``: list of dicts for positions whose p-value is
                below ``self.alpha``.
              * ``p_values``: the raw p-value of every tested position, in
                order. They are NOT corrected for multiple testing; pass them
                to ``multiple_testing_correction`` if that is required.
            ``([], [])`` is returned when the timeline is shorter than two
            windows, so callers can always unpack the result.
        """
        n_posts = len(user_timeline)
        if n_posts < self.window_size * 2:
            # Same shape as the normal return value so tuple-unpacking callers
            # do not fail on short timelines.
            return [], []

        # Convert to numeric sequence
        stance_sequence = [self.stance_values.get(stance, 0)
                           for stance in user_timeline.values()]

        changes = []
        p_values = []

        # Sliding window approach. The upper bound is inclusive of
        # n_posts - window_size: at that position the right window is still
        # complete, so it is a valid split point.
        for i in range(self.window_size, n_posts - self.window_size + 1):

            # Left window (before potential change)
            left_window = stance_sequence[i - self.window_size:i]

            # Right window (after potential change)
            right_window = stance_sequence[i:i + self.window_size]

            # Statistical test: Are these two windows significantly different?
            statistic, p_value = self.two_sample_test(left_window, right_window)

            p_values.append(p_value)

            # Significant change detected
            if p_value < self.alpha:
                left_mean = np.mean(left_window)
                right_mean = np.mean(right_window)

                changes.append({
                    'position': i,
                    'p_value': p_value,
                    'test_statistic': statistic,
                    'magnitude': abs(right_mean - left_mean),
                    'left_mean': left_mean,
                    'right_mean': right_mean,
                    'significance_level': self.alpha,
                    'statistically_significant': True
                })

        return changes, p_values

    def two_sample_test(self, left_window, right_window):
        """Statistical test for difference between two windows.

        Uses the two-sided Mann-Whitney U test and falls back to an
        independent-samples t-test if Mann-Whitney raises ``ValueError``.

        Returns:
            Tuple ``(statistic, p_value)``. A NaN p-value (e.g. from a t-test
            on two constant windows) is reported as 1.0, i.e. no evidence of
            a difference, so downstream comparisons and corrections never
            receive NaN.
        """
        from scipy.stats import ttest_ind, mannwhitneyu

        # Use Mann-Whitney U test (non-parametric, more robust)
        # Good for ordinal data like stance scales
        try:
            statistic, p_value = mannwhitneyu(left_window, right_window,
                                              alternative='two-sided')
        except ValueError:
            # Fallback to t-test if Mann-Whitney fails
            statistic, p_value = ttest_ind(left_window, right_window)

        if np.isnan(p_value):
            p_value = 1.0

        return statistic, p_value

    def multiple_testing_correction(self, p_values):
        """Correct for multiple testing using Benjamini-Hochberg.

        Args:
            p_values: Sequence of raw p-values.

        Returns:
            Tuple ``(rejected, p_corrected)`` as returned by statsmodels'
            ``fdrcorrection``; two empty arrays when ``p_values`` is empty.
        """
        if len(p_values) == 0:
            # Nothing to correct; avoid handing an empty array to statsmodels.
            return np.array([], dtype=bool), np.array([], dtype=float)

        from statsmodels.stats.multitest import fdrcorrection

        rejected, p_corrected = fdrcorrection(p_values, alpha=self.alpha)
        return rejected, p_corrected

In [16]:
# Build the stance timelines from the loaded corpus
timeline_builder = TimelineBuilder(corpus, min_posts_per_topic=3, min_topics_per_user=1)
all_timelines = timeline_builder.build_timelines()

# Pick one user and one topic; the selected entry is {utterance_id: stance}
user_id = "some_user_id"
topic = "healthcare"
topic_timeline = all_timelines[user_id][topic]

# Run the simple (consecutive-stance) change detection on that timeline
detector = BeliefChangeDetector()
changes = detector.detect_simple_stance_changes(topic_timeline)

# Report a one-line summary followed by one line per detected change
print(f"Detected {len(changes)} stance changes for user {user_id} on topic {topic}:")
for change in changes:
    from_stance, to_stance = change['from_stance'], change['to_stance']
    print(f"  {from_stance} → {to_stance} (magnitude: {change['change_magnitude']})")

NameError: name 'TimelineBuilder' is not defined