In [1]:
# For runpod-jupyter or local (run twice)
import sys
import os

# Change to the correct working directory (same as Jupyter)
# os.chdir('/workspace/temporal_belief_analysis/notebooks')
# print("Changed working directory to:", os.getcwd())

# Resolve the sibling ``src`` directory (one level above the current working
# directory) to an absolute path and put it at the front of the import path,
# unless it is already listed there.
src_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'src'))
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

from temporal_belief.core.timeline_building import TimelineBuilder

In [2]:
# Run twice
# import unsloth
# import unsloth_zoo
from convokit import Corpus, download
import convokit

In [3]:
# Load a corpus:
# The location can be overridden with the PD_CORPUS_PATH environment variable;
# otherwise it is resolved relative to the current user's home directory so the
# notebook is not tied to one machine's absolute path.
corpus_path = os.environ.get(
    "PD_CORPUS_PATH",
    os.path.expanduser("~/.convokit/saved-corpora/pd_corpus_with_stances1000_chronological"),
)
corpus = Corpus(filename=corpus_path)

In [None]:
# OPTIONAL TO ADD LATER:
# # 1. Better logging/reporting
# def generate_analysis_report(self, results):
#     """Generate analysis summary"""
#
# # 2. Visualization
# def plot_belief_timeline(self, user_timeline, detected_changes):
#     """Plot belief evolution with change points marked"""
#
# # 3. Effect size calculation
# def calculate_effect_size(self, left_window, right_window):
#     """Cohen's d or similar for practical significance"""
#
# # 4. Confidence intervals
# def belief_change_confidence_interval(self, change_data):
#     """95% CI for magnitude of belief change"""

In [4]:
import numpy as np
from scipy.stats import ttest_ind, mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection
from collections import Counter
import logging

class BeliefChangeDetector:
    """Detect stance changes in a per-topic timeline of stance labels.

    A topic timeline is a mapping ``{utterance_id: stance_label}``; its
    iteration order is used as the sequence order. Labels are converted to
    numbers through ``self.stance_values``; labels missing from that table are
    scored as 0.

    Two detection strategies are available:

    * ``detect_simple_stance_changes`` flags every consecutive pair of entries
      whose labels differ.
    * ``detect_changes_with_significance`` compares the ``window_size`` entries
      before each candidate split point with the ``window_size`` entries from
      the split point onwards, and keeps the split points whose p-value
      survives Benjamini-Hochberg FDR correction at ``significance_level``.
    """

    def __init__(self, window_size=3, significance_level=0.05):
        """Store the window size, the significance level and the label scale.

        Args:
            window_size: Number of entries in each of the two compared windows.
            significance_level: Alpha passed to the FDR correction.

        Raises:
            ValueError: If ``window_size`` is smaller than 1 (the windows
                would be empty and no test could be computed).
        """
        if window_size < 1:
            raise ValueError("window_size must be at least 1")
        self.window_size = window_size
        self.alpha = significance_level
        # Ordinal scale used to turn stance labels into numbers.
        self.stance_values = {
            'strongly_against': -2, 'moderately_against': -1,
            'neutral': 0, 'moderately_favor': 1, 'strongly_favor': 2
        }

    def detect_simple_stance_changes(self, topic_timeline):
        """Return one record for every consecutive pair with differing labels.

        Args:
            topic_timeline: Mapping ``{utterance_id: stance_label}``.

        Returns:
            A list of dicts with keys ``position`` (index of the later entry),
            ``current_utterance_id``, ``previous_utterance_id``,
            ``from_stance``, ``to_stance``, ``change_type`` and
            ``change_magnitude``. Empty when the timeline has fewer than two
            entries or no label ever differs from its predecessor.
        """
        if len(topic_timeline) < 2:
            return []

        changes = []
        timeline_items = list(topic_timeline.items())  # [(utterance_id, stance), ...]

        # Walk over adjacent (previous, current) pairs.
        for i in range(1, len(timeline_items)):
            current_utterance_id, current_stance = timeline_items[i]
            previous_utterance_id, previous_stance = timeline_items[i - 1]

            if current_stance != previous_stance:
                changes.append({
                    'position': i,
                    'current_utterance_id': current_utterance_id,
                    'previous_utterance_id': previous_utterance_id,
                    'from_stance': previous_stance,
                    'to_stance': current_stance,
                    'change_type': self._classify_change_direction(previous_stance, current_stance),
                    'change_magnitude': self._calculate_simple_magnitude(previous_stance, current_stance)
                })

        return changes

    def _classify_change_direction(self, from_stance, to_stance):
        """Classify the direction of a stance change on the numeric scale.

        Returns ``'more_favorable'`` when the score increases,
        ``'less_favorable'`` when it decreases and ``'neutral_shift'`` when
        both labels map to the same score (which includes two different
        unknown labels, since both are scored as 0).
        """
        from_value = self.stance_values.get(from_stance, 0)
        to_value = self.stance_values.get(to_stance, 0)

        if to_value > from_value:
            return 'more_favorable'
        elif to_value < from_value:
            return 'less_favorable'
        else:
            return 'neutral_shift'

    def _calculate_simple_magnitude(self, from_stance, to_stance):
        """Return the absolute difference between the two stance scores."""
        from_value = self.stance_values.get(from_stance, 0)
        to_value = self.stance_values.get(to_stance, 0)
        return abs(to_value - from_value)

    def detect_changes_with_significance(self, topic_timeline):
        """Detect changes with statistical significance testing.

        Every index ``i`` that has ``window_size`` entries before it and
        ``window_size`` entries from it onwards is a candidate split point.
        The two windows are compared with ``two_sample_test`` and all
        resulting p-values are corrected together with
        ``multiple_testing_correction``.

        Args:
            topic_timeline: Mapping ``{utterance_id: stance_label}``.

        Returns:
            A 3-tuple ``(significant_changes, p_values, p_corrected)``:
            ``significant_changes`` is the list of candidate dicts that the
            correction rejected (keys: ``position``, ``utterance_id``,
            ``p_value``, ``test_statistic``, ``magnitude``, ``left_mean``,
            ``right_mean``, ``left_window``, ``right_window``, ``p_corrected``,
            ``statistically_significant``, ``survives_fdr_correction``,
            ``significance_level``); ``p_values`` holds the raw p-value of
            every candidate; ``p_corrected`` holds the corrected values as
            returned by the correction routine. All three are empty lists when
            the timeline has fewer than ``2 * window_size`` entries.
        """
        window = self.window_size

        if len(topic_timeline) < window * 2:
            return [], [], []

        # Keep the order and the utterance IDs alongside the numeric scores.
        timeline_items = list(topic_timeline.items())  # [(utterance_id, stance), ...]
        stance_sequence = [self.stance_values.get(stance, 0) for _, stance in timeline_items]

        potential_changes = []
        p_values = []

        # The upper bound is inclusive of ``len - window``: at that index the
        # right window is exactly the last ``window`` entries. Without the +1 a
        # timeline of exactly ``2 * window`` entries would never be tested.
        for i in range(window, len(stance_sequence) - window + 1):
            left_window = stance_sequence[i - window:i]    # before the candidate split
            right_window = stance_sequence[i:i + window]   # from the candidate split onwards

            statistic, p_value = self.two_sample_test(left_window, right_window)
            p_values.append(p_value)

            left_mean = np.mean(left_window)
            right_mean = np.mean(right_window)
            potential_changes.append({
                'position': i,
                'utterance_id': timeline_items[i][0],  # first utterance of the right window
                'p_value': p_value,
                'test_statistic': statistic,
                'magnitude': abs(right_mean - left_mean),
                'left_mean': left_mean,
                'right_mean': right_mean,
                # Slices are already fresh lists, so they can be stored directly.
                'left_window': left_window,
                'right_window': right_window
            })

        # Correct all p-values jointly, then keep only the rejected candidates.
        rejected, p_corrected = self.multiple_testing_correction(p_values)

        significant_changes = []
        for change, is_rejected, corrected in zip(potential_changes, rejected, p_corrected):
            if is_rejected:
                change.update({
                    'p_corrected': corrected,
                    'statistically_significant': True,
                    'survives_fdr_correction': True,
                    'significance_level': self.alpha
                })
                significant_changes.append(change)

        return significant_changes, p_values, p_corrected

    def two_sample_test(self, left_window, right_window):
        """Test whether two windows of stance scores differ.

        Uses a two-sided Mann-Whitney U test and falls back to an independent
        two-sample t-test if Mann-Whitney raises ``ValueError``.

        Returns:
            ``(statistic, p_value)``. An undefined (NaN) p-value is reported
            as 1.0, i.e. "no evidence of a difference", so that it cannot
            propagate into the multiple-testing correction.
        """
        try:
            statistic, p_value = mannwhitneyu(left_window, right_window,
                                              alternative='two-sided')
        except ValueError:
            # Fallback to t-test if Mann-Whitney fails
            statistic, p_value = ttest_ind(left_window, right_window)

        # The t-test yields NaN when both windows are the same constant
        # (zero variance, zero mean difference).
        if np.isnan(p_value):
            p_value = 1.0
        return statistic, p_value

    def multiple_testing_correction(self, p_values):
        """Correct for multiple testing using Benjamini-Hochberg.

        Returns:
            ``(rejected, p_corrected)`` exactly as produced by
            ``fdrcorrection`` with ``alpha=self.alpha``.
        """
        rejected, p_corrected = fdrcorrection(p_values, alpha=self.alpha)
        return rejected, p_corrected

    def analyze_user_belief_changes(self, user_timeline):
        """Analyze belief changes across all topics for a user.

        Args:
            user_timeline: Mapping ``{topic: topic_timeline}``.

        Returns:
            Mapping ``{topic: significant_changes}`` where each value is the
            list of change dicts found by ``detect_changes_with_significance``
            for that topic (the raw and corrected p-value sequences are not
            included; each dict carries its own ``p_value``/``p_corrected``).
        """
        all_changes = {}

        for topic, topic_timeline in user_timeline.items():
            significant_changes, _, _ = self.detect_changes_with_significance(topic_timeline)
            all_changes[topic] = significant_changes

        return all_changes

In [None]:
# Detect changes with significance:
timeline_builder = TimelineBuilder(corpus, min_posts_per_topic=3, min_topics_per_user=1)
all_timelines = timeline_builder.build_timelines()

# Get a specific user's timeline for a specific topic
user_id = "pixel8"
topic = "healthcare policy"
topic_timeline = all_timelines[user_id][topic]  # This is {utterance_id: stance}

# Initialize detector and detect changes
detector = BeliefChangeDetector()
significant_changes, p_values, p_corrected = detector.detect_changes_with_significance(topic_timeline)

# Print the results
print(f"Detected {len(significant_changes)} statistically significant stance changes for user {user_id} on topic {topic}:")
for change in significant_changes:
    # The change records hold the mean stance score of the window before and
    # after the split point ('left_mean' / 'right_mean'), not stance labels.
    print(f"  {change['left_mean']:.2f} → {change['right_mean']:.2f} (magnitude: {change['magnitude']:.3f}, p={change['p_corrected']:.4f})")

In [6]:
# Detect simple stance change:
# Build the timelines for the whole corpus.
timeline_builder = TimelineBuilder(corpus, min_posts_per_topic=3, min_topics_per_user=1)
all_timelines = timeline_builder.build_timelines()

# Pick one user / topic pair and pull out its {utterance_id: stance} timeline.
user_id, topic = "pixel8", "healthcare policy"
topic_timeline = all_timelines[user_id][topic]

# Flag every consecutive pair of entries whose stance label differs.
detector = BeliefChangeDetector()
changes = detector.detect_simple_stance_changes(topic_timeline)

# Summary line followed by one line per detected change.
print(f"Detected {len(changes)} stance changes for user {user_id} on topic {topic}:")
for change in changes:
    print(f"  {change['from_stance']} → {change['to_stance']} (magnitude: {change['change_magnitude']})")

Detected 0 statistically significant stance changes for user pixel8 on topic healthcare policy:


In [None]:
# Run detection for all topics for a user - NOT TESTED:
# Get complete user timeline (the timelines built earlier in this notebook are
# stored in `all_timelines`)
user_timeline = all_timelines["pixel8"]  # All topics for this user

# Analyze changes across all topics
detector = BeliefChangeDetector()
all_changes = detector.analyze_user_belief_changes(user_timeline)

# Results
for topic, result in all_changes.items():
    # Accept either a bare list of change dicts or the
    # (significant_changes, p_values, p_corrected) tuple that
    # detect_changes_with_significance returns.
    changes = result[0] if isinstance(result, tuple) else result
    print(f"Topic: {topic}")
    for change in changes:
        print(f"  Change at position {change['position']}: magnitude {change['magnitude']}")

In [27]:
# All users that meet the criteria (only the first ten keys are shown):
first_users = list(all_timelines.keys())[:10]
print("Available users:")
print(first_users)

Available users:
['[deleted]', 'dtmike07', 'snorky94', 'pixel8', 'funkinthetrunk', 'orbit1979', 'HighAltitudeNoOpen', 'cunning001', 'izagig', 'sickpharaoh']


In [15]:
# What topics the users have posted about:
# The candidates are the first 5 users, but the loop stops after printing the
# first one.
candidate_users = list(all_timelines.keys())[:5]
for user_id in candidate_users:
    topics = list(all_timelines[user_id].keys())
    print(f"{user_id}: {topics}")
    break

[deleted]: ['climate change and energy policy', 'media and political commentary', 'taxation and government spending', 'political figures and campaigns', 'economic policy', 'voting rights and elections', 'immigration policy', 'healthcare policy', 'congressional politics', 'criminal justice and policing', 'political parties and ideology', 'civil rights and social issues', 'education policy', 'foreign policy and defense']


In [24]:
# confidence score:
# Materialise the utterances and show the metadata of the second one.
utterances = list(corpus.iter_utterances())
second_utterance = utterances[1]
print(second_utterance.meta)

ConvoKitMeta({'score': 29, 'top_level_comment': None, 'retrieved_on': -1, 'gilded': -1, 'gildings': None, 'subreddit': 'PoliticalDiscussion', 'stickied': False, 'permalink': '/r/PoliticalDiscussion/comments/nz1xu/congrats_rpoliticaldiscussion_you_are_turning/', 'author_flair_text': '', 'detected_stance': 'moderately_against', 'stance_confidence': 0.8540321985880533, 'stance_scores': {'strongly_favor': 0.0016047263949682626, 'moderately_favor': 0.5134096046288809, 'neutral': 0.0072105322033166885, 'moderately_against': 0.8540321985880533, 'strongly_against': 0.3021060957883795}})
