In [None]:
# Clone the project repository into the current directory.
# Later cells expect it at /content/temporal_belief_analysis (they chdir into its
# notebooks/ folder and import `temporal_belief` from its src/ folder), so this
# assumes the cell runs with /content as the working directory — TODO confirm outside Colab.
# NOTE(review): re-running this cell after a successful clone makes git report that the
# destination path already exists.
!git clone https://github.com/Sharp-4rth/temporal_belief_analysis.git

In [None]:
# Need to restart after:
!pip install convokit[llm]
!pip install convokit
!pip install statsmodels

In [None]:
import os
import sys

# Run everything from the repository's notebooks/ directory.
notebooks_dir = '/content/temporal_belief_analysis/notebooks'
os.chdir(notebooks_dir)
print("Changed working directory to:", os.getcwd())

# Resolve the sibling src/ directory to an absolute path and put it at the front of
# sys.path (only once) so the project package can be imported by the cells below.
relative_src = os.path.join(os.getcwd(), '..', 'src')
src_path = os.path.abspath(relative_src)
already_importable = src_path in sys.path
if not already_importable:
    sys.path.insert(0, src_path)

In [None]:
import time
!pip install gdown
import zipfile
import nltk
from nltk.corpus import stopwords
from convokit import Corpus, download
import convokit
from temporal_belief.core.timeline_building import TimelineBuilder
from temporal_belief.core.change_detection import ChangeDetector
from temporal_belief.core.window_extraction import WindowExtractor
from temporal_belief.core.op_path_pairing import OpPathPairer
from temporal_belief.data.preprocessors import ChangeDetectorPreprocessor
from temporal_belief.data.preprocessors import PairPreprocessor
from temporal_belief.data.preprocessors import ExtractFeatures
from temporal_belief.data.preprocessors import GroupPreprocessor
from temporal_belief.core.interplay import Interplay
import numpy as np
nltk.download('stopwords')

In [None]:
# Download and unzip with python:
!gdown "https://drive.google.com/file/d/1AIrstrzE259fcVyxJQW4-RwvAkoUyK1x/view?usp=sharing" -O "/content/temporal_belief_analysis/pd_corpus_with_stances_fine_tuned.zip" --fuzzy
zipfile.ZipFile("/content/temporal_belief_analysis/pd_corpus_with_stances_fine_tuned.zip").extractall("/content/temporal_belief_analysis")

In [6]:
# Location of the corpus on disk — presumably the directory produced by extracting the
# downloaded archive (same base name as the .zip); confirm if the archive layout changes.
CORPUS_PATH = "/content/temporal_belief_analysis/pd_corpus_with_stances_fine_tuned"
# Load it as a ConvoKit Corpus; every later stage works on this object.
corpus = Corpus(filename=CORPUS_PATH)

In [5]:
# Build the timelines from the corpus. The result is passed to the window extractor,
# the OP/path pairer and the change detector in the cells below.
timeline_builder = TimelineBuilder(corpus)
timelines = timeline_builder.build_timelines()

In [None]:
# Create the window extractor and build its global user-conversations index once, up front
# (presumably so the per-change-point lookups in the main loop can reuse it — confirm in
# WindowExtractor).
window_extractor = WindowExtractor(corpus, timelines)
window_extractor.build_global_user_conversations_index()

In [14]:
# Helpers used by the main scoring loop:
# extracts rooted OP/path pairs from candidate conversations for a user
op_path_pairer = OpPathPairer(corpus, timelines)
# concatenates the utterances of each path in the extracted pairs
pair_preprocessor = PairPreprocessor()
# politeness, argument-complexity, evidence and hedging features and scores
feature_extractor = ExtractFeatures()
# interplay features between the OP text and a path, and the derived persuasion score
persuasion_analyzer = Interplay()

In [15]:
# Used below to split the timelines into the two comparison groups.
change_detector = ChangeDetector()

In [24]:
# Used below to filter the groups, derive the utterance target, and run the
# statistical comparison of the collected scores.
groups_preprocessor = GroupPreprocessor()

In [25]:
# Split the timelines into two groups; the result is indexed with the keys
# 'with_changes' and 'no_changes' in the next cell.
groups = change_detector.get_two_groups(timelines)

In [None]:
# Pair the two groups in a fixed order (group 1 = with changes, group 2 = no changes),
# keeping the unfiltered pair under its own name instead of rebinding `groups_tuple`.
unfiltered_groups_tuple = (groups['with_changes'], groups['no_changes'])

# Filter both groups, then derive the number of utterances to process per group.
groups_tuple = groups_preprocessor.filter_groups(groups, unfiltered_groups_tuple)
target_utterances = groups_preprocessor.get_target(groups_tuple)

In [20]:
from tqdm import tqdm

# Score every group on five predictors, producing ONE score per change point
# (the mean over all OP/path pairs found around that change point).
#
# Outputs of this cell:
#   group_scores - list with one dict per group: {predictor name: [per-utterance scores]}
#   group_means  - list with one dict per group: {predictor name: mean of those scores}
#
# NOTE(review): `utterances_processed` is incremented before window extraction, so change
# points that are skipped (ValueError) or that yield no valid paths still count toward
# `target_utterances`. The number of scored utterances per group can therefore be smaller
# than the target and differ between groups — confirm this is intended.
# NOTE(review): path extraction is cut off by a wall-clock timeout, so the set of paths
# scored per change point depends on machine speed and may differ between runs.

# English stop words handed to the interplay feature calculation.
stop_words_set = set(stopwords.words('english'))

# Predictors collected per utterance; the order fixes the key order of each score dict.
predictor_names = ('interplay', 'politeness', 'argument_complexity', 'evidence_markers', 'hedging')

# Wall-clock budget in seconds for path extraction per change point (loop-invariant,
# so it is defined once here instead of on every iteration).
timeout_duration = 0.25

group_means = []
group_scores = []


# For each group
for group_idx, group in enumerate(tqdm(groups_tuple, desc="Processing groups")):
    # This group's scores (one score per utterance and predictor)
    current_group_scores = {name: [] for name in predictor_names}

    utterances_processed = 0
    target_reached = False  # Flag to unwind all nested loops once the target is hit

    for user_id, topic_timelines in group.items():
        if target_reached:  # Check flag at user level
            break

        user_start_time = time.time()
        user_change_points = 0

        for topic_timeline in topic_timelines.values():
            if target_reached:  # Check flag at topic level
                break

            for change_point in topic_timeline.keys():  # Each utterance/change point
                if utterances_processed >= target_utterances:
                    target_reached = True  # Set flag so the outer loops stop as well
                    break

                user_change_points += 1
                utterances_processed += 1

                # Window extraction
                start_time = time.time()
                try:
                    candidate_convos = window_extractor.get_conversations_around_change_point(
                        change_point=change_point, corpus=corpus, test=True
                    )
                    window_time = time.time() - start_time
                    print(f'Window extraction: {window_time:.3f}s')
                except ValueError as e:
                    print(f"Skipping change point {change_point}: {e}")
                    continue

                # Path extraction (stops early once the time budget is used up)
                start_time = time.time()
                op_path_pairs = []

                for candidate_convo in candidate_convos:
                    if time.time() - start_time > timeout_duration:
                        print(f"Path extraction timeout reached ({timeout_duration}s)")
                        break

                    try:
                        op_path_pairs.extend(op_path_pairer.extract_rooted_path_from_candidate_convos(
                            [candidate_convo], user_id
                        ))
                    except ValueError as e:
                        print(f"Skipping conversation {candidate_convo.id}: {e}")
                        continue

                path_time = time.time() - start_time
                print(f'Path extraction: {path_time:.3f}s')

                # Preprocessing
                start_time = time.time()
                preprocessed_pairs = pair_preprocessor.concatenate_path_in_all_pairs(op_path_pairs)
                preprocess_time = time.time() - start_time
                print(f'Preprocessing: {preprocess_time:.3f}s')

                # Feature extraction - collect ALL path-level scores for this utterance
                start_time = time.time()
                utterance_scores = {name: [] for name in predictor_names}

                for op, paths in preprocessed_pairs:
                    # Only the concatenated path text is needed; the path key is not used.
                    for concatenated_utts in paths.values():
                        # Extract features
                        interplay_features = persuasion_analyzer.calculate_interplay_features(
                            op.text, concatenated_utts, stop_words_set
                        )
                        politeness_features = feature_extractor.get_politeness_features(concatenated_utts)
                        complexity_features = feature_extractor.extract_argument_complexity_features(concatenated_utts)
                        evidence_features = feature_extractor.extract_evidence_features(concatenated_utts)
                        hedging_features = feature_extractor.extract_hedging_features(concatenated_utts)

                        # Calculate scores
                        interplay_score = persuasion_analyzer.calculate_persuasion_score(interplay_features)
                        politeness_score = sum(politeness_features.values())
                        complexity_score = feature_extractor.calculate_complexity_score(complexity_features)
                        evidence_score = feature_extractor.calculate_evidence_score(evidence_features)
                        hedging_score = feature_extractor.calculate_hedging_score_from_features(hedging_features)

                        # Collect all scores for this utterance
                        utterance_scores['interplay'].append(interplay_score)
                        utterance_scores['politeness'].append(politeness_score)
                        utterance_scores['argument_complexity'].append(complexity_score)
                        utterance_scores['evidence_markers'].append(evidence_score)
                        utterance_scores['hedging'].append(hedging_score)

                feature_time = time.time() - start_time
                # Reported like every other stage so the printed stages add up to the TOTAL below.
                print(f'Feature extraction: {feature_time:.3f}s')

                # Take mean across all paths for this single utterance
                start_time = time.time()
                if utterance_scores['interplay']:  # Only if we have scores
                    # Add ONE score per utterance and predictor (mean of all conversation paths)
                    for name in predictor_names:
                        current_group_scores[name].append(np.mean(utterance_scores[name]))

                    print(f"Utterance {change_point}: {len(utterance_scores['interplay'])} paths -> 1 mean score")
                    print(f"Group {group_idx + 1}: {utterances_processed}/{target_utterances} utterances processed")
                else:
                    print(f"Utterance {change_point}: No valid paths found, skipping")

                scoring_time = time.time() - start_time
                print(f'Scoring: {scoring_time:.3f}s')

                # Print total time for this change point
                total_time = window_time + path_time + preprocess_time + feature_time + scoring_time
                print(f'TOTAL for utterance: {total_time:.3f}s\n')

        user_total_time = time.time() - user_start_time
        if user_change_points > 0:  # Only print if user had utterances
            print(f'USER {user_id}: {user_total_time:.3f}s ({user_change_points} utterances)')

    # Calculate means for each predictor for this group (0 when nothing was scored)
    group_mean = {}
    for predictor_name, scores in current_group_scores.items():
        if scores:
            group_mean[predictor_name] = np.mean(scores)
        else:
            group_mean[predictor_name] = 0

    print(f"\nGroup {group_idx + 1} final sample sizes:")
    for predictor_name, scores in current_group_scores.items():
        print(f"  {predictor_name}: n={len(scores)}")

    print(f"Group {group_idx + 1}: Processed exactly {utterances_processed} utterances")

    group_means.append(group_mean)
    group_scores.append(current_group_scores)

Processing groups:   0%|          | 0/2 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
USER BiscuitHammer: 0.109s (2 utterances)
Window extraction: 0.000s
Path extraction: 0.006s
Preprocessing: 0.001s
Utterance c6l1jg3: 49 paths -> 1 mean score
Group 1: 1714/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.019s

Window extraction: 0.000s
Path extraction: 0.010s
Preprocessing: 0.000s
Utterance c6yan3i: 89 paths -> 1 mean score
Group 1: 1715/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.020s

Window extraction: 0.000s
Path extraction: 0.014s
Preprocessing: 0.001s
Utterance cbwh2y0: 189 paths -> 1 mean score
Group 1: 1716/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.066s

USER DreadfulRauw: 0.105s (3 utterances)
Window extraction: 0.000s
Path extraction: 0.028s
Preprocessing: 0.001s
Utterance c7mirjd: 123 paths -> 1 mean score
Group 1: 1717/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.054s

USER MaeveningErnsmau: 0.054s (1 utterances)
W

Processing groups:  50%|█████     | 1/2 [12:02<12:02, 722.01s/it]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Preprocessing: 0.000s
Utterance 5xa6lh: 2 paths -> 1 mean score
Group 2: 1672/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.001s

USER RudeEtuxtable: 0.001s (1 utterances)
Window extraction: 0.000s
Path extraction: 0.000s
Preprocessing: 0.000s
Utterance 5xb4wp: 1 paths -> 1 mean score
Group 2: 1673/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.000s

Window extraction: 0.000s
Path extraction: 0.000s
Preprocessing: 0.000s
Utterance 9q6r7v: 3 paths -> 1 mean score
Group 2: 1674/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.001s

USER LikeALincolnLog42: 0.001s (2 utterances)
Window extraction: 0.000s
Path extraction: 0.000s
Preprocessing: 0.000s
Utterance 5xbn7g: 2 paths -> 1 mean score
Group 2: 1675/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.001s

USER chubz-: 0.001s (1 utterances)
Window extraction: 0.000s
Path extraction: 0.000s
Preprocessing: 0

Processing groups: 100%|██████████| 2/2 [12:12<00:00, 366.48s/it]

Path extraction: 0.003s
Preprocessing: 0.001s
Utterance 2ju3sy: 162 paths -> 1 mean score
Group 2: 2257/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.018s

Window extraction: 0.000s
Path extraction: 0.002s
Preprocessing: 0.000s
Utterance clf2tbz: 162 paths -> 1 mean score
Group 2: 2258/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.017s

USER onelongsigh: 0.035s (2 utterances)
Window extraction: 0.000s
Path extraction: 0.000s
Preprocessing: 0.000s
Utterance 2ju40g: No valid paths found, skipping
Scoring: 0.000s
TOTAL for utterance: 0.000s

Window extraction: 0.000s
Path extraction: 0.000s
Preprocessing: 0.000s
Utterance 23al9l: No valid paths found, skipping
Scoring: 0.000s
TOTAL for utterance: 0.000s

USER dousdi: 0.000s (2 utterances)
Window extraction: 0.000s
Path extraction: 0.001s
Preprocessing: 0.000s
Utterance 2jy57n: 43 paths -> 1 mean score
Group 2: 2261/2280 utterances processed
Scoring: 0.000s
TOTAL for utterance: 0.005s

Window extractio




In [21]:
# Compare the two groups' per-utterance scores for every predictor. The call is the last
# expression in the cell, so whatever it returns is shown as the cell's output
# (the recorded run shows a printed report followed by a summary table).
groups_preprocessor.run_statistical_comparison(group_scores)

=== STATISTICAL SIGNIFICANCE TESTING ===

Group 1 sample sizes: [2276, 2276, 2276, 2276, 2276]
Group 2 sample sizes: [1746, 1746, 1746, 1746, 1746]

=== INTERPLAY ===
Group 1: μ = 0.8024, σ = 0.0359, n = 2276
Group 2: μ = 0.7687, σ = 0.1050, n = 1746
Difference: 0.0336 (+4.4%)
Normality: G1=False, G2=False
Equal variances: False
Test used: Mann-Whitney U test
Test statistic: 2666208.0000
p-value: 0.000000 ***
Result: p < 0.001
Effect size (Cohen's d): 0.4526 (Small)
--------------------------------------------------

=== POLITENESS ===
Group 1: μ = 0.8676, σ = 0.7810, n = 2276
Group 2: μ = 1.9950, σ = 1.4512, n = 1746
Difference: -1.1275 (-56.5%)
Normality: G1=False, G2=False
Equal variances: False
Test used: Mann-Whitney U test
Test statistic: 1080393.5000
p-value: 0.000000 ***
Result: p < 0.001
Effect size (Cohen's d): -1.0046 (Large)
--------------------------------------------------

=== ARGUMENT_COMPLEXITY ===
Group 1: μ = 1.0062, σ = 0.0500, n = 2276
Group 2: μ = 0.9679, σ = 0.06

Unnamed: 0,Predictor,Group_1_Mean,Group_1_SD,Group_1_N,Group_2_Mean,Group_2_SD,Group_2_N,Difference,Percent_Change,Test_Used,Test_Statistic,P_Value,Significance,Effect_Size_d,Effect_Interpretation,Significant
0,interplay,0.802371,0.035901,2276,0.768749,0.105025,1746,0.033623,4.373669,Mann-Whitney U test,2666208.0,2.591372e-77,***,0.452649,Small,True
1,politeness,0.867562,0.78101,2276,1.995015,1.45124,1746,-1.127453,-56.513508,Mann-Whitney U test,1080393.5,3.174961e-136,***,-1.004649,Large,True
2,argument_complexity,1.006197,0.050046,2276,0.967917,0.069675,1746,0.03828,3.954884,Mann-Whitney U test,2673026.5,8.024847e-79,***,0.64478,Medium,True
3,evidence_markers,0.156871,0.200367,2276,0.851626,0.833991,1746,-0.694755,-81.579793,Mann-Whitney U test,982892.5,5.529537e-167,***,-1.219355,Large,True
4,hedging,0.005981,0.004424,2276,0.00435,0.01019,1746,0.001631,37.4971,Mann-Whitney U test,2800501.0,3.199903e-111,***,0.217672,Small,True
