In [None]:
# Need to restart after:
!pip install convokit[llm]
!pip install convokit

In [None]:
# Imports and one-time setup for the notebook (gdown install, NLTK stop-word download)
import time
import sys
import os
# gdown provides the `gdown` command-line tool that the download cell invokes
!pip install gdown
import zipfile
import nltk
from nltk.corpus import stopwords

# Fetch the NLTK stop-word data required by stopwords.words('english') in the analysis cell
nltk.download('stopwords')
from convokit import Corpus, download
import convokit
# Project pipeline components (timeline building, change detection, window extraction,
# OP/path pairing, preprocessing, interplay scoring)
from temporal_belief.core.timeline_building import TimelineBuilder
from temporal_belief.core.change_detection import ChangeDetector
from temporal_belief.core.window_extraction import WindowExtractor
from temporal_belief.core.op_path_pairing import OpPathPairer
from temporal_belief.data.preprocessors import ChangeDetectorPreprocessor
from temporal_belief.data.preprocessors import PairPreprocessor
from temporal_belief.core.interplay import Interplay

In [None]:
# Download and unzip with python (Dataloaders):
!gdown "https://drive.google.com/file/d/1N0U_jUJlOYjdaju2FaU8p87uB22YBxJ0/view?usp=sharing" -O "/content/temporal_belief_analysis/pd_corpus_with_stances100000_chronological.zip" --fuzzy
!gdown "https://drive.google.com/file/d/1DLFY6JLMZqNjwvNRZmhlV4-rnoQP_eyH/view?usp=sharing" -O "/content/temporal_belief_analysis/merged_corpus_checkpoint_5.zip" --fuzzy
!gdown "https://drive.google.com/file/d/1nWaj5N8nsG7u5homv_kAh4CLPDv01M_Z/view?usp=sharing" -O "/content/temporal_belief_analysis/pd_corpus_with_topics.zip" --fuzzy

zipfile.ZipFile("/content/temporal_belief_analysis/pd_corpus_with_stances100000_chronological.zip").extractall("/content/temporal_belief_analysis")
zipfile.ZipFile("/content/temporal_belief_analysis/merged_corpus_checkpoint_5.zip").extractall("/content/temporal_belief_analysis")
zipfile.ZipFile("/content/temporal_belief_analysis/pd_corpus_with_topics.zip").extractall("/content/temporal_belief_analysis")

In [None]:
# Load the ConvoKit corpus stored under the `pd_corpus_with_topics` directory
# (expected to be produced by extracting pd_corpus_with_topics.zip in the download cell).
corpus = Corpus(filename="/content/temporal_belief_analysis/pd_corpus_with_topics")

In [None]:
# Build the timelines from the loaded corpus. `timelines` is passed on to the
# change detector, the window extractor and the OP/path pairer in later cells.
timeline_builder = TimelineBuilder(corpus)
timelines = timeline_builder.build_timelines()

In [None]:
# Split the timelines into two groups. The analysis cell reads the result through
# the keys 'with_changes' and 'no_changes'.
change_detector = ChangeDetector()
groups = change_detector.get_two_groups(timelines)

In [None]:
# Create the window extractor and attach the global user-conversations index as its
# cache. NOTE(review): presumably this avoids rebuilding the index on every
# change-point lookup — confirm against WindowExtractor.
window_extractor = WindowExtractor(corpus, timelines)
user_conversations_index = window_extractor.build_global_user_conversations_index()
window_extractor.user_conversations_cache = user_conversations_index

In [None]:
# Pairer whose extract_rooted_path_from_candidate_convos() is called per candidate
# conversation in the analysis cell, and the preprocessor whose
# concatenate_path_in_all_pairs() is applied to the resulting pairs.
op_path_pairer = OpPathPairer(corpus, timelines)
pair_preprocessor = PairPreprocessor()

In [None]:
# Interplay instance used in the analysis cell for calculate_interplay_features()
# and calculate_persuasion_score().
persuasion_analyzer = Interplay()

In [None]:
# Score every change point of every user in both groups ('with_changes' and
# 'no_changes') and report the mean persuasion score per group.
# Relies on objects created in earlier cells: corpus, groups, window_extractor,
# op_path_pairer, pair_preprocessor and persuasion_analyzer.

# Load English stop words
stop_words_set = set(stopwords.words('english'))


def _score_change_point(user_id, change_point):
    """Run the timed scoring pipeline for a single change point of one user.

    Stages, each timed and reported on stdout:
      1. window extraction  (window_extractor.get_conversations_around_change_point)
      2. path extraction    (op_path_pairer.extract_rooted_path_from_candidate_convos)
      3. preprocessing      (pair_preprocessor.concatenate_path_in_all_pairs)
      4. feature extraction (persuasion_analyzer.calculate_interplay_features)
      5. scoring            (persuasion_analyzer.calculate_persuasion_score)

    Args:
        user_id: id of the user whose timeline contains the change point.
        change_point: key taken from the user's topic timeline.

    Returns:
        The list of persuasion scores for this change point (may be empty), or
        None when window extraction raises ValueError and the change point is skipped.
    """
    # Stage 1: window extraction. A ValueError skips the whole change point.
    stage_start = time.perf_counter()
    try:
        candidate_convos = window_extractor.get_conversations_around_change_point(
            change_point=change_point, corpus=corpus
        )
    except ValueError as e:
        print(f"Skipping change point {change_point}: {e}")
        return None
    window_time = time.perf_counter() - stage_start
    print(f'⏱️ Window extraction: {window_time:.3f}s')

    # Stage 2: path extraction. Conversations are processed one at a time so that
    # a ValueError only drops the offending conversation, not the whole batch.
    stage_start = time.perf_counter()
    op_path_pairs = []
    for candidate_convo in candidate_convos:
        try:
            op_path_pairs.extend(
                op_path_pairer.extract_rooted_path_from_candidate_convos(
                    [candidate_convo], user_id
                )
            )
        except ValueError as e:
            print(f"Skipping conversation {candidate_convo.id}: {e}")
    path_time = time.perf_counter() - stage_start
    print(f'⏱️ Path extraction: {path_time:.3f}s')

    # Stage 3: preprocessing.
    stage_start = time.perf_counter()
    preprocessed_pairs = pair_preprocessor.concatenate_path_in_all_pairs(op_path_pairs)
    preprocess_time = time.perf_counter() - stage_start
    print(f'⏱️ Preprocessing: {preprocess_time:.3f}s')

    # Stage 4: one feature set per (op, concatenated path) combination.
    stage_start = time.perf_counter()
    features_list = [
        persuasion_analyzer.calculate_interplay_features(
            op.text, concatenated_utts, stop_words_set
        )
        for op, paths in preprocessed_pairs
        for concatenated_utts in paths.values()
    ]
    feature_time = time.perf_counter() - stage_start
    print(f'⏱️ Feature extraction: {feature_time:.3f}s')

    # Stage 5: scoring.
    stage_start = time.perf_counter()
    scores = [
        persuasion_analyzer.calculate_persuasion_score(interplay_features)
        for interplay_features in features_list
    ]
    scoring_time = time.perf_counter() - stage_start
    print(f'⏱️ Scoring: {scoring_time:.3f}s')

    # Report the summed stage time once per change point.
    total_time = window_time + path_time + preprocess_time + feature_time + scoring_time
    print(f'🔥 TOTAL for change point: {total_time:.3f}s\n')

    return scores


# `groups` was computed in the change-detection cell (get_two_groups); reuse it
# here instead of recomputing it.
groups_tuple = (groups['with_changes'], groups['no_changes'])

# Results, in the same order as groups_tuple: (with_changes, no_changes).
group_means = []   # one mean score per group
group_scores = []  # one list of raw scores per group
utts_num = 0       # number of change points visited across both groups

for group in groups_tuple:
    current_group_scores = []

    for user_id, topic_timelines in group.items():
        user_start_time = time.perf_counter()
        user_change_points = 0

        # NOTE(review): the keys of topic_timelines are presumably topic labels and the
        # keys of each topic timeline are treated as change points — confirm upstream.
        for topic, topic_timeline in topic_timelines.items():
            for change_point in topic_timeline:
                # Print the topic key rather than the whole timeline mapping.
                print(f'User: {user_id}, topic: {topic}, change point {change_point}')
                utts_num += 1
                user_change_points += 1

                scores = _score_change_point(user_id, change_point)
                if scores is not None:
                    current_group_scores.extend(scores)

        # Per-user timing summary; max(1, ...) guards users without change points.
        user_total_time = time.perf_counter() - user_start_time
        print(f'👤 USER {user_id} TOTAL: {user_total_time:.3f}s ({user_change_points} change points)')
        print(f'📊 Average per change point: {user_total_time/max(1, user_change_points):.3f}s\n')

    group_scores.append(current_group_scores)

    # Mean score of the group; a group that produced no scores reports 0.
    group_mean = (
        sum(current_group_scores) / len(current_group_scores)
        if current_group_scores else 0
    )
    group_means.append(group_mean)

# Print the calculated group means
print(f'Group Means: {group_means}')