In [None]:
!git clone https://github.com/Sharp-4rth/temporal_belief_analysis.git

In [None]:
# Need to restart after:
!pip install convokit[llm]
!pip install convokit

In [None]:
import sys
import os

os.chdir('/content/temporal_belief_analysis/notebooks')
print("Changed working directory to:", os.getcwd())
# Absolute path to src directory
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if src_path not in sys.path:
    sys.path.insert(0, src_path)

In [None]:
import time
!pip install gdown
import zipfile
import nltk
from nltk.corpus import stopwords
from convokit import Corpus, download
import convokit
from temporal_belief.core.timeline_building import TimelineBuilder

In [None]:
# Download and unzip with python:
!gdown "https://drive.google.com/file/d/10HNQHIsFe386oBu_o5s9JOuo-wiaofXC/view?usp=sharing" -O "/content/temporal_belief_analysis/pd_corpus_with_stances150000_llm.zip" --fuzzy
zipfile.ZipFile("/content/temporal_belief_analysis/pd_corpus_with_stances150000_llm.zip").extractall("/content/temporal_belief_analysis")

In [22]:
# Load corpus
corpus = Corpus(filename="/content/temporal_belief_analysis/pd_corpus_with_stances150000_llm_rerun")

In [104]:
# Inputs:
STANCE2ID = {"neutral":0, "left-leaning":1, "right-leaning":2}
NORMALIZE = {
    "neutral":"neutral",
    "left":"left-leaning", "left-leaning":"left-leaning",
    "right":"right-leaning","right-leaning":"right-leaning",
}
VALID = set(STANCE2ID.keys())

utterances, labels, utt_ids, user_ids, topics = [], [], [], [], []

for u in corpus.iter_utterances():
    txt = (u.text or "").strip()
    stance = u.meta.get('detected_stance')
    main_post_id = u.conversation_id
    main_post = corpus.get_utterance(main_post_id)
    conv = corpus.get_conversation(main_post_id)

    conf_raw = u.meta.get('stance_confidence')
    # normalize confidence
    try:
        conf = float(conf_raw)
    except (TypeError, ValueError):
        conf = None

    # --------- SKIP problematic utterances ----------
    # missing/empty main post or duplicate of main post
    if not txt or txt in {'[removed]', '[deleted]', '.'}:
        continue

    if not main_post.text or txt == main_post.text:
        continue

    if 'stance_error' in u.meta.keys() and stance == 'neutral':
        continue

    if stance not in {'left-leaning', 'right-leaning', 'neutral'}:
        continue

    # confidence threshold depends on stance
    if stance == 'neutral':
        if conf is None or conf < 0.9:
            continue
    elif stance == 'left-leaning':
        if conf is None or conf < 0.9:
            continue
    elif stance == 'right-leaning':
        if conf is None or conf < 0.8:
            continue

    raw = u.meta.get("detected_stance")

    if not raw:
        continue
    lab = NORMALIZE.get(str(raw).strip().lower())  # map aliases
    if lab not in VALID:                           # filters Unknown / other
        continue

    topic = u.meta.get("topic")  # or compute from your mapping

    utterances.append(txt)
    labels.append(lab)
    utt_ids.append(u.id)
    user_ids.append(u.speaker.id if u.speaker else "unknown")
    topics.append(topic)

In [106]:
# --- Cap max utterances per label ---
from collections import defaultdict
import numpy as np

# Set your per-label maximums here:
MAX_PER_LABEL = {
    "neutral": 2000,
    "left-leaning": 9000,
    "right-leaning": 6000,
}

# Shuffle indices first for randomness
all_idx = np.arange(len(labels))
rng = np.random.default_rng(42)
rng.shuffle(all_idx)

# Collect capped indices
label_counts = defaultdict(int)
keep_idx = []

for i in all_idx:
    lab = labels[i]
    if lab in MAX_PER_LABEL and label_counts[lab] < MAX_PER_LABEL[lab]:
        keep_idx.append(i)
        label_counts[lab] += 1

# Apply filtering
utterances = [utterances[i] for i in keep_idx]
labels     = [labels[i] for i in keep_idx]
utt_ids    = [utt_ids[i] for i in keep_idx]
user_ids   = [user_ids[i] for i in keep_idx]
topics     = [topics[i] for i in keep_idx]

print("Final counts per label:")
from collections import Counter
print(Counter(labels))

Final counts per label:
Counter({'left-leaning': 6334, 'right-leaning': 6000, 'neutral': 2000})


In [107]:
# make a user-based split
import numpy as np
rng = np.random.default_rng(42)
uniq_users = np.array(sorted(set(user_ids)))
rng.shuffle(uniq_users)
cut = int(0.8 * len(uniq_users))
train_users = set(uniq_users[:cut])
val_users   = set(uniq_users[cut:])

train_idx = np.array([i for i,u in enumerate(user_ids) if u in train_users])
val_idx   = np.array([i for i,u in enumerate(user_ids) if u in val_users])

# Sanity
assert len(utterances) == len(labels) == len(utt_ids) == len(user_ids)

print(f"Kept {len(utterances)} labeled utterances.")

Kept 14334 labeled utterances.


In [108]:
from collections import Counter
import numpy as np

order = ['left-leaning', 'neutral', 'right-leaning']  # desired print order

# --- overall counts ---
all_counts = Counter(labels)
total = len(labels)
print("Overall:")
for k in order:
    c = all_counts.get(k, 0)
    print(f"  {k:13s}: {c:6d} ({c/total:.1%})")

# --- per-split counts (uses your train_idx / val_idx) ---
def counts_for_idx(idx):
    return Counter(labels[i] for i in idx)

print("\nTrain split:")
train_counts = counts_for_idx(train_idx)
for k in order:
    c = train_counts.get(k, 0)
    print(f"  {k:13s}: {c:6d} ({c/len(train_idx):.1%})")

print("\nVal split:")
val_counts = counts_for_idx(val_idx)
for k in order:
    c = val_counts.get(k, 0)
    print(f"  {k:13s}: {c:6d} ({c/len(val_idx):.1%})")

print("\nClass weights (index order per STANCE2ID):", class_weights)

Overall:
  left-leaning :   6334 (44.2%)
  neutral      :   2000 (14.0%)
  right-leaning:   6000 (41.9%)

Train split:
  left-leaning :   5108 (44.6%)
  neutral      :   1661 (14.5%)
  right-leaning:   4687 (40.9%)

Val split:
  left-leaning :   1226 (42.6%)
  neutral      :    339 (11.8%)
  right-leaning:   1313 (45.6%)

Class weights (index order per STANCE2ID): [2.389      0.75434166 0.7963333 ]


In [110]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch, numpy as np
from transformers import EarlyStoppingCallback
from torch.utils.data import DataLoader, WeightedRandomSampler  # <-- add this
from collections import Counter
import torch
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from transformers import Trainer
import torch.nn as nn

# STANCE2ID = {"neutral":0, "left-leaning":1, "right-leaning":2}
ID2STANCE = {v:k for k,v in STANCE2ID.items()}
MAX_LEN = 128

# utterances: list[str]; labels: list[str] aligned with utterances
# Prefer split by USER so test utterances come from unseen users.
utterances = utterances   # texts
labels     = labels
train_idx, val_idx = train_idx, val_idx # indices for your split (ideally user-based)


tok = AutoTokenizer.from_pretrained("/content/temporal_belief_analysis/stance-clf-best")
model = AutoModelForSequenceClassification.from_pretrained(
    "/content/temporal_belief_analysis/stance-clf-best", num_labels=3, id2label=ID2STANCE, label2id=STANCE2ID
)

class StanceDS(Dataset):
    def __init__(self, texts, labs):
        self.enc = tok(texts, truncation=True, padding=True, max_length=MAX_LEN, return_tensors="pt")
        self.y = torch.tensor([STANCE2ID[l] for l in labs], dtype=torch.long)
    def __len__(self): return len(self.y)
    def __getitem__(self, i):
        item = {k:v[i] for k,v in self.enc.items()}
        item["labels"] = self.y[i]
        return item

train_ds = StanceDS([utterances[i] for i in train_idx], [labels[i] for i in train_idx])
val_ds   = StanceDS([utterances[i] for i in val_idx],   [labels[i] for i in val_idx])

train_counts = Counter(labels[i] for i in train_idx)
num_classes = len(STANCE2ID)
total_train = len(train_idx)

# build inverse-freq weights
weights_np = np.zeros(num_classes, dtype=np.float32)
for stance, idx in STANCE2ID.items():
    freq = train_counts.get(stance, 1)
    weights_np[idx] = total_train / (num_classes * freq)

# explicit boosts
weights_np[STANCE2ID["neutral"]]      *= 0.7
weights_np[STANCE2ID["left-leaning"]] *= 1.1
weights_np[STANCE2ID["right-leaning"]]*= 1.8

# tensor + normalize
class_weights = torch.tensor(weights_np, dtype=torch.float)
class_weights = class_weights / class_weights.mean()

# class_weights = torch.clamp(class_weights, 0.7, 1.20)  # optional, mild

class WeightedTrainer(Trainer):
    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        # make sure we store a torch tensor
        if isinstance(class_weights, np.ndarray):
            class_weights = torch.tensor(class_weights, dtype=torch.float)
        elif class_weights is not None and not isinstance(class_weights, torch.Tensor):
            class_weights = torch.tensor(class_weights, dtype=torch.float)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        if self.class_weights is not None:
            w = self.class_weights.to(device=logits.device, dtype=logits.dtype)
        else:
            w = None

        loss_fct = torch.nn.CrossEntropyLoss(weight=w, label_smoothing=0.1)
        loss = loss_fct(logits, labels)

        inputs["labels"] = labels
        return (loss, outputs) if return_outputs else loss

def compute_metrics(eval_pred):
    logits, y_true = eval_pred
    y_pred = logits.argmax(axis=-1)
    acc = accuracy_score(y_true, y_pred)
    p, r, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="macro", zero_division=0
    )
    return {
        "accuracy": acc,
        "macro_precision": p,
        "macro_recall": r,
        "macro_f1": f1,
    }

# init training argument
args = TrainingArguments(
    "stance-clf",
    learning_rate=3e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    num_train_epochs=5,                 # allow up to 5 but early-stop
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",   # <-- change
    greater_is_better=True,             # <-- change
    warmup_ratio=0.08,
    weight_decay=0.005,
    max_grad_norm=1.0,
    report_to=[],
    logging_strategy="epoch",       # <-- add this
    logging_first_step=True
)

# Init trainer
trainer = WeightedTrainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tok,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
    # class_weights=class_weights
)
trainer.train()
trainer.save_model("stance-clf-best-domain-adapted")
tok.save_pretrained("stance-clf-best-domain-adapted")

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,Macro Precision,Macro Recall,Macro F1
1,1.2109,0.88169,0.582349,0.570145,0.597093,0.581284
2,0.8482,0.883045,0.586171,0.569958,0.592835,0.579884
3,0.7296,0.969574,0.601112,0.602691,0.579916,0.585798
4,0.6044,0.996736,0.609451,0.599941,0.578353,0.586785
5,0.5108,1.113419,0.604587,0.590604,0.586767,0.588639


('stance-clf-best-domain-adapted/tokenizer_config.json',
 'stance-clf-best-domain-adapted/special_tokens_map.json',
 'stance-clf-best-domain-adapted/vocab.txt',
 'stance-clf-best-domain-adapted/bpe.codes',
 'stance-clf-best-domain-adapted/added_tokens.json')

In [112]:
trainer.state.best_model_checkpoint and trainer.state.best_metric

0.5886393061477343

In [111]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

out = trainer.predict(val_ds)
y_pred = np.argmax(out.predictions, axis=1)
y_true = out.label_ids

print(classification_report(
    y_true, y_pred,
    target_names=["neutral","left-leaning","right-leaning"],
    digits=2
))
print("Confusion matrix (rows=true, cols=pred):")
print(confusion_matrix(y_true, y_pred))

               precision    recall  f1-score   support

      neutral       0.55      0.53      0.54       339
 left-leaning       0.58      0.59      0.59      1226
right-leaning       0.64      0.64      0.64      1313

     accuracy                           0.60      2878
    macro avg       0.59      0.59      0.59      2878
 weighted avg       0.60      0.60      0.60      2878

Confusion matrix (rows=true, cols=pred):
[[181  95  63]
 [ 94 722 410]
 [ 54 422 837]]
