# In [None]:
import pandas as pd
import json
import os
from sentence_transformers import SentenceTransformer, util

# Multilingual sentence-embedding model used to compare page names
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Parse the accounts master file and keep its list of account records
with open("../output/accounts_master.json", mode="r", encoding="utf-8") as source:
    data = json.loads(source.read())

accounts = data["accounts"]

# Collects one record per flagged rename
results = []

# Cut-off on semantic distance, where distance = 1 - cosine similarity
DISTANCE_THRESHOLD = 0.5

# Romanian political vocabulary likely to appear when a page is renamed
political_keywords = [
    "ponta",
    "psd",
    "aur",
    "iohannis",
    "votăm",
    "susținem",
    "referendum",
    "alegeri",
    "partid",
    "românia mare",
    "politică",
    "protest",
    "unire",
    "guvern",
    "senator",
    "parlament",
]

def contains_political_terms(text, keywords=None):
    """Return True if *text* mentions any of the political keywords.

    Matching is case-insensitive on *text*; keywords are expected to be
    lowercase already. Keywords of three characters or fewer (e.g. "aur",
    "psd") must appear as standalone words, because as raw substrings they
    fire inside unrelated words such as "restaurant" or "Aurora". Longer
    keywords still match as substrings so inflected or glued forms
    ("partidul", "SusținemPonta") keep being detected.

    Args:
        text: Page name to inspect. Empty or None yields False.
        keywords: Optional iterable of lowercase keywords; defaults to the
            module-level ``political_keywords`` list.

    Returns:
        bool: whether at least one keyword was found.
    """
    import re  # local import: `re` is not part of the file's import block

    if not text:
        return False
    if keywords is None:
        keywords = political_keywords

    lowered = text.lower()
    for keyword in keywords:
        if len(keyword) <= 3:
            # \w is Unicode-aware for str patterns, so Romanian diacritics
            # count as word characters when checking the boundaries.
            if re.search(rf"(?<!\w){re.escape(keyword)}(?!\w)", lowered):
                return True
        elif keyword in lowered:
            return True
    return False

# Scan each account's rename history and flag consecutive renames that either
# drift far apart semantically or newly introduce a political keyword.
for account in accounts:
    # Tolerate a missing/null history and entries that carry no event type.
    history = account.get("history") or []
    name_changes = sorted(
        (event for event in history if event.get("event_type") == "NameChange"),
        key=lambda event: event["date"],
    )

    # A transition needs at least two recorded names.
    if len(name_changes) < 2:
        continue

    # Embed every name once, in a single batch, so a name in the middle of
    # the chain is not encoded twice (as "to" of one pair, "from" of the next).
    names = [event["name"] for event in name_changes]
    embeddings = model.encode(names, convert_to_tensor=True)

    for idx, (prev, curr) in enumerate(zip(name_changes, name_changes[1:])):
        name_1, name_2 = names[idx], names[idx + 1]

        similarity = util.cos_sim(embeddings[idx], embeddings[idx + 1]).item()
        distance = 1 - similarity

        political_before = contains_political_terms(name_1)
        political_now = contains_political_terms(name_2)
        became_political = political_now and not political_before

        if distance > DISTANCE_THRESHOLD or became_political:
            results.append({
                "page": f"[{account['name']}]({account['profile_url']})",
                "from_date": prev["date"],
                "from_name": name_1,
                "to_date": curr["date"],
                "to_name": name_2,
                "distance": round(distance, 3),
                "became_political": became_political,
                "political_to_political": political_now and political_before,
            })

# Prepare DataFrame
df_results = pd.DataFrame(results)
if not df_results.empty:
    # An empty `results` list yields a frame with no columns, and sorting
    # that by name raises KeyError, so only sort when there are rows.
    df_results = df_results.sort_values(
        by=["became_political", "distance"], ascending=[False, False]
    )

# Save markdown report
os.makedirs("../reports", exist_ok=True)
with open("../reports/name_change_political_pivot.md", "w", encoding="utf-8") as f:
    f.write("# Pages with Potential Political Pivot via Name Changes\n\n")
    f.write("This report flags pages that have:\n")
    # Report the threshold actually used rather than a hard-coded copy of it.
    f.write(f"- Made a semantic shift in name (distance > {DISTANCE_THRESHOLD}), **OR**\n")
    f.write("- Started using known political keywords in a newer name\n\n")
    if df_results.empty:
        f.write("_No pages matched these criteria._\n")
    else:
        # NOTE(review): page names containing "|" may break the markdown
        # table layout — confirm whether escaping is needed.
        f.write(df_results.to_markdown(index=False))
