# Application of ConDynS on WikiConv German

Dataset information can be found at: https://convokit.cornell.edu/documentation/wikiconv.html

In [None]:
import json
from convokit import Corpus
from tqdm import tqdm
import matplotlib.pyplot as plt
import ast
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
import numpy as np
import re
import scipy.stats as stats
from itertools import combinations
import random
import string
import math
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from scipy.spatial.distance import squareform
from sklearn.feature_extraction.text import CountVectorizer as CV
import string

from convokit.genai.genai_config import GenAIConfigManager
from convokit.convo_similarity.summary import SCDWriter
from convokit.convo_similarity.condyns import ConDynS

[nltk_data] Downloading package punkt to /home/kz88/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Load WikiConv German Conversations and Format Them

In [None]:
### Setup path for data and corpus ###

DATA_PATH = "./data"
# NOTE: plain string concatenation -- no path separator is inserted between
# DATA_PATH and the file name, so the placeholder must supply its own.
filepath = DATA_PATH + "PATH TO WIKI GERMAN DATA"

### Set up config for GenAI ###
config = GenAIConfigManager() ### make sure to set your own config if this is never set before

### Select which model provider to use for ConDynS ###
MODEL_PROVIDER = "gemini"
MODEL = "gemini-2.0-flash-001"
config.set_google_cloud_config("YOUR PROJECT", "YOUR LOCATION")

### Load the conversations ###
# Read as UTF-8 explicitly: the conversations contain non-ASCII (German) text,
# and the platform's default encoding is not guaranteed to be UTF-8.
with open(filepath, "r", encoding="utf-8") as f:
    dataset = json.load(f)

### Draw a reproducible sample of 100 conversations ###
random.seed(4300)
dataset = random.sample(dataset, 100)
len(dataset)

100

In [3]:
# Peek at the first sampled conversation to inspect its raw structure.
convo = dataset[0]
convo

{'convo_id': '64211111.132.132',
 'utterances': [{'isUnchanged': False,
   'page_title': 'Diskussion:Schlittenseilbahn',
   'parent_id': None,
   'user_text': 'Pechristener',
   'timestamp': '2009-09-06T17:49:41Z',
   'content': '== Schlitten-Standseilbahn => Schlittenseilbahn ==\n',
   'cleaned_content': ' Schlitten-Standseilbahn => Schlittenseilbahn ',
   'replyTo_id': None,
   'page_id': '2160865',
   'indentation': -1,
   'authors': ['5088:Pechristener'],
   'conversation_id': '64211111.132.132',
   'user_id': '5088',
   'type': 'CREATION',
   'id': '64211111.132.132',
   'ancestor_id': '64211111.132.132',
   'rev_id': 64211111},
  {'isUnchanged': False,
   'page_title': 'Diskussion:Schlittenseilbahn',
   'parent_id': None,
   'user_text': 'Pechristener',
   'timestamp': '2009-09-06T17:49:41Z',
   'content': "Ich habe mir erlaubt den Artikel nach ''Schlittenseilbahn'' zu verschieben. Ich bin einverstanden, dass [[Funi (Seilbahn)]] kein schlaues Lemma für einen Artikel ist, ''Schlit

In [20]:
def format_wiki_german_convo(convo, truncated_by = 0, start_at = 0):
    """Render a conversation as a list of ``"SPEAKER<k>: <text>"`` lines.

    Speakers are anonymized: each distinct first author (``utt['authors'][0]``)
    is numbered, starting at 1, in order of first appearance among the
    utterances that are kept.

    Args:
        convo: dict with an ``'utterances'`` list; every utterance must provide
            ``'authors'`` (non-empty sequence) and ``'cleaned_content'`` (str).
        truncated_by: number of utterances to drop from the end.
        start_at: index of the first utterance to keep, applied after the
            truncation.

    Returns:
        A list of strings, one per kept utterance.
    """
    utterances = convo['utterances']
    # Clamp at zero: dropping more utterances than exist must give an empty
    # transcript, not wrap around into a negative slice index.
    keep_until = max(len(utterances) - truncated_by, 0)
    kept = utterances[:keep_until][start_at:]

    speaker_numbers = {}
    transcription = []
    for utt in kept:
        author = utt['authors'][0]
        number = speaker_numbers.setdefault(author, len(speaker_numbers) + 1)
        transcription.append("SPEAKER" + str(number) + ": " + utt['cleaned_content'])
    return transcription

### Generating SCDs and SoPs

In [None]:
# Custom summarization prompt for Wikipedia talk-page discussions; it is handed
# to SCDWriter as `custom_scd_prompt`. The text contains a `{transcript}`
# placeholder (presumably filled in by SCDWriter -- confirm against ConvoKit)
# and asks for an English trajectory summary of at most 80 words that avoids
# topical content and focuses on sentiments, intentions, and strategies.
wiki_german_SCD_prompt = """
Write a short summary capturing the trajectory of a Wikipedia talk-page discussion. Do not include specific article content, titles, policy names, diffs/edits, quotes, or concrete claims. The style you should avoid is illustrated in Example Sentence 1: “Speaker1 insisted an article include a particular detail and cited a specific policy by name. Speaker2 countered with a different policy and argued that the section should be removed. Speaker3 referenced a prior version and proposed a precise rewrite.” Instead, you should include indicators of sentiments (e.g., sarcasm, politeness, frustration), intentions (e.g., agreement, disagreement, rebuttal, concession, clarification, accusation), and strategies (e.g., consensus attempts, moderation, revert-restore cycles, rhetorical questions, appeals to emotion). The following sentences demonstrate the style you should follow: Example Sentence 2: “Both speakers hold differing views and become defensive. Speaker1 diminishes the weight of Speaker2’s reasoning, and Speaker2 blames Speaker1 for an uncivil tone. Both accuse each other of focusing on personal traits rather than reasoning.” Example Sentence 3: “The speakers refute each other with back-and-forth accusations. Persistent fault-finding and critical stances escalate tension and hinder productive discussion.” Overall, the trajectory summary should capture the key moments where the discussion’s tone or coordination changes. Here is an example of a complete trajectory summary: Multiple speakers discuss possible changes. Several present differing stances in sequence, building on and contesting each other’s reasoning. Speaker1 disputes a point from Speaker2, prompting a rebuttal. Speaker3 supports Speaker1, after which Speaker2 defends their position. Later, a speaker references a removed remark and offers an extended counter. Despite friction, the tone remains mostly civil with attempts at consensus. Now, provide the trajectory summary for the following conversation. 
Conversation Transcript: {transcript}. Now, summarize this conversation. Remember, do not include specific topics, claims, policies, or edits. Instead, capture the speakers’ sentiments, intentions, and strategies. Limit the trajectory summary to 80 words. Trajectory Summary (in English):
"""

In [None]:
# Both ConvoKit components use the same provider, model and GenAI config,
# so those settings are written down once and shared.
_llm_settings = {
    "model_provider": MODEL_PROVIDER,
    "model": MODEL,
    "config": config,
}

# Summary writer, configured with the talk-page prompt defined in this notebook.
scd_writer = SCDWriter(
    custom_scd_prompt=wiki_german_SCD_prompt,
    custom_prompt_dir="wiki_german",
    **_llm_settings,
)

# Similarity scorer used for the pairwise comparison.
condyns = ConDynS(**_llm_settings)

In [None]:
# Per-conversation outputs of the SCD writer, keyed by conversation id:
#   time_analysis_scd -> the SCD, bulletpoints -> the SoP.
time_analysis_scd, bulletpoints = {}, {}

for convo in tqdm(dataset, desc="Generating SCDs and SoPs for conversations"):
    convo_id = convo['convo_id']
    # Utterance lines are separated by a blank line in the transcript.
    transcript = "\n\n".join(format_wiki_german_convo(convo))
    time_analysis_scd[convo_id], bulletpoints[convo_id] = scd_writer.get_scd_and_sop(transcript)

Generating SCDs for conversations: 100%|██████| 100/100 [01:40<00:00,  1.00s/it]


In [None]:
import os

# Persist the generated SCDs and SoPs inside DATA_PATH.
# os.path.join supplies the path separator that plain `DATA_PATH + name`
# omitted (which produced "./datawiki_german_100_scd.json").
os.makedirs(DATA_PATH, exist_ok=True)

with open(os.path.join(DATA_PATH, "wiki_german_100_scd.json"), 'w', encoding="utf-8") as file:
    json.dump(time_analysis_scd, file, indent=4)

with open(os.path.join(DATA_PATH, "wiki_german_100_sop.json"), 'w', encoding="utf-8") as file:
    json.dump(bulletpoints, file, indent=4)

In [16]:
# Bundle the per-conversation SCDs and SoPs in one mapping; the
# fighting-words helper reads the "bulletpoints" entry.
similarity_and_bulletpoints = dict(scd=time_analysis_scd, bulletpoints=bulletpoints)

### Compute ConDynS Scores

In [10]:
# Index the sampled conversations by their id for direct lookup.
convo_id_to_convo = {entry['convo_id']: entry for entry in dataset}

In [None]:
num = 100
convo_ids = [convo['convo_id'] for convo in dataset]
assert len(convo_ids) == num

# Format every transcript once up front instead of once per pair.
transcripts = {
    convo_id: "\n\n".join(format_wiki_german_convo(convo_id_to_convo[convo_id]))
    for convo_id in convo_ids
}

all_combos = list(combinations(convo_ids, 2))

# convo_scores["<id1>_<id2>"] = {"result": <bidirectional result>, "score": <score>}
convo_scores = {}

for convo_id1, convo_id2 in tqdm(all_combos, desc="Calculating pairs similarity"):
    pair_key = convo_id1 + "_" + convo_id2
    # Skip pairs that were already scored in either order.
    if pair_key in convo_scores or convo_id2 + "_" + convo_id1 in convo_scores:
        continue

    # ConDynS compares each conversation's SoP against the *other*
    # conversation's transcript, so pass the transcripts (not the ids).
    result = condyns.compute_bidirectional_similarity(
        transcripts[convo_id1],
        transcripts[convo_id2],
        bulletpoints[convo_id1],
        bulletpoints[convo_id2],
    )
    score = condyns.compute_score_from_results(result)

    # Create the entry in one go; assigning into convo_scores[pair_key][...]
    # without creating it first raises KeyError on a plain dict.
    convo_scores[pair_key] = {"result": result, "score": score}

In [None]:
import os

# Persist the pairwise results inside DATA_PATH.
# os.path.join supplies the path separator that plain `DATA_PATH + name`
# omitted (which produced "./datawiki_german_100_scores.json").
os.makedirs(DATA_PATH, exist_ok=True)

with open(os.path.join(DATA_PATH, "wiki_german_100_scores.json"), 'w', encoding="utf-8") as file:
    json.dump(convo_scores, file, indent=4)

In [None]:
# Inspect the first direction of the stored result for one example pair:
# a mapping from pattern index to {'analysis': ..., 'score': ...}.
convo_scores['64211111.132.132_152670880.5344.5344']["result"][0]

{'0': {'analysis': 'Speaker1 proposes mentioning a Verbrauch value, providing context and justifications based on test results.',
  'score': 0.8},
 '1': {'analysis': 'No disagreement or defense of an original term is present in the provided transcript.',
  'score': 0.0},
 '2': {'analysis': 'No compromise or further advocacy from Speaker1 is evident in the provided transcript.',
  'score': 0.0},
 '3': {'analysis': 'No alignment of Speaker3 and Speaker2 against Speaker1 is present.',
  'score': 0.0},
 '4': {'analysis': 'Speaker1 does not persist in their viewpoint or refute others claims.',
  'score': 0.0},
 '5': {'analysis': 'No direct attack on Speaker1 reasoning or dismissal of understanding occurs.',
  'score': 0.0},
 '6': {'analysis': 'No support for an attacker position is present in the transcript.',
  'score': 0.0},
 '7': {'analysis': 'No historical explanation or acknowledgment of complexity is offered.',
  'score': 0.0}}

# Clustering

In [None]:
def get_similarity(convo_id1, convo_id2):
    """Look up the stored similarity score of a conversation pair.

    Scores live in the module-level ``convo_scores`` under the key
    ``"<idA>_<idB>"``; the pair may have been stored in either order, so both
    orderings are tried.

    Args:
        convo_id1: id of one conversation.
        convo_id2: id of the other conversation.

    Returns:
        The ``"score"`` entry of the pair, or ``None`` (after printing a
        notice) when the pair was not scored.
    """
    for key in (convo_id1 + "_" + convo_id2, convo_id2 + "_" + convo_id1):
        if key in convo_scores:
            # Index with the key that was actually found.
            return convo_scores[key]["score"]
    print("Did not find the score")
    return None

In [14]:
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster
from scipy.spatial.distance import squareform

# Step 1: build a symmetric pairwise distance matrix (zero diagonal).
n = len(convo_ids)
distance_matrix = np.zeros((n, n))

for i, j in combinations(range(n), 2):
    pair_similarity = np.sum(get_similarity(convo_ids[i], convo_ids[j]))
    # Turn similarity into distance: the more similar, the closer.
    distance_matrix[i, j] = 2 - pair_similarity
    distance_matrix[j, i] = distance_matrix[i, j]

# linkage() expects the condensed (upper-triangle) form of the matrix.
condensed_dist_matrix = squareform(distance_matrix)

# Step 2: agglomerative clustering with Ward linkage.
# NOTE(review): SciPy documents Ward linkage as correctly defined only for
# Euclidean distances; these distances come from similarity scores -- confirm
# this choice is intended.
linkage_matrix = linkage(condensed_dist_matrix, method="ward")

# Step 3: cut the tree into two top-level clusters and group the positions
# (indices into convo_ids) by cluster label.
top_level_clusters = fcluster(linkage_matrix, t=2, criterion='maxclust')

clusters = defaultdict(list)
for position, cluster_label in enumerate(top_level_clusters):
    clusters[cluster_label].append(position)

In [None]:
# clusters[1] and clusters[2] hold the member positions of the two top-level
# clusters; map each position to its conversation id, then to that
# conversation's SCD.
_cluster1_member_ids = (convo_ids[pos] for pos in clusters[1])
cluster1 = [time_analysis_scd[member_id] for member_id in _cluster1_member_ids]

_cluster2_member_ids = (convo_ids[pos] for pos in clusters[2])
cluster2 = [time_analysis_scd[member_id] for member_id in _cluster2_member_ids]

In [None]:
# from https://github.com/jmhessel/FightingWords/blob/master/fighting_words_py3.py
exclude = set(string.punctuation)

def basic_sanitize(in_string):
    '''Return a roughly normalized copy of ``in_string``.

    Every character listed in ``exclude`` is removed, the text is lower-cased,
    and each run of whitespace is collapsed to a single space (leading and
    trailing whitespace is dropped).'''
    # Mapping a code point to None makes str.translate delete that character.
    deletions = dict.fromkeys(map(ord, exclude))
    without_punctuation = in_string.translate(deletions)
    words = without_punctuation.lower().split()
    return ' '.join(words)

def bayes_compare_language(l1, l2, ngram = 1, prior=.01, cv = None):
    '''
    Compare n-gram usage between two text samples and score every vocabulary
    item with a z-score of the smoothed log-odds difference.

    Arguments:
    - l1, l2; a list of strings from each language sample
    - ngram; an int describing up to what n gram you want to consider (1 is unigrams,
    2 is bigrams + unigrams, etc). Ignored if a custom CountVectorizer is passed.
    - prior; either a number describing a uniform prior, or a vector describing a prior
    over vocabulary items. If you're using a predefined vocabulary, make sure to specify that
    when you make your CountVectorizer object.
    - cv; a sklearn.feature_extraction.text.CountVectorizer object, if desired.

    Returns:
    - A list of length |Vocab| where each entry is a (n-gram, zscore) tuple,
    sorted by ascending z-score: the most negative scores (terms favoured by l2)
    come first, the most positive (terms favoured by l1) come last.

    Raises:
    - ValueError; if a non-uniform prior is given without a count vectorizer, or
    if the prior vector does not have exactly one entry per vocabulary item.'''
    uniform_prior = isinstance(prior, (int, float)) and not isinstance(prior, bool)
    if cv is None and not uniform_prior:
        # Raise instead of quit(): quitting would terminate the whole
        # interpreter / notebook kernel.
        raise ValueError(
            "If using a non-uniform prior, please also pass a count vectorizer "
            "with the vocabulary parameter set."
        )
    l1 = [basic_sanitize(l) for l in l1]
    l2 = [basic_sanitize(l) for l in l2]
    if cv is None:
        cv = CV(decode_error = 'ignore', min_df=2, max_df=0.9, ngram_range=(1,ngram),
                binary = False,
                max_features = 15000)
    counts_mat = cv.fit_transform(l1+l2).toarray()
    # Now sum over languages...
    vocab_size = len(cv.vocabulary_)
    print("Vocab size is {}".format(vocab_size))
    if uniform_prior:
        priors = np.full(vocab_size, float(prior))
    else:
        # Accept any array-like prior, not only ndarrays.
        priors = np.asarray(prior, dtype=float)
        if priors.shape != (vocab_size,):
            raise ValueError(
                "prior must have one entry per vocabulary item "
                "(expected {}, got shape {})".format(vocab_size, priors.shape)
            )
    # Row 0: per-term counts over the l1 documents; row 1: over the l2 documents.
    count_matrix = np.empty([2, vocab_size], dtype=np.float32)
    count_matrix[0, :] = np.sum(counts_mat[:len(l1), :], axis = 0)
    count_matrix[1, :] = np.sum(counts_mat[len(l1):, :], axis = 0)
    a0 = np.sum(priors)
    n1 = 1.*np.sum(count_matrix[0,:])
    n2 = 1.*np.sum(count_matrix[1,:])
    print("Comparing language...")
    # Vectorized over the vocabulary. Work in float64 so the arithmetic matches
    # what per-element scalar computation would give.
    counts = count_matrix.astype(np.float64)
    smoothed1 = counts[0] + priors
    smoothed2 = counts[1] + priors
    # delta: difference of the smoothed log-odds of each term in the two samples
    term1 = np.log(smoothed1/(n1 + a0 - counts[0] - priors))
    term2 = np.log(smoothed2/(n2 + a0 - counts[1] - priors))
    delta = term1 - term2
    # variance estimate of delta
    var = 1./smoothed1 + 1./smoothed2
    z_scores = delta/np.sqrt(var)
    index_to_term = {v:k for k,v in cv.vocabulary_.items()}
    return [(index_to_term[i], z_scores[i]) for i in np.argsort(z_scores)]

In [None]:
def _collect_matched_bullets(convo_id_list, similarity_result, bulletpoints_by_convo, score_threshold):
    '''Return the bullet points that scored above ``score_threshold`` in any pair drawn from ``convo_id_list``.'''
    matched = []
    for convo_id1, convo_id2 in combinations(convo_id_list, 2):
        forward_key = f"{convo_id1}_{convo_id2}"
        if forward_key in similarity_result:
            key, owners = forward_key, (convo_id1, convo_id2)
        else:
            # The pair is stored under the reversed key, so its per-direction
            # results are in the reversed order as well.
            key, owners = f"{convo_id2}_{convo_id1}", (convo_id2, convo_id1)
        for direction, result in enumerate(similarity_result[key]["result"]):
            # Direction 0 is indexed by the first stored conversation's bullet
            # points, every later direction by the second one's.
            owner = owners[0] if direction == 0 else owners[1]
            for index, entry in result.items():
                if entry['score'] > score_threshold:
                    try:
                        matched.append(bulletpoints_by_convo[owner][index])
                    except (KeyError, IndexError, TypeError):
                        # Best effort: skip indices that have no matching
                        # bullet point in the owner's list.
                        continue
    return matched


# NOTE: the defaults for similarity_result / summaries_and_bullets are bound
# when this def is executed; re-run the definition (or pass the arguments
# explicitly) after those globals are reassigned.
def get_fighting_words_matching_bullets(cluster1, cluster2, similarity_result=convo_scores, summaries_and_bullets=similarity_and_bulletpoints, score_threshold=0.5, top_k=15, ngram=3):
    '''
    Print the n-grams that best distinguish the matched bullet points of two clusters.

    For every pair of conversations inside a cluster, the bullet points whose
    stored similarity score exceeds ``score_threshold`` are collected; the two
    collections are then compared with ``bayes_compare_language``.

    Arguments:
    - cluster1, cluster2; lists of conversation ids, one list per cluster.
    - similarity_result; mapping "<idA>_<idB>" -> {"result": [per-direction dicts], ...},
    where each per-direction dict maps a bullet index to a dict with a 'score'.
    - summaries_and_bullets; mapping whose 'bulletpoints' entry maps a conversation id
    to its bullet points (indexable by the indices used in similarity_result).
    - score_threshold; a bullet point counts as matched when its score is strictly above this.
    - top_k; how many top-scoring n-grams to print per cluster.
    - ngram; largest n-gram size passed on to bayes_compare_language.

    Returns:
    - (matched_cluster1, matched_cluster2); the collected bullet points of each cluster.'''
    bulletpoints_by_convo = summaries_and_bullets['bulletpoints']
    matched_cluster1 = _collect_matched_bullets(cluster1, similarity_result, bulletpoints_by_convo, score_threshold)
    matched_cluster2 = _collect_matched_bullets(cluster2, similarity_result, bulletpoints_by_convo, score_threshold)

    # z_scores is sorted ascending: the tail favours cluster 1, the head cluster 2.
    z_scores = bayes_compare_language(matched_cluster1, matched_cluster2, ngram = ngram)
    top_k_class1 = list(reversed([(x[0], round(x[1],2)) for x in z_scores[-top_k:]]))
    top_k_class2 = [(x[0], round(x[1],2)) for x in z_scores[:top_k]]
    print("Fighting Words Comments between:")
    print("Cluster1: ", top_k_class1)
    print("Cluster2: ", top_k_class2)
    return matched_cluster1, matched_cluster2

In [46]:
# Sizes of the two top-level clusters.
len(clusters[1]), len(clusters[2])

(45, 55)

In [47]:
# Translate the member positions of each top-level cluster into conversation ids.
cluster1_ids, cluster2_ids = (
    [convo_ids[pos] for pos in clusters[label]] for label in (1, 2)
)

In [51]:
# Print the distinguishing n-grams of the two clusters and keep the matched
# bullet points of each cluster for inspection.
cluster1_bulletpoints, cluster2_bulletpoints = get_fighting_words_matching_bullets(cluster1_ids, cluster2_ids)

Vocab size is 6959
Comparing language...
Fighting Words Comments between:
Cluster1:  [('message', 19.06), ('gratitude', 17.8), ('issue', 17.58), ('repeats', 16.04), ('responds with', 15.72), ('is', 15.18), ('helpful', 14.28), ('polite', 14.06), ('speaker2 responds with', 13.81), ('same', 12.86), ('tone is', 12.55), ('expresses gratitude', 12.31), ('the same', 11.93), ('with gratitude', 11.77), ('brief', 11.6)]
Cluster2:  [('speaker3', -16.28), ('disagreement', -12.94), ('expressing', -11.49), ('confusion', -10.95), ('with question', -9.81), ('conversation with question', -9.76), ('enters', -9.18), ('additional', -8.4), ('speaker4', -8.18), ('point', -8.06), ('with question expressing', -7.93), ('question expressing', -7.93), ('speaker3 enters', -7.65), ('with speaker2', -7.64), ('defends', -7.6)]


In [52]:
# Matched bullet points collected from cluster 1.
cluster1_bulletpoints

['Speaker1 repeats the same information and question multiple times',
 'The repeated questioning suggests Speaker1 is trying to emphasize a point',
 'Speaker1 repeats the same information and question multiple times',
 'The repetition indicates persistence and perhaps a hint of frustration at the lack of response',
 'The repeated questioning suggests Speaker1 is trying to emphasize a point',
 'Speaker1 is possibly confused or insistent on getting an answer',
 'The overall tone is neutral but persistent',
 'Speaker1 initiates the conversation with a question, seemingly seeking input or validation',
 'Speaker1 repeats the same information and question multiple times',
 'The repetition indicates persistence and perhaps a hint of frustration at the lack of response',
 'The repeated questioning suggests Speaker1 is trying to emphasize a point',
 'Speaker1 is possibly confused or insistent on getting an answer',
 'The overall tone is neutral but persistent',
 'Speaker1 reports an issue, seem

In [53]:
# Matched bullet points collected from cluster 2.
cluster2_bulletpoints

['Speaker1 proposes a change, providing justifications',
 'Speaker2 disagrees, defending the original term and its usage',
 'Speaker1 concedes slightly, offering a compromise while still advocating for their preferred term, providing supporting evidence',
 'Speaker1 persists in their viewpoint, attempting to refute the others claims',
 'Speaker1 initiates the conversation with a question',
 'Speaker1 challenges a previous statement',
 'Speaker2 offers a defense',
 'Speaker1 rebuts with skepticism',
 'Speaker1 proposes an edit',
 'Speaker3 disagrees',
 'Speaker1 expresses continued skepticism but concedes temporarily',
 'Speaker1 defends themselves against perceived accusations',
 'Speaker1 reiterates their intention for accuracy',
 'Speaker1 proposes a change, providing justifications',
 'Speaker2 disagrees, defending the original term and its usage',
 'Speaker1 initiates the conversation with an accusatory tone, suggesting an edit war',
 'Speaker2 responds politely, expressing relucta