In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import random


def load_data(file_path):
    df = pd.read_csv(file_path)
    df.columns = ["thread_subject", "username", "page_name"]
    return df


def build_network(df):
    G = nx.Graph()
    grouped = df.groupby(["page_name", "thread_subject"])
    for (page, thread), group in grouped:
        users = group["username"].unique()
        for i in range(len(users)):
            for j in range(i + 1, len(users)):
                G.add_edge(users[i], users[j])
    return G


def detect_trolls(file_path, title="Network"):
    print(f"\n--- Running Troll Detection on: {title} ---")
    df = load_data(file_path)
    G = build_network(df)

    if G.number_of_nodes() < 2:
        print("Graph too small to run troll detection.")
        return

    # Randomly select two editors
    selected_editors = random.sample(list(G.nodes()), 2)
    print(f"Selected editors for monitoring: {selected_editors}")

    # Priority: highly connected neighbors of the first editor
    priority_list = sorted(G.neighbors(selected_editors[0]), key=lambda n: G.degree(n), reverse=True)
    print(f"Priority list of neighbors to check (top 10): {priority_list[:10]}")

    # Simulate potential spread from both selected editors
    for editor in selected_editors:
        shortest_paths = nx.single_source_shortest_path_length(G, editor, cutoff=3)
        spread_targets = list(shortest_paths.keys())[:10]
        print(f"Potential spread from {editor} (top 10 nodes): {spread_targets}")


detect_trolls("data/datasets/PROJECT_CHAT.csv", title="Large Network")
detect_trolls("data/datasets/ADMINISTRATORS.csv", title="Medium Network")
detect_trolls("data/datasets/INTERWIKI_CONFLICT.csv", title="Small Network")


--- Running Troll Detection on: Large Network ---
Selected editors for monitoring: ['173.254.28.43', 'Yurik']
Priority list of neighbors to check (top 10): ['Ymblanter', 'Mbch331', '195.191.250.161', 'Джерри К.Г', '69.163.34.169', '45.124.64.155']
Potential spread from 173.254.28.43 (top 10 nodes): ['173.254.28.43', '195.191.250.161', 'Mbch331', 'Ymblanter', 'Джерри К.Г', '69.163.34.169', '45.124.64.155', 'Jobu0101', 'Pigsonthewing', 'Pasleim']
Potential spread from Yurik (top 10 nodes): ['Yurik', 'Ymblanter', 'Jeblad', 'Filceolaire', 'Kolja21', 'Tpt', 'Addshore', 'Hazard-SJ', 'Kipcool', 'Kipmaster']

--- Running Troll Detection on: Medium Network ---
Selected editors for monitoring: ['Chrisahn', 'Aditya Kabir']
Priority list of neighbors to check (top 10): ['Ymblanter', 'Jura1', 'Bovlb']
Potential spread from Chrisahn (top 10 nodes): ['Chrisahn', 'Bovlb', 'Ymblanter', 'Jura1', 'MisterSynergy', 'Til Eulenspiegel', 'Mahir256', 'Kubura', 'Yupik', 'Dipsacus fullonum']
Potential spread fr