# Análise de Comportamento de Bots Suspeitos

In [1]:
# dependências
import numpy as np
import pandas as pd
import networkx as nx
from networkx.algorithms import community

import os

# Move the working directory to the parent of the directory the kernel
# started in. The start directory is remembered the first time this cell
# runs, so re-running the cell lands in the same place instead of climbing
# one more level on every execution.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, ".."))

from util.load_graph import load_graph_by_edge, get_driver, get_user_property_keys, get_user_properties, get_property_values, get_property_values_for_users

In [2]:
# Create the driver handle that the cells below pass to the util.load_graph helpers.
driver = get_driver()

In [3]:
# Load the CSV listing every suspected bot (path is resolved against the
# current working directory); the bare name on the last line displays the frame.
suspected_bots = pd.read_csv('../data/all_suspected_bots.csv')
suspected_bots

Unnamed: 0,user_id
0,29e69b377b956d32f22189b16e6bc4c7
1,6adfedbb5ebea15b89d785ce436f6bc0
2,258b85b2176ee80d8980beeeddafe903
3,097eb71335c0d926761b880511f1b198
4,96dd9aa3ec0b34b3960f2e1e939a8d64
...,...
1322,9a20331e046807a4297fb0ce86201b05
1323,483338a6441c94f1c2eed5fa76cf3563
1324,b0d331018599ab17c4b665f00c40f031
1325,2d6b4ae98705a6e876e11459d1f55901


In [4]:
# Load the CSV of highest-confidence suspected bots (path is resolved against
# the current working directory); the bare name on the last line displays the frame.
high_conf_suspected_bots = pd.read_csv('../data/highest_confident_bots.csv')
high_conf_suspected_bots

Unnamed: 0,user_id
0,2dcee295aa11d8e1c2440a48962d8d59
1,8d1a3c5b2ba0f29df8a6a5be0bb66191
2,4beaaa8daf1704fcf3cef544d36f6509
3,8bbd902ea88b4a657af914fdc7d34286
4,916ba98d6bfdb5454531ce28e97793cc
5,497b4a8175118ce025ca715136974c60
6,0bb15deb79204136d3bbe5b6356ea809
7,826c407c3fc0646f32523e05bd57b459
8,4d825174847c8e2d7db1c77a58e9b725
9,c923ed78ee5cf2ecbd55ab500f19a5d4


## Comparação de Scores: Gerais vs. Suspeitos

In [5]:
def compare_property_averages(driver, suspected_bots_df, user_id_col='user_id'):
    """Compare per-property averages of suspected bots against all users.

    The candidate properties are read from the *first* user in
    ``suspected_bots_df``; only properties whose value for that user is an
    ``int`` or ``float`` are compared. Properties that this first user does
    not carry are therefore never compared, so two different input frames can
    yield different sets of rows.

    Parameters
    ----------
    driver
        Handle forwarded unchanged to ``get_user_properties``,
        ``get_property_values`` and ``get_property_values_for_users``.
    suspected_bots_df : pandas.DataFrame
        Frame holding the identifiers of the suspected users.
    user_id_col : str, default 'user_id'
        Column of ``suspected_bots_df`` that contains the identifiers.

    Returns
    -------
    pandas.DataFrame
        One row per compared property with the columns ``property``,
        ``avg_all_users``, ``avg_suspected_bots`` and ``ratio_susp_vs_all``,
        sorted by ``ratio_susp_vs_all`` in descending order. The frame is
        empty (but keeps these columns) when there are no suspected users or
        no property could be compared. The ratio is NaN when the average over
        all users is zero, because the ratio is undefined in that case.
    """
    columns = [
        "property",
        "avg_all_users",
        "avg_suspected_bots",
        "ratio_susp_vs_all",
    ]

    user_ids = suspected_bots_df[user_id_col]
    if len(user_ids) == 0:
        # No user to sample properties from; nothing can be compared.
        return pd.DataFrame(columns=columns)

    sample_user = user_ids.iloc[0]
    properties = get_user_properties(driver, sample_user)

    results = []

    for prop, val in properties.items():
        # Only numeric properties can be averaged (note: bool also passes
        # this check because bool is a subclass of int).
        if not isinstance(val, (int, float)):
            continue

        all_vals = get_property_values(driver, prop)
        susp_vals = get_property_values_for_users(driver, prop, user_ids)

        # Skip properties with no values on either side of the comparison.
        if len(all_vals) == 0 or len(susp_vals) == 0:
            continue

        avg_all = np.mean(all_vals)
        avg_susp = np.mean(susp_vals)

        results.append({
            "property": prop,
            "avg_all_users": avg_all,
            "avg_suspected_bots": avg_susp,
            # Undefined ratio becomes NaN instead of inf plus a RuntimeWarning.
            "ratio_susp_vs_all": avg_susp / avg_all if avg_all != 0 else np.nan,
        })

    # Passing `columns` keeps the sort key present even when `results` is empty.
    df = pd.DataFrame(results, columns=columns).sort_values(
        by="ratio_susp_vs_all", ascending=False
    )

    return df

In [6]:
# Compare property averages for the full suspected-bot list; the returned frame is displayed.
compare_property_averages(driver, suspected_bots)

Unnamed: 0,property,avg_all_users,avg_suspected_bots,ratio_susp_vs_all
6,viral_score,0.006145,0.044924,7.310932
3,misinfo_score,0.132189,0.540832,4.091353
5,shares_score,0.522397,1.678166,3.212435
2,metronome_score,1.484604,1.473839,0.992749
0,content_originality,0.936023,0.806489,0.861612
4,network_diversity,0.558147,0.431893,0.773797
1,content_uniqueness,0.70584,0.313629,0.444335


In [7]:
# Same comparison, restricted to the highest-confidence suspected bots.
compare_property_averages(driver, high_conf_suspected_bots)

Unnamed: 0,property,avg_all_users,avg_suspected_bots,ratio_susp_vs_all
7,viral_score,0.006145,0.140778,22.910117
3,misinfo_score,0.132189,1.464348,11.077685
5,shares_score,0.522397,3.008423,5.758884
2,metronome_score,1.484604,1.356172,0.913491
0,content_originality,0.936023,0.756242,0.807931
6,synchronicity_score,9.80544,7.686507,0.783902
4,network_diversity,0.558147,0.201345,0.360738
1,content_uniqueness,0.70584,0.059854,0.084799
