# Analyse clusters

In this step we look for insights about the clusters of attackers.

In [None]:
import json
import networkx as nx
import collections
import pandas as pd


# Load the data

In [141]:
# Path of the input file; every non-blank line holds one JSON document.
caminho_arquivo_json = './attacks_122023.json'

# Iterate over the file object instead of calling readline() until an empty
# string shows up: that way a blank line in the middle of the file is skipped
# rather than silently ending the load and dropping all remaining records.
with open(caminho_arquivo_json, "r", encoding="utf-8") as results_file:
    results = [
        json.loads(stripped)
        for stripped in (raw_line.strip() for raw_line in results_file)
        if stripped
    ]

In [107]:

# Addresses that appeared as the "bot_address" of at least one record.
bots = set()
# "_id.$oid" values of the records already folded into the graph.
attacks_analyzed = set()
# Undirected graph whose nodes are bot and attacker addresses.
role_graph = nx.Graph()


# Collect into data frames

In [142]:
# Build the bot/attacker role graph from the loaded records.
#
# Only records whose "interface" is "bot" contribute to the graph; records
# whose "interface" is "exchange" are merely counted. Each record is handled at
# most once: its "_id.$oid" is remembered in `attacks_analyzed`, so running this
# cell again on already-seen records leaves the graph untouched (and the two
# per-run counters below stay at zero).
exchange_attacks = 0
count_bot_attacks = 0
for result in results:
    attack_id = result['_id']['$oid']
    if attack_id in attacks_analyzed:
        continue
    attacks_analyzed.add(attack_id)

    interface = result["interface"]
    if interface == "exchange":
        exchange_attacks += 1
        continue
    if interface != "bot":
        continue

    count_bot_attacks += 1

    # shortcuts
    first_attacker = result["first_transaction"]["from"]
    second_attacker = result["second_transaction"]["from"]
    bot = result["bot_address"]

    # keep the bots in a set
    bots.add(bot)

    # Make sure the bot node exists and carries the stat counters. An address
    # that was first seen as an attacker has no counters yet; add_node() on an
    # existing node only updates its attributes, so the node is promoted to a
    # bot in place and keeps its edges. (Removing and re-adding the node, as
    # was done before, dropped its edges and reset its accumulated stats.)
    if not role_graph.has_node(bot) or "attacks" not in role_graph.nodes[bot]:
        role_graph.add_node(bot, role="bot", attacks=0, cost=0, profit=0)

    # Register unseen attacker accounts. The has_node() guard matters: calling
    # add_node() on an address that is already a bot would overwrite its role.
    for attacker in (first_attacker, second_attacker):
        if not role_graph.has_node(attacker):
            role_graph.add_node(attacker, role="attacker")

    # update bot stats
    bot_stats = role_graph.nodes[bot]
    bot_stats["attacks"] += 1
    if "cost_usd" in result and "profit_usd" in result:
        bot_stats["cost"] += result["cost_usd"]
        bot_stats["profit"] += result["profit_usd"]
    else:
        # Incomplete record: the attack is still counted, but the totals
        # accumulated so far for this bot are left as they are.
        print("Erro")
        print(bot)

    # connect each attacker account to the bot it used
    for attacker in (first_attacker, second_attacker):
        if not role_graph.has_edge(attacker, bot):
            role_graph.add_edge(attacker, bot, role="attack")

# Summary of the graph and of this run.
role_totals = collections.Counter(
    data.get('role') for _, data in role_graph.nodes(data=True)
)
attackers_count = role_totals['attacker']
bot_count = role_totals['bot']
print("number of bots ", len(bots))
print("number of bot and attackers", len(role_graph.nodes()))
print("number of attacks accounts", attackers_count)
print("number of bot", bot_count)
print("number of bot attackers verified", count_bot_attacks)
# NOTE: despite its label, the next line reports how many records were loaded
# (len(results)), not a number of attacker accounts.
print("number of attackers ", len(results))
print("number of exchange attacks ", exchange_attacks)

number of bots  1091
number of bot and attackers 1524
number of attacks accounts 433
number of bot 1091
number of bot attackers verified 102833
number of attackers  103143
number of exchange attacks  310


In [149]:
# Identifier given to the next cluster; numbering starts at 1.
cluster_id = 1
# Number of clusters that contain more than one bot.
big_cluster_count = 0

# address -> cluster id, kept separately for each role
bot_hash_to_cluster_id = dict()
attacker_hash_to_cluster_id = dict()

# cluster id -> list of member entries
clusters_by_id = dict()
# one summary entry per cluster
cluster_stats = list()

In [150]:

# Every connected component of the role graph becomes one cluster.
for component in nx.connected_components(role_graph):
    # Tally the members of this component by role.
    role_counter = collections.Counter(
        role_graph.nodes[node]["role"] for node in component
    )

    # A component holding two or more bots counts as a "big" cluster.
    big_cluster_count += int(role_counter["bot"] > 1)

    cluster = []
    unique_bots_in_cluster = []  # never filled in this cell

    # Running totals over the bots of this component.
    attacks = cost = profit = 0

    for member_hash in component:
        member = role_graph.nodes[member_hash]
        member_role = member["role"]
        cluster.append(dict(hash=member_hash, role=member_role))

        if member_role == "bot":
            # A bot may belong to one cluster only.
            assert member_hash not in bot_hash_to_cluster_id
            bot_hash_to_cluster_id[member_hash] = cluster_id

            # Only bot nodes carry the attack statistics.
            attacks += member["attacks"]
            cost += member["cost"]
            profit += member["profit"]

        elif member_role == "attacker":
            # Likewise, an attacker account may belong to one cluster only.
            assert member_hash not in attacker_hash_to_cluster_id
            attacker_hash_to_cluster_id[member_hash] = cluster_id

    # Store the member list and the summary row of this cluster.
    clusters_by_id[cluster_id] = cluster
    cluster_stats.append(
        {
            "Cluster ID": cluster_id,
            "Attacks": attacks,
            "Cost": cost,
            "Profit": profit,
            "Attacker Accounts": role_counter["attacker"],
            "Bot Contracts": role_counter["bot"],
        }
    )

    # Move on to the next identifier.
    cluster_id += 1

print("Amount of clusters:", len(clusters_by_id))
print("Amount of clusters with more than one bot:", big_cluster_count)
print(clusters_by_id)

Amount of clusters: 252
Amount of clusters with more than one bot: 88
{1: [{'hash': '0x5292FCdbE308A2a763f567eE68798E4016822cC7', 'role': 'bot'}, {'hash': '0xfDe4e46a4f44C98de758a3E69eAEFed65b720054', 'role': 'bot'}, {'hash': '0x5230b4A7719fd909395Ef49BC045f70059c987ad', 'role': 'bot'}, {'hash': '0x278725130c5914DA10F35Ce5807C9de6BdD6bf17', 'role': 'bot'}, {'hash': '0x0cF5acE2F5eB46250B1B440ec840C4ff3a4493f5', 'role': 'bot'}, {'hash': '0x0014361413882B20040285d3A01A0a49107415f8', 'role': 'attacker'}, {'hash': '0xe57bDFD1e4389d6Dcf98056D7aD5eb53D3A4ad88', 'role': 'bot'}, {'hash': '0x6b90ed1ddf26C05FDEB79Cd56Ea9e52De9dbBe0b', 'role': 'bot'}, {'hash': '0xC86C20263e8890cF2d6a4c2CE48346E39dCc69B2', 'role': 'bot'}, {'hash': '0x57b6C00B6bfFA3001448484b56A05EFa30cdaC98', 'role': 'bot'}, {'hash': '0xD2D025d56917384212a39E38337d1Be62453659C', 'role': 'bot'}, {'hash': '0x2F5e6CEDcCC8313349Bf259C55aD5d013b2B0774', 'role': 'bot'}, {'hash': '0x5d4F758F4c18bCF1f3c74052F6F023F3879A1161', 'role': 'bot'

In [None]:

# Flatten clusters_by_id into one row per (cluster, member) pair.
# A comprehension is used on purpose: its loop variables stay local to the
# expression, so the notebook-level `cluster_id` counter is no longer
# overwritten by running this cell.
flattened_clusters = [
    {"Cluster ID": member_cluster_id, "Hash": node["hash"], "Role": node["role"]}
    for member_cluster_id, nodes in clusters_by_id.items()
    for node in nodes
]

# Create the DataFrame from the list of row dictionaries. Naming the columns
# explicitly keeps the header present even when there are no clusters at all.
df_clusters = pd.DataFrame(flattened_clusters, columns=["Cluster ID", "Hash", "Role"])

# Save the DataFrame to an Excel file
excel_file_path = './information_clusters_id.xlsx'
df_clusters.to_excel(excel_file_path)



# Environment Settings

In [120]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install scipy




[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [122]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install matplotlib




[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [123]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install networkx




[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [124]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install pandas





[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [125]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install openpyxl




[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
