# This is the code to construct the graph
- Including main graph and neighbor nodes(currently ``one hop`` only)  
- ``Main graph is red``, including nodes and edges  
- Others would be mapped to other colors
> Possible Issue: If label is T1002.001 -> 1002.001 -> T is gone



## Draw all the graph without benign

In [1]:
import os
import numpy as np
import networkx as nx
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib.patches as mpatches


def read_data(file_path):
    edges = []
    with open(file_path, "r") as file:
        for line in file:
            source, destination, relation, label = line.strip().split(',')
            edges.append((source, destination, relation, label))
    # edges is a list contains all the information (each set is stored in a tuple)
    return edges


def get_other_labels_edges(full_edges, related_nodes, target_label):
    other_labels_edges = []
    for edge in full_edges:
        source, destination, relation, label = edge
        if label != target_label and (source in related_nodes or destination in related_nodes):
            other_labels_edges.append(edge)
    return other_labels_edges


def get_related_nodes(edges, target, is_label=True):
    related_nodes = set()

    for edge in edges:
        source, destination, relation, label = edge
        if (is_label and label == target) or (not is_label and (source == target or destination == target)):
            related_nodes.add(source)
            related_nodes.add(destination)

    return related_nodes

### Draw graph function

In [13]:

def draw_graph(full_edges, target_edges, file_name, target_label, title=None, figsize=(10, 8), dpi=100):
    related_nodes = get_related_nodes(target_edges, title)  # only consider edges with target_label
    other_labels_edges = get_other_labels_edges(full_edges, related_nodes, title)

    # Separate edges into main graph and others
    main_edges = [edge for edge in target_edges if edge[-1] == target_label]
    other_edges = [edge for edge in target_edges if edge[-1] != target_label] + other_labels_edges

    G = nx.DiGraph()
    edge_labels = {}
    node_colors = {}
    edge_colors = {}

    # Extract unique labels (e.g., 'T1115_707' from 'T1115_70795de7cbb842edb029b3378c27c008')
    unique_labels = list(set([label.split('_')[0] + '_' + label.split('_')[1][:3] for _, _, _, label in other_edges]))  

    # Convert the target_label to same format as other labels
    target_label_modified = target_label.split('_')[0] + '_' + target_label.split('_')[1][:3]

    # Create a color map with unique colors for each label
    colors = cm.rainbow(np.linspace(0, 1, len(unique_labels)+1))
    red_color = np.array([1, 0, 0, 1])  # Red color
    color_map = {label: color for label, color in zip(unique_labels, colors) if not np.array_equal(color, red_color)}
    color_map[target_label_modified] = red_color  # Assign red color to the target label separately

    # Add other edges
    for source, destination, relation, label in other_edges:
        edge = (source, destination)
        label_prefix = label.split('_')[0]  # Get the first element after splitting by '_'
        label_prefix_extended = label_prefix + "_" + label.split('_')[1][:3]  # Add "_" and the first three characters after the second '_' to the label_prefix

        if edge in G.edges():
            edge_labels[edge] += ", " + relation
        else:
            G.add_edge(source, destination)

            # Check if the edge is a part of the target edges (main graph)
            if label == target_label:
                node_colors[source] = 'red'
                node_colors[destination] = 'red'
                edge_colors[edge] = 'red'
            else:
                node_colors[source] = color_map.get(label_prefix_extended, 'gray')  # Set gray as the default color
                node_colors[destination] = color_map.get(label_prefix_extended, 'gray')  # Set gray as the default color
                edge_colors[edge] = color_map.get(label_prefix_extended, 'black')

            edge_labels[edge] = relation

    # Add main graph edges
    for source, destination, relation, label in main_edges:
        edge = (source, destination)
        G.add_edge(source, destination)
        node_colors[source] = 'red'
        node_colors[destination] = 'red'
        edge_colors[edge] = 'red'

        if edge in edge_labels:
            edge_labels[edge] += ", " + relation
        else:
            edge_labels[edge] = relation

    pos = graphviz_layout(G, prog="dot")
    pos = {node: (x, y-0.1) for node, (x, y) in pos.items()}

    plt.figure(figsize=figsize)

    # Draw nodes with custom label styles
    node_labels = {node: rf"$\bf{{{node}}}$" for node in G.nodes()}
    nx.draw(G, pos, with_labels=False, node_size=1500, font_size=8, node_color=[node_colors[node] for node in G.nodes()], edge_color=[edge_colors[edge] for edge in G.edges()], arrowsize=10, font_color='black')
    nx.draw_networkx_labels(G, pos, labels=node_labels, font_size=8, font_weight='bold', font_color='black')

    # Draw edges with custom edge labels
    edge_labels = {(source, destination): f"({len(relations.split(','))})" for (source, destination), relations in edge_labels.items()}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

    if title:
        plt.text(0.05, 0.95, title, transform=plt.gca().transAxes, fontsize=12, fontweight='bold', verticalalignment='top')

    # for labeling all the label with the corresponding color
    patches = [mpatches.Patch(color=color, label=label) for label, color in color_map.items()]
    plt.legend(handles=patches)

    plt.savefig(file_name + ".png", dpi=dpi)
    plt.clf()


- Benign version

In [5]:
def draw_graph(full_edges, target_edges, file_name, target_label, title=None, figsize=(10, 8), dpi=100):
    related_nodes = get_related_nodes(target_edges, title)  # only consider edges with target_label
    other_labels_edges = get_other_labels_edges(full_edges, related_nodes, title)

    # Separate edges into main graph and others
    main_edges = [edge for edge in target_edges if edge[-1] == target_label]
    other_edges = [edge for edge in target_edges if edge[-1] != target_label] + other_labels_edges

    G = nx.DiGraph()
    edge_labels = {}
    node_colors = {}
    edge_colors = {}

    # Extract unique labels (e.g., 'T1115_707' from 'T1115_70795de7cbb842edb029b3378c27c008')
    unique_labels = list(set([label.split('_')[0] + '_' + label.split('_')[1][:3] if label != '0' else 'benign' for _, _, _, label in other_edges]))  

    # Convert the target_label to same format as other labels
    target_label_modified = target_label.split('_')[0] + '_' + target_label.split('_')[1][:3] if target_label != '0' else 'benign'

    # Create a color map with unique colors for each label
    colors = cm.rainbow(np.linspace(0, 1, len(unique_labels)+1))
    red_color = np.array([1, 0, 0, 1])  # Red color
    color_map = {label: color for label, color in zip(unique_labels, colors) if not np.array_equal(color, red_color)}
    color_map[target_label_modified] = red_color  # Assign red color to the target label separately
    color_map['benign'] = 'skyblue'  # Assign black color to the benign label

    # Add other edges
    for source, destination, relation, label in other_edges:
        edge = (source, destination)

        if label != '0':
            label_prefix = label.split('_')[0]  # Get the first element after splitting by '_'
            label_prefix_extended = label_prefix + "_" + label.split('_')[1][:3]  # Add "_" and the first three characters after the second '_' to the label_prefix
        else:
            label_prefix_extended = 'benign'

        if edge in G.edges():
            edge_labels[edge] += ", " + relation
        else:
            G.add_edge(source, destination)

            # Check if the edge is a part of the target edges (main graph)
            if label == target_label:
                node_colors[source] = 'red'
                node_colors[destination] = 'red'
                edge_colors[edge] = 'red'
            else:
                node_colors[source] = color_map.get(label_prefix_extended, 'gray')  # Set gray as the default color
                node_colors[destination] = color_map.get(label_prefix_extended, 'gray')  # Set gray as the default color
                edge_colors[edge] = color_map.get(label_prefix_extended, 'black')

            edge_labels[edge] = relation

    # Add main graph edges
    for source, destination, relation, label in main_edges:
        edge = (source, destination)
        G.add_edge(source, destination)
        node_colors[source] = 'red'
        node_colors[destination] = 'red'
        edge_colors[edge] = 'red'

        if edge in edge_labels:
            edge_labels[edge] += ", " + relation
        else:
            edge_labels[edge] = relation

    pos = graphviz_layout(G, prog="dot")
    pos = {node: (x, y-0.1) for node, (x, y) in pos.items()}

    plt.figure(figsize=figsize)

    # Draw nodes with custom label styles
    node_labels = {node: rf"$\bf{{{node}}}$" for node in G.nodes()}
    nx.draw(G, pos, with_labels=False, node_size=1500, font_size=8, node_color=[node_colors[node] for node in G.nodes()], edge_color=[edge_colors[edge] for edge in G.edges()], arrowsize=10, font_color='black')
    nx.draw_networkx_labels(G, pos, labels=node_labels, font_size=8, font_weight='bold', font_color='black')

    # Draw edges with custom edge labels
    edge_labels = {(source, destination): f"({len(relations.split(','))})" for (source, destination), relations in edge_labels.items()}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

    if title:
        plt.text(0.05, 0.95, title, transform=plt.gca().transAxes, fontsize=12, fontweight='bold', verticalalignment='top')

    # for labeling all the label with the corresponding color
    patches = [mpatches.Patch(color=color, label=label) for label, color in color_map.items()]
    plt.legend(handles=patches)

    plt.savefig(file_name + ".png", dpi=dpi)
    plt.clf()


In [6]:
def get_related_nodes(edges, target_label):
    related_nodes = set()

    for edge in edges:
        if edge[3] == target_label:
            related_nodes.add(edge[0])
            related_nodes.add(edge[1])

    return related_nodes

def get_other_labels_edges(full_edges, related_nodes, target_label):
    other_labels_edges = []
    for edge in full_edges:
        source, destination, relation, label = edge
        if label != target_label and (source in related_nodes or destination in related_nodes):
            other_labels_edges.append(edge)
    return other_labels_edges


def is_valid_target_label(target_label):
    parts = target_label.split('_')
    return len(parts) >= 2 and len(parts[0]) > 0 and len(parts[1]) > 0

def draw_all_graphs(file_path):
    full_edges = read_data(file_path) 

    unique_labels = set(edge[3] for edge in full_edges)

    os.makedirs("./graph_test5/", exist_ok=True)
    
    # every loop has it's own target label(unique)
    for target_label in unique_labels:
        if not is_valid_target_label(target_label):

            # Here we meet a label: "label" WTF -> solve this in the future
            print(f"Invalid target_label: {target_label}") 
            continue

        target_edges = [edge for edge in full_edges if edge[3] == target_label]

        # file_name = f"./graph_test4/{target_label}"
        file_name = f"./graph_test5/{target_label}"
        draw_graph(full_edges, target_edges, file_name, target_label, title=target_label)
        print(f"{file_name}.png has been generated!")

# file_path = "./data/filtered_labeled_data_v2.txt"
file_path = "./data/labeled_data_benign_AP.txt"

draw_all_graphs(file_path)
print("DONE!!")

./graph_test5/T1499_2fe2d5e6-7b06-4fc0-bf71-6966a1226731.png has been generated!
./graph_test5/T1105_60f63260-39bb-4136-87a0-b6c2dca799fc.png has been generated!
./graph_test5/T1531_b25ae80dad74142fafb510e9c1949ace.png has been generated!
./graph_test5/T1491_47d08617-5ce1-424a-8cc5-c9c978ce6bf9.png has been generated!
./graph_test5/T1057_f8de05d1741dcc468f772ab0ff4dac72.png has been generated!
./graph_test5/1003.003_9f73269695e54311dd61dc68940fb3e1.png has been generated!
./graph_test5/1547.004_aa147165f6c116cb0b0f944abe1db8ce.png has been generated!
./graph_test5/1562.004_8fe59e288f10a486dc8b44bc872019ff.png has been generated!
./graph_test5/T1201_57296a2ddbeb7423c05feef2fe972111.png has been generated!


  plt.savefig(file_name + ".png", dpi=dpi)


./graph_test5/T1560_a1ee301b0508747b468d578a14e5c1a5.png has been generated!
./graph_test5/T1219_af8cb2bf9b436aae5c106a0a9c207e14.png has been generated!
./graph_test5/1074.001_e6dfc7e89359ac6fa6de84b0e1d5762e.png has been generated!
./graph_test5/T1490_2d53d6fabd39bf9c70b0dfcdfbbc926d.png has been generated!
./graph_test5/1055.002_e5bcefee-262d-4568-a261-e8a20855ec81.png has been generated!
./graph_test5/1518.001_b8453a5fe06b24aea12b27592d5c3d3a.png has been generated!
./graph_test5/1547.001_777043894e42d2aae3881e63f6c76d33.png has been generated!
./graph_test5/1087.001_6334877e8e3ba48f7835d4856d90a282.png has been generated!
./graph_test5/T1125_da86001b5081fcf773d8e62f22cf2b00.png has been generated!
./graph_test5/1003.001_35d92515122effdd73801c6ac3021da7.png has been generated!
./graph_test5/T1518_c9be8043-a445-4cbf-b77b-ed7bb007fc7c.png has been generated!


  plt.figure(figsize=figsize)


./graph_test5/1053.005_ee454be9197890de62705ce6255933fd.png has been generated!
./graph_test5/T1547_fe9eeee9a7b339089e5fa634b08522c1.png has been generated!
./graph_test5/T1057_b2a1e430ca6d36eb5af2fe666e769847.png has been generated!
./graph_test5/1074.001_6469befa-748a-4b9c-a96d-f191fde47d89.png has been generated!
./graph_test5/1059.003_f38e58deb7ad20b5538ca40db7b7b4f8.png has been generated!
./graph_test5/T1135_530e47c6-8592-42bf-91df-c59ffbd8541b.png has been generated!
./graph_test5/T1490_c156ac5c9fa67080365268d95f29053d.png has been generated!
./graph_test5/1003.003_f049b89533298c2d6cd37a940248b219.png has been generated!
./graph_test5/T1112_ba6f6214dbd17c54001e0a163b60f151.png has been generated!
./graph_test5/1562.004_8d0a4585e7c4646185a912b14cd9cb46.png has been generated!
./graph_test5/T1105_0856c235a1d26113d4f2d92e39c9a9f8.png has been generated!
./graph_test5/1518.001_2dece965-37a0-4f70-a391-0f30e3331aba.png has been generated!
./graph_test5/T1112_e74d2fb4ef5fa6c766a4151554

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

## Draw all the Graph