In [666]:
import warnings
import plotly.io as pio
import numpy as np
import dash
from dash import dcc, html, Input, Output, State
import plotly.graph_objects as go
import pandas as pd
import math
import random
import networkx as nx
import pickle
import os
import matplotlib.pyplot as plt
from functools import lru_cache
from plotly.colors import qualitative
from collections import defaultdict
from collections import Counter
from matplotlib.colors import BoundaryNorm,ListedColormap

In [667]:
# selected_state = ['DELAWARE', 'MARYLAND', 'VIRGINIA']
'''
################## 1. read all attributes ##################
'''

def read_all_attributes():
    """Load the three raw inputs of the notebook from the ``data/`` folder.

    Reads the module-level ``tags_cols`` list (it must be assigned before this
    function is called) to select the columns of the label workbook.

    Returns
    -------
    overall_tree_df : DataFrame
        Edge list with columns ``proj_1``, ``proj_2``, ``weight``.  Project
        names are cast to ``str`` and stripped; weights are min-max rescaled
        to integers in 1..100 (or set to 50 when all weights are equal).
    tags_df : DataFrame
        Concatenation of the five label sheets, with the project column
        renamed to ``proj_name`` (cast to ``str`` and stripped).
    date_df : DataFrame
        One row per ``proj_name`` with ``date`` and nullable-integer ``year``.
        Rows from MASCOT.xlsx take precedence over types_year.csv.
    """
    # read edges and weights from overall_tree_vis_Deckard_final.csv
    overall_tree_df = pd.read_csv("data/overall_tree_vis_Deckard_final.csv",
                                  names=["proj_1","proj_2","weight"], header=0)
    for endpoint_col in ("proj_1", "proj_2"):
        overall_tree_df[endpoint_col] = overall_tree_df[endpoint_col].astype(str).str.strip()

    min_w = overall_tree_df["weight"].min()
    max_w = overall_tree_df["weight"].max()

    if min_w == max_w:
        # Degenerate case: min-max scaling would divide by zero.
        overall_tree_df["weight"] = 50
    else:
        overall_tree_df["weight"] = (
            ((overall_tree_df["weight"] - min_w) / (max_w - min_w)) * 99 + 1
        ).round().astype(int)


    print("processed data from overall_tree_vis_Deckard_final.csv:")
    print(overall_tree_df)


    # read proj_name, date, year from MASCOT.xlsx and types_year.csv
    date_cols = ["Source folder name", "Date"]
    date_df1 = pd.read_excel("data/MASCOT.xlsx", sheet_name="MASCOT", usecols=date_cols)
    date_df1 = date_df1.rename(columns={"Source folder name": "proj_name", "Date":"date"})
    date_df1["year"] = pd.to_datetime(date_df1["date"], errors='coerce').dt.year
    print(date_df1)

    date_df2 = pd.read_csv("data/types_year.csv", usecols=[0, 1], names=["name","year"], header=0)
    date_df2 = date_df2.rename(columns={"name":"proj_name"})
    date_df2["date"] = pd.to_datetime(date_df2["year"].astype(str), format="%Y", errors='coerce')

    date_df = pd.concat([date_df1, date_df2], ignore_index=True)
    # Normalise the key BEFORE de-duplicating: otherwise "foo" and "foo " (or 42
    # and "42") both survive drop_duplicates and only become identical afterwards,
    # leaving duplicate proj_name rows for the later merge / dict lookups.
    date_df["proj_name"] = date_df["proj_name"].astype(str).str.strip()
    date_df = date_df.drop_duplicates(subset="proj_name", keep="first").reset_index(drop=True)
    date_df["year"] = date_df["year"].astype("Int64")


    print([name for name in date_df['proj_name'].unique() if 'wke_ForBot' in name])

    print("data from Windows sheet of MASCOT.xlsx and types_year.csv:")
    print(date_df)
    print("data without date：")
    print(date_df[date_df['date'].isna()])


    # read proj_name and the 8 tag columns (column list comes from the global tags_cols)
    tag_sheets = ["MASCOT_C&Cpp", "MASCOT_Assembly", "MASCOT_CSharp",
                  "MASCOT_Python", "MalSource_C&Cpp"]
    tags_df = pd.concat(
        [
            pd.read_excel("data/MASCOT_LABELS.xlsx", sheet_name=sheet, usecols=tags_cols)
              .rename(columns={"Source Folder Name": "proj_name"})
            for sheet in tag_sheets
        ],
        ignore_index=True,
    )
    tags_df["proj_name"] = tags_df["proj_name"].astype(str).str.strip()
    print("data from MASCOT_LABELS.xlsx:")
    print(tags_df)

    return overall_tree_df, tags_df, date_df

In [668]:
'''
################## 2. process tags ##################
'''

def process_each_tag(cell, is_FILE=False):
    """Reduce one raw tag cell to a single ``(label, weight)`` pair.

    A cell is either missing, a bare label, one ``label|weight`` entry, or a
    comma-separated list of ``label|weight`` entries.

    Parameters
    ----------
    cell : object
        Raw cell value; anything ``pd.isna`` considers missing is "unlabeled".
    is_FILE : bool, default False
        When True the ``os:`` / ``proglang:`` prefixes are removed first and,
        in a multi-entry cell, entries labelled ``windows`` are ignored.

    Returns
    -------
    tuple[str, int]
        The label with the highest weight and that weight.  ``-1`` is used as
        the weight when none is available; ``("unlabeled", -1)`` is returned
        for missing cells and for multi-entry cells with no usable entry.

    Raises
    ------
    ValueError
        If the text after the last ``|`` of an entry is not an integer.
    """
    if pd.isna(cell):
        return "unlabeled", -1

    cell = str(cell).strip()
    if is_FILE:
        # FILE tags look like "os:windows" / "os:linux" / "proglang:..."; drop the prefixes.
        cell = cell.replace("os:", "").replace("proglang:", "")

    if "," in cell:
        best_label, best_value = "unlabeled", -1
        for part in cell.split(','):
            if '|' not in part:
                continue
            # Split on the LAST pipe so a label that itself contains '|' does not
            # break unpacking, and strip the label: entries after ", " would
            # otherwise keep a leading space and slip past the "windows" check.
            label, value = part.rsplit('|', 1)
            label = label.strip()
            value = int(value)

            if is_FILE and label == "windows":
                continue

            if value > best_value:
                best_label, best_value = label, value
        return best_label, best_value

    if "|" in cell:
        label, value = cell.rsplit('|', 1)
        return label.strip(), int(value)

    return cell, -1

# process tags to tags_name and tags_weight
def process_tags(tags_df):
    """Split every raw tag column into a label column and a ``<tag>_weight`` column.

    For each name in the module-level ``tags_cols[1:]`` the raw cell is parsed
    with ``process_each_tag``; the tag column is overwritten with the winning
    label and ``<tag>_weight`` receives the integer weight (``-1`` when absent).

    The frame is modified in place and also returned.
    """
    for tag in tags_cols[1:]:
        is_FILE = (tag == "FILE")
        # Parse once into plain tuples; building a pd.Series per row inside
        # .apply() is far slower and fails to expand on an empty frame.
        parsed = [process_each_tag(cell, is_FILE=is_FILE) for cell in tags_df[tag]]
        tags_df[tag] = [label for label, _ in parsed]
        tags_df[f"{tag}_weight"] = [weight for _, weight in parsed]
        tags_df[f"{tag}_weight"] = pd.to_numeric(tags_df[f"{tag}_weight"],
                                                 errors='coerce').fillna(-1).astype(int)

    print("data from processed MASCOT_LABELS.xlsx:")
    print(tags_df)

    # Show one sample row for eyeballing; guard the position so a shorter
    # workbook does not abort the whole pipeline with an IndexError.
    sample_pos = 148
    if len(tags_df) > sample_pos:
        print("one data from processed MASCOT_LABELS.xlsx:")
        print(tags_df.iloc[sample_pos])

    return tags_df

In [669]:
'''
################## 3. merge data to get metadata and clean datasets ##################
'''

def get_metadata(tags_df,date_df,overall_tree_df):
    """Join tags with dates into one metadata table and clean the edge list.

    Steps, in order:
      1. outer-merge ``tags_df`` and ``date_df`` on ``proj_name``;
      2. print diagnostics about edges whose endpoints are null or unknown
         to the metadata table;
      3. drop every edge touching ``wke_ForBot`` and rename two project names
         in the metadata so they match the edge list;
      4. fill missing tag labels with "unlabeled" and missing weights with -1
         (columns taken from the module-level ``tags_cols``);
      5. print diagnostics about projects and edges lacking date information.

    Returns ``(meta_df, overall_tree_df)``; the edge frame returned is the
    filtered one.
    """
    # 1. merge date_df and tags_df into meta_df
    meta_df = tags_df.merge(date_df, on="proj_name", how='outer')
    print("merged data from MASCOT.xlsx, types_year.csv, and processed MASCOT_LABELS.xlsx:")
    print(meta_df)

    # 2a. edges with a null endpoint
    endpoint_is_null = overall_tree_df["proj_1"].isna() | overall_tree_df["proj_2"].isna()
    print("Rows where proj_1 or proj_2 is NaN:")
    print(overall_tree_df[endpoint_is_null])

    # 2b. endpoints that never appear as a proj_name in the metadata
    known_projects = set(meta_df["proj_name"].dropna().astype(str).str.strip())
    unknown_left = overall_tree_df.loc[~overall_tree_df["proj_1"].isin(known_projects), ["proj_1"]]
    unknown_right = overall_tree_df.loc[~overall_tree_df["proj_2"].isin(known_projects), ["proj_2"]]
    missing_projects = pd.unique(pd.concat([unknown_left, unknown_right]).values.ravel())

    print("Projects in overall_tree_df but NOT in meta_df:")
    print(missing_projects)
    # the result is ['wke_ForBot', 'wke_litmus2-bot_2' nan 'wke_nzmlite_symantec']

    # 3. clean data: remove wke_ForBot edges, align the two misspelled names
    touches_forbot = (
        (overall_tree_df["proj_1"] == "wke_ForBot") |
        (overall_tree_df["proj_2"] == "wke_ForBot")
    )
    overall_tree_df = overall_tree_df[~touches_forbot]

    meta_df["proj_name"] = meta_df["proj_name"].replace({
        "wke_litmus2-bot_2_": "wke_litmus2-bot_2",
        "wke_nzmlite_symantec___": "wke_nzmlite_symantec",
    })

    # 4. rows that came only from date_df have no tag values after the outer merge
    for tag in tags_cols[1:]:
        weight_col = tag + "_weight"
        meta_df[tag] = meta_df[tag].fillna("unlabeled")
        meta_df[weight_col] = meta_df[weight_col].fillna(-1).astype(int)

    print("processed merged data from MASCOT.xlsx and processed MASCOT_LABELS.xlsx:")
    print(meta_df)
    print([name for name in meta_df['proj_name'].unique() if 'wke_ForBot' in name])

    # 5. data validation
    print("data without date：")
    print(meta_df[meta_df['date'].isna()])

    print("edges without date：")
    undated_projects = meta_df.loc[meta_df["year"].isna(), "proj_name"].unique()
    undated_as_proj_1 = overall_tree_df[overall_tree_df["proj_1"].isin(undated_projects)]
    undated_as_proj_2 = overall_tree_df[overall_tree_df["proj_2"].isin(undated_projects)]

    missing_overall = pd.concat([undated_as_proj_1, undated_as_proj_2]).drop_duplicates()
    print("edges whose proj1 or proj2 lack date info：")
    print(missing_overall)

    return meta_df, overall_tree_df

In [670]:
# read data, process data and clean data
# NOTE: tags_cols is read as a module-level name inside read_all_attributes(),
# process_tags() and get_metadata(), so it has to be assigned before any of
# them is called. The first entry is the project-name column; the remaining
# eight are the tag columns.
tags_cols = ["Source Folder Name", "FUD?", "FILE", "UNK", "FAM", "VULN","BEH", "CLASS", "PACK"]
# Load the edge list, the raw tag table and the per-project date/year table.
overall_tree_df, tags_df, date_df = read_all_attributes()
# Split each tag column into a label column plus a "<tag>_weight" column.
tags_df = process_tags(tags_df)
# Merge tags with dates and drop / rename the projects that do not line up.
meta_df, overall_tree_df = get_metadata(tags_df,date_df,overall_tree_df)

processed data from overall_tree_vis_Deckard_final.csv:
                                  proj_1                proj_2  weight
0       2021-09-11-reverse-shells-master                    42       1
1       2021-09-11-reverse-shells-master              ADR-main       1
2       2021-09-11-reverse-shells-master      AMR-Reverse-main       1
3       2021-09-11-reverse-shells-master                 Alina       1
4       2021-09-11-reverse-shells-master         AzureECX-main       1
...                                  ...                   ...     ...
277651                           woolien  wri_keylogger-master       1
277652                           woolien       wxp78key-master       1
277653                           woolien            xTBot0.0.2       1
277654              wri_keylogger-master       wxp78key-master       1
277655              wri_keylogger-master            xTBot0.0.2       1

[277656 rows x 3 columns]
                                  proj_name       date  year
0   

In [671]:
'''
################## 4.prepare mapping dicts，info data for recall functions and prepare for creating graphic ##################
'''

def get_label_size_cache(label_list,meta_df):
    """Build, per tag, a lookup from tag weight to marker size.

    Parameters
    ----------
    label_list : iterable of str
        Tag names; ``meta_df`` must contain a ``"<tag>_weight"`` column for each.
    meta_df : DataFrame
        Metadata table holding the weight columns (missing weights count as -1).

    Returns
    -------
    dict
        ``{tag: {weight: size}}``.  The mapping depends on how many distinct
        weights the tag has:

        * none  -> empty mapping (previously this raised IndexError);
        * one   -> size 10;
        * 2-3   -> sizes evenly spaced between 10 and 20;
        * 4+    -> the sorted weights are cut into 4 equal-count bins that map
          to sizes 10, 15, 20 and 22.
    """
    LABEL_SIZE_CACHE = {}
    for lab in label_list:
        w = meta_df[f"{lab}_weight"].fillna(-1)
        uniq = np.sort(w.unique())
        print("label_weight:", uniq)

        if len(uniq) == 0:
            # Empty metadata table: nothing to size, and uniq[0] would fail.
            LABEL_SIZE_CACHE[lab] = {}
            continue

        if len(uniq) == 1:
            LABEL_SIZE_CACHE[lab] = {uniq[0]: 10}
            continue

        if len(uniq) < 4:
            # Too few distinct values for four quantile bins.
            sizes = np.linspace(10, 20, len(uniq)).astype(int)
            LABEL_SIZE_CACHE[lab] = dict(zip(uniq, sizes))
            continue

        # General case: rank the distinct weights (1..n) so qcut always sees
        # unique values, then split into 4 bins -> sizes 10, 15, 20, 22.
        ranks = pd.Series(uniq).rank(method="first")
        bins = pd.qcut(ranks, 4, labels=[10, 15, 20, 22])
        LABEL_SIZE_CACHE[lab] = dict(zip(uniq, bins.astype(int)))

    return LABEL_SIZE_CACHE


def get_unique_years(overall_tree_df,meta_df):
    """Return the sorted distinct years of all projects that appear in an edge.

    A project counts as "appearing" when its name is found in ``proj_1`` or
    ``proj_2`` of ``overall_tree_df``; projects without a year are ignored.
    """
    # every project name used as an edge endpoint
    endpoints = pd.unique(overall_tree_df[["proj_1", "proj_2"]].values.ravel())

    # years of exactly those projects, missing values removed
    is_endpoint = meta_df["proj_name"].isin(endpoints)
    years = meta_df.loc[is_endpoint, "year"].dropna().astype(int)

    return sorted(years.unique())


def get_edge_weight_map(overall_tree_df):
    """Map every distinct edge weight to one of three tiers: 1.0, 1.5 or 2.0.

    The sorted distinct weights are split into three equal-count bins (by
    rank, so ties cannot collapse bin edges).

    Returns
    -------
    dict
        ``{weight: tier}`` with float tiers.  The tiers used to be cast to
        ``int``, which truncated 1.5 to 1 and made the middle tier
        indistinguishable from the lowest one.  With fewer than three distinct
        weights the tiers are spread evenly between 1.0 and 2.0 instead
        (``qcut`` cannot build three bins from a single value and raised).
    """
    unique_edge_weights = sorted(overall_tree_df["weight"].unique())
    n_unique = len(unique_edge_weights)

    if n_unique < 3:
        # 0 weights -> {}, 1 weight -> {w: 1.0}, 2 weights -> {lo: 1.0, hi: 2.0}
        return dict(zip(unique_edge_weights, np.linspace(1.0, 2.0, n_unique).tolist()))

    edge_weights_ranks = pd.Series(unique_edge_weights).rank(method="first")
    bins = pd.qcut(edge_weights_ranks, 3, labels=[1.0, 1.5, 2.0])
    # astype(float), not int: the labels are not all whole numbers.
    edge_weight_map = dict(zip(unique_edge_weights, bins.astype(float)))

    return edge_weight_map

# prepare for draw graph
# The eight tag columns (tags_cols without the project-name column).
label_list = ["FUD?", "FILE", "UNK", "FAM", "VULN", "BEH", "CLASS", "PACK"]
# Per tag: project name -> label, and project name -> label weight.
proj_to_labels  = {lab: dict(zip(meta_df["proj_name"], meta_df[lab]))  for lab in label_list}
proj_to_weights = {lab: dict(zip(meta_df["proj_name"], meta_df[f"{lab}_weight"]))  for lab in label_list}

# Per tag: label weight -> marker size.
LABEL_SIZE_CACHE = get_label_size_cache(label_list,meta_df)

# Project name -> date / year lookups.
proj_to_date = dict(zip(meta_df["proj_name"], meta_df["date"]))
proj_to_year = dict(zip(meta_df["proj_name"], meta_df["year"]))
# Year range over the projects that occur as an edge endpoint (not over all of meta_df).
max_year = max(overall_tree_df['proj_1'].map(proj_to_year).max(),
               overall_tree_df['proj_2'].map(proj_to_year).max())
min_year = min(overall_tree_df['proj_1'].map(proj_to_year).min(),
               overall_tree_df['proj_2'].map(proj_to_year).min())
print("max_year: ", max_year, "; min_year: ", min_year)

# compute unique years and rank order
unique_years = get_unique_years(overall_tree_df,meta_df)

# map each year to a marker size that grows by 0.5 per distinct year (15, 15.5, 16, ...)
year_size_map = {year: 15 + i * 0.5 for i, year in enumerate(unique_years)}

# Distinct edge weight -> tier value produced by get_edge_weight_map().
edge_weight_map = get_edge_weight_map(overall_tree_df)


label_weight: [-1]
label_weight: [ -1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  28  29  30  32  35  36  38  39  40  46  50
  51  52  55  62  65  79  88 121]
label_weight: [-1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 23 27 30
 35 36 44]
label_weight: [-1  2  3  4  5  6  7  8  9 11 12 13 17 18 19 23 26 33 36]
label_weight: [-1  1  2  3  4  6  7  8 11 12]
label_weight: [-1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 19 20 22 24 25 26 27
 29 33 80]
label_weight: [-1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 19 20 21 22 23 24 26
 27 28 29 33 35 37 43 51 54 55 61 75]
label_weight: [-1  1]
max_year:  2025 ; min_year:  1997


In [672]:
'''
################## 5. create fixed network graphic ##################
'''

def create_fixed_network(meta_df, overall_tree_df):
    """Build the directed project graph.

    Nodes
        One per ``proj_name`` in ``meta_df`` with ``year`` and ``date``
        attributes (the first row wins when a name occurs more than once).
    Edges
        One per row of ``overall_tree_df``, oriented from the project with the
        earlier (or equal) date to the later one, carrying ``weight``.  A row
        is skipped when neither ``<=`` nor ``>`` holds for the two dates, which
        is the case when a date is missing (NaT).

    The date lookup is derived from the ``meta_df`` argument, so the function
    no longer depends on a module-level ``proj_to_date`` having been built
    from the same frame beforehand.

    Raises
    ------
    KeyError
        If an edge endpoint is not a ``proj_name`` of ``meta_df``.
    """
    G = nx.DiGraph()
    for _, row in meta_df.iterrows():
        if row["proj_name"] not in G.nodes:
            G.add_node(row["proj_name"], year=row["year"], date=row["date"])

    date_of = dict(zip(meta_df["proj_name"], meta_df["date"]))

    # Iterate over the three columns directly; iterrows() materialises a Series
    # per row, which is very slow for an edge list of this size.
    edge_rows = zip(overall_tree_df["proj_1"], overall_tree_df["proj_2"], overall_tree_df["weight"])
    for proj_1, proj_2, weight in edge_rows:
        date_1 = date_of[proj_1]
        date_2 = date_of[proj_2]
        if date_1 <= date_2:
            G.add_edge(proj_1, proj_2, weight=weight)
        elif date_1 > date_2:
            G.add_edge(proj_2, proj_1, weight=weight)
        # else: at least one date is missing -> the pair cannot be ordered, skip it
    return G

# Build the directed project graph once; later cells read G as a module-level global.
G = create_fixed_network(meta_df, overall_tree_df)

In [673]:
'''
################## 6. read functions info ##################
'''

def read_funcs(G):
    """Collect, for every edge of ``G``, the function pairs that link its two projects.

    Reads ``data/func_tag_result.csv`` (project, function, tag) and
    ``data/top5funcspairs_Deckard_final.csv`` (proj1, func1, proj2, func2, weight).

    Returns a ``defaultdict(list)`` keyed by ``(source, target)`` in the same
    orientation as the edge in ``G``; each value is a list of
    ``(source_func, source_tag, target_func, target_tag, weight)`` tuples.
    Function pairs whose projects are not connected in ``G`` are dropped, and
    edges of ``G`` without any function pair are reported on stdout.
    """
    funcs_tag = pd.read_csv("data/func_tag_result.csv", header=0,
                     names=['proj', 'func', 'tag'])

    # (project, function) -> tag; a missing tag is stored as a single space
    tag_map = {
        (proj, func): (" " if pd.isna(tag) else tag)
        for proj, func, tag in zip(funcs_tag['proj'], funcs_tag['func'], funcs_tag['tag'])
    }

    funcs_weight = pd.read_csv("data/top5funcspairs_Deckard_final.csv", header=0,
                     names=['proj1', 'func1', 'proj2', 'func2', 'weight'])

    funcs_info = defaultdict(list)
    pair_columns = [funcs_weight[col] for col in ('proj1', 'func1', 'proj2', 'func2', 'weight')]
    for proj1, func1, proj2, func2, weight in zip(*pair_columns):
        tag1 = tag_map.get((proj1, func1), " ")
        tag2 = tag_map.get((proj2, func2), " ")

        # Store the pair under whichever orientation the graph actually uses.
        if G.has_edge(proj1, proj2):
            funcs_info[(proj1, proj2)].append((func1, tag1, func2, tag2, weight))
        elif G.has_edge(proj2, proj1):
            funcs_info[(proj2, proj1)].append((func2, tag2, func1, tag1, weight))

    for edge in G.edges():
        if edge not in funcs_info:
            u, v = edge
            print(f"Edge {u} -> {v} in G, but not found in funcs_weight")

    return funcs_info

# Function-pair details per edge, keyed by (source, target) in the orientation used by G.
funcs_info = read_funcs(G)





In [674]:
'''
################## 7. define functions called by recall functions ##################
'''

def get_label_color_map(label_mark):
    """Assign a Dark24 colour to every distinct value of the ``label_mark`` column.

    Labels keep their order of first appearance in the global ``meta_df``,
    except that "unlabeled" (also used for missing values) is moved to the
    end.  Colours are taken from the palette starting at index 3 and wrap
    around when there are more labels than colours.
    """
    seen = list(pd.unique(meta_df[label_mark].fillna("unlabeled")))

    ordered = [lab for lab in seen if lab != "unlabeled"]
    if len(ordered) != len(seen):
        # "unlabeled" was present: put it last
        ordered.append("unlabeled")

    palette = qualitative.Dark24
    color_of = {}
    for idx, lab in enumerate(ordered, start=3):
        color_of[lab] = palette[idx % len(palette)]
    return color_of


# "drop_top_percent()" and "get_weight_filtered_edge()" implement the edge filtering behind the "weight-selector" dropdown
def drop_top_percent(group, percent=0.05):
    """Return ``group`` sorted by descending ``weight`` without its heaviest rows.

    ``int(len(group) * percent)`` rows are removed from the top, so small
    groups (fewer than ``1 / percent`` rows) lose nothing.
    """
    n_to_drop = int(len(group) * percent)
    ranked = group.sort_values(by='weight', ascending=False)
    return ranked.iloc[n_to_drop:]


def get_weight_filtered_edge(filter_value):
    """Return the edges to draw for the chosen "weight-selector" value.

    Works on the global ``overall_tree_df`` / ``meta_df``:

    1. every edge gets ``flow_from`` / ``flow_to`` columns, oriented from the
       project with the earlier-or-equal date to the other one (``proj_2`` is
       treated as the source whenever ``date_1 <= date_2`` is not true);
    2. per source project and per target project the heaviest 5% of edges are
       removed (see ``drop_top_percent``), and the two results are unioned;
    3. ``filter_value == "ALL"`` returns that set; otherwise ``filter_value``
       is converted with ``int`` and, for every project, only its that-many
       heaviest incoming and outgoing edges are kept.
    """
    date_of = meta_df.set_index('proj_name')['date'].to_dict()

    # dates of both endpoints, aligned with the edge rows
    proj_1_times = overall_tree_df['proj_1'].map(date_of)
    proj_2_times = overall_tree_df['proj_2'].map(date_of)
    proj_1_is_source = proj_1_times <= proj_2_times

    # working copy with the oriented endpoints attached
    temp_df = overall_tree_df.copy()
    temp_df['flow_from'] = overall_tree_df['proj_1'].where(proj_1_is_source, overall_tree_df['proj_2'])
    temp_df['flow_to'] = overall_tree_df['proj_2'].where(proj_1_is_source, overall_tree_df['proj_1'])

    # Drop the top 5% heaviest edges per source and per target, then union.
    trimmed_parts = [
        temp_df.groupby(key, group_keys=False).apply(drop_top_percent)
        for key in ('flow_from', 'flow_to')
    ]
    temp_df = pd.concat(trimmed_parts).drop_duplicates().reset_index(drop=True)

    if filter_value == "ALL":
        return temp_df

    # For each project keep its k heaviest incoming and outgoing edges.
    k = int(filter_value)
    by_weight = temp_df.sort_values(by='weight', ascending=False)
    top_in = by_weight.groupby('flow_to').head(k)
    top_out = by_weight.groupby('flow_from').head(k)

    # Merge and deduplicate
    return pd.concat([top_in, top_out]).drop_duplicates()


@lru_cache(maxsize=None)
def trace_path(G, start_node, direction):
    """Greedily follow the heaviest edge away from ``start_node``.

    Parameters
    ----------
    G : graph
        Must provide ``successors``, ``predecessors`` and ``get_edge_data``
        and be hashable (it is part of the cache key).
    start_node : hashable
        Node to start from.
    direction : str
        ``'forward'`` follows outgoing edges; any other value follows
        incoming edges.

    Returns
    -------
    (path, path_nodes)
        ``path`` is the list of traversed edges as ``(source, target)`` tuples
        and ``path_nodes`` the nodes reached, both in visiting order.  The
        start node itself is not included in ``path_nodes``.

    The walk stops when the current node has no neighbour with a positive
    weight, or when the heaviest neighbour is already on the path.  The
    latter check is required because edges between projects with equal dates
    can form cycles, which previously made this loop run forever.

    Results are memoised by ``lru_cache``: the returned lists are shared
    between calls and must not be modified by the caller.
    """
    forward = direction == 'forward'
    path = []
    path_nodes = []
    visited = {start_node}
    current_node = start_node

    while current_node:
        neighbors = G.successors(current_node) if forward else G.predecessors(current_node)
        max_weight = 0
        next_node = None

        for neighbor in neighbors:
            edge = (current_node, neighbor) if forward else (neighbor, current_node)
            weight = G.get_edge_data(*edge)['weight']
            if weight > max_weight:
                max_weight = weight
                next_node = neighbor

        # Dead end, or the best continuation would revisit a node (cycle).
        if not next_node or next_node in visited:
            break

        visited.add(next_node)
        path.append((current_node, next_node) if forward else (next_node, current_node))
        path_nodes.append(next_node)
        current_node = next_node

    return path, path_nodes


# Recalculate node layout dynamically when any dropdown value changes
@lru_cache(maxsize=128)
def compute_dynamic_positions(selected_year, fud_mark, label_mark, unlabeled_filter):
    """Lay out the nodes that pass the current dropdown filters.

    Reads the module-level ``G``, ``proj_to_labels``, ``min_year`` and
    ``max_year``.

    Parameters
    ----------
    selected_year
        Not used for the layout (the year filter is disabled); it is only part
        of the cache key.
    fud_mark : str
        ``"FUD"`` keeps only nodes whose "FUD?" label is "FUD", ``"not FUD"``
        keeps the others, anything else keeps all.
    label_mark : str
        Tag whose labels define the horizontal bands on the y axis.
    unlabeled_filter : str
        ``"no unlabeled"`` removes nodes labelled "unlabeled" for ``label_mark``.

    Returns
    -------
    (positions, year_to_x_range)
        ``positions`` maps node -> ``(x, y)``; ``year_to_x_range`` maps every
        year in ``min_year..max_year`` to its ``(x_start, x_end)`` column.
        Columns are proportional to the number of kept nodes in that year
        (empty years count as one node).  Labels are stacked in equal bands
        between -0.5 and 0.5.

    Nodes whose year has no column (missing or outside ``min_year..max_year``)
    are spread over the whole x range, and nodes whose label has no band (the
    "unlabeled" ones, when they are not filtered out) over the whole y range.
    Both cases used to raise KeyError.

    The global ``random`` generator is re-seeded with 42 on every uncached
    call.  The result is memoised: callers must copy ``positions`` before
    modifying it.
    """
    random.seed(42)

    # Filter eligible nodes
    filtered_nodes = []
    for node in G.nodes():
        if fud_mark == "FUD" and proj_to_labels["FUD?"][node] != "FUD":
            continue
        elif fud_mark == "not FUD" and proj_to_labels["FUD?"][node] == "FUD":
            continue
        if unlabeled_filter == "no unlabeled" and proj_to_labels[label_mark][node] == "unlabeled":
            continue
        filtered_nodes.append(node)

    # ------- X-axis: years from left to right, wider intervals for years with more nodes -------
    full_sorted_years = list(range(min_year, max_year + 1))
    known_years = set(full_sorted_years)
    filtered_nodes_years = {node: G.nodes[node]['year'] for node in filtered_nodes}
    # Only years that own a column take part in the width computation.
    filtered_year_counts = Counter(
        year for year in filtered_nodes_years.values()
        if not pd.isna(year) and year in known_years
    )
    for year in full_sorted_years:
        if year not in filtered_year_counts:
            filtered_year_counts[year] = 1  # keep a minimal column for empty years

    filtered_total_count = sum(filtered_year_counts.values())

    x_min, x_max = -0.4, 0.5
    total_width = x_max - x_min
    x_cursor = x_min
    year_to_x_range = {}

    for year in full_sorted_years:
        proportion = filtered_year_counts[year] / filtered_total_count
        width = total_width * proportion
        year_to_x_range[year] = (x_cursor, x_cursor + width)
        x_cursor += width

    # ------- Y-axis: one horizontal band per label (excluding "unlabeled") -------
    filtered_label_map = {node: proj_to_labels[label_mark][node] for node in filtered_nodes}
    filtered_unique_labels = sorted(set(label for label in filtered_label_map.values() if label != "unlabeled"))

    if len(filtered_unique_labels) == 0:
        filtered_label_to_y_range = {}
    else:
        step = 1.0 / len(filtered_unique_labels)
        filtered_label_to_y_range = {
            label: (-0.5 + i * step, -0.5 + (i+1) * step)
            for i, label in enumerate(filtered_unique_labels)
        }

    dy_positions = {}
    for node in filtered_nodes:
        year = filtered_nodes_years[node]
        label = filtered_label_map[node]

        # Fall back to the full extent when the node has no column / band.
        x_start, x_end = year_to_x_range.get(year, (x_min, x_max))
        y_start, y_end = filtered_label_to_y_range.get(label, (-0.5, 0.5))

        x = random.uniform(x_start, x_end)
        y = random.uniform(y_start, y_end)

        dy_positions[node] = (x, y)

    return dy_positions, year_to_x_range


def add_traced_node_positions(fixed_positions, traced_nodes, year_to_x_range):
    """Give every traced node that has no position yet a random one.

    ``fixed_positions`` is updated in place and returned.  The x coordinate is
    drawn inside the column of the node's year (taken from the global ``G``),
    or from (-0.5, 0.5) when the year has no column; the y coordinate is drawn
    from (-0.5, 0.5).  Nodes that already have a position are left untouched.
    """
    default_x_span = (-0.5, 0.5)
    for node in traced_nodes:
        # per the original author, nodes absent from fixed_positions are the unlabeled ones
        if node not in fixed_positions:
            x_lo, x_hi = year_to_x_range.get(G.nodes[node]['year'], default_x_span)
            x = random.uniform(x_lo, x_hi)
            y = random.uniform(-0.5, 0.5)
            fixed_positions[node] = (x, y)

    return fixed_positions

def format_proj_name(s):
    """Fit a project name into exactly 28 characters.

    Shorter names are padded with spaces on the right; longer names are cut
    to 25 characters followed by "...".
    """
    width = 28
    return s.ljust(width) if len(s) <= width else s[:width - 3] + "..."

def format_func_name(s):
    """Fit a function name into exactly 21 characters.

    Shorter names are padded with spaces on the right; longer names are cut
    to 18 characters followed by "...".
    """
    width = 21
    return s.ljust(width) if len(s) <= width else s[:width - 3] + "..."

def format_tag_name(s):
    """Fit a tag name into exactly 22 characters.

    Shorter names are padded with spaces on the right; longer names are cut
    to 19 characters followed by "...".
    """
    width = 22
    return s.ljust(width) if len(s) <= width else s[:width - 3] + "..."

In [675]:
'''
####################################################################
###   8. deploy visual with dashboard response user interation  ####
####################################################################
'''

# Create Dash app
# NOTE(review): MAX_STATES is not referenced anywhere in the visible part of the notebook — confirm it is still needed.
MAX_STATES = 4
# assets_ignore is given the pattern '.*'; presumably this keeps Dash from
# loading anything from the assets folder — TODO confirm against the Dash docs.
app = dash.Dash(__name__, assets_ignore='.*',
                compress=True,                      # Enable Flask gzip compression
                suppress_callback_exceptions=True,
                )
# app = dash.Dash(__name__)

'''################# 8.1 setting layout for the four visuals ###############'''

# Page layout: an outer flex container centring a CSS grid with five named areas
# (header, button-slider, selectors, network | tracing, debug).
# Component ids defined here and used by the callbacks:
#   play-button, year-slider, interval            -> year playback
#   sizeMark-selector, FUD-selector, labelMark-selector,
#   unlabeled-selector, weight-selector           -> filters for the two figures
#   malevo_network, tracing_path                  -> the two graphs
app.layout = html.Div(
    style = {
        'display': 'flex',
        'justify-content': 'center',
        'align-items': 'center',
        'height': '100vh',
},
    children=[
        html.Div([
            html.H4("Visualization for Malware Evolution", style={ 'gridArea': 'header', 'textAlign': 'center', 'width': '100%', 'margin': '1px 1px'}),

            # Play button + year slider (one mark per year between min_year and max_year)
            html.Div([
                html.Button("Play", id="play-button", n_clicks=0, style={'margin-right': '15px'}),
                html.Div([
                    dcc.Slider(
                        id='year-slider',
                        min=min_year,
                        max=max_year,
                        value=max_year,
                        marks={str(year): str(year) for year in range(min_year, max_year+1)},
                        tooltip={"placement": "bottom", "always_visible": False},
                        # Set to 'drag' for updates during dragging; 'mouseup' triggers update only on mouse release
                        updatemode='mouseup',
                        # included=False,
                        step=None,),
                ], style={'flex': '1'},),
            ], style={'gridArea': 'button-slider', 'display': 'flex', 'alignItems': 'center','width': '80%'}),

            # Row of dropdown filters
            html.Div(
                children = [
                    dcc.Dropdown(
                        id='sizeMark-selector',
                        options=[
                            {'label': 'size_by_labelWeight', 'value': 'size indicate labelWeight'},
                            {'label': 'size_by_year', 'value':'size indicate year'}
                            ],
                    value='size indicate labelWeight',  # The default size of marks represents labelWeight
                    style={'width': '160px', 'marginRight': '5px','font-size':'12px','height': '25px'},
                    ),
                    dcc.Dropdown(
                        id='FUD-selector',
                        options=[
                            {'label': 'FUD', 'value': 'FUD'},
                            {'label': 'not FUD', 'value':'not FUD'},
                            {'label': 'ALL', 'value':'ALL'},
                            ],
                    value='ALL',
                    style={'width': '140px','marginRight': '10px', 'font-size':'12px','height': '25px'},
                    ),
                    dcc.Dropdown(
                        id='labelMark-selector',
                        options=[
                            {'label': 'FILE', 'value':'FILE'},
                            {'label': 'FAM', 'value':'FAM'},
                            {'label': 'VULN', 'value':'VULN'},
                            {'label': 'BEH', 'value':'BEH'},
                            {'label': 'CLASS', 'value':'CLASS'},
                            {'label': 'UNK', 'value':'UNK'},
                            {'label': 'PACK', 'value':'PACK'},
                            ],
                    value='CLASS',
                    style={'width': '140px','marginRight': '10px', 'font-size':'12px','height': '25px'},
                    ),
                    dcc.Dropdown(
                        id='unlabeled-selector',
                        options=[
                            {'label': 'no unlabeled', 'value': 'no unlabeled'},
                            # {'label': 'include unlabeled', 'value':'include unlabeled'},
                            ],
                    value='no unlabeled',
                    style={'width': '140px','marginRight': '10px', 'font-size':'12px','height': '25px'},
                    ),
                    dcc.Dropdown(
                        id='weight-selector',
                        options=[
                            {'label': 'top 50 flow in/out', 'value': '50'},
                            {'label': 'top 30 flow in/out', 'value': '30'},
                            {'label': 'top 20 flow in/out', 'value': '20'},
                            {'label': 'top 10 flow in/out', 'value': '10'},
                            {'label': 'top 1 flow in/out', 'value': '1'},
                            {'label': 'ALL flow', 'value':'ALL'},
                            ],
                    value='10',
                    style={'width': '200px','marginRight': '10px', 'font-size':'12px','height': '25px'},
                    ),
                ], style={'gridArea': 'selectors', 'display': 'flex', 'alignItems': 'center','justifyContent': 'flex-start', 'width': '80%'}),


            dcc.Interval(id='interval', interval=2000, n_intervals=0, disabled=True),  # Initially disabled, set the time interval for play

            # Floating hint shown in the top-right corner
            html.Div(
                    "Double-click on the visual to reset the layout",
                style={
                        'position': 'absolute',
                        'top': '50px',
                        'right': '10px',
                        'padding': '5px 10px',
                        'background-color': 'rgba(100, 100, 150, 0.3)',
                        'color': 'white',
                        'border-radius': '5px',
                        'font-size': '16px',
                        'box-shadow': '0px 4px 6px rgba(0, 0, 0, 0.3)',
                        'z-index': 1000
                    }
            ),

            # Main network figure (left grid column)
            html.Div([
                dcc.Graph(id='malevo_network',
                          config={
                                    'staticPlot': False,  # Must be False to allow interaction
                                    'scrollZoom': True,
                                    'displayModeBar': True,
                                    'displaylogo': False
                        })
            ] ,style={'gridArea': 'network','margin': '1px 1px','width': '95%','height': '95%' }),

            # Tracing-path figure (right grid column)
            html.Div([
                dcc.Graph(id='tracing_path',
                          config={
                                    'staticPlot': False,
                                    'scrollZoom': True,
                                    'displayModeBar': False,
                                    'displaylogo': False
                        })
            ] ,style={'gridArea': 'tracing','margin': '1px 1px','width': '96%','height': '95%' }),

            html.Div(id='debug-info', style={'gridArea': 'debug','whiteSpace': 'pre-line', 'marginTop': '20px', 'color': 'blue'}),

        ] , style={
            'display': 'grid',
            'gridTemplateAreas': '''
                "header header"
                "button-slider button-slider"
                "selectors selectors"
                "network tracing"
                "debug debug"
            ''',
            # NOTE(review): five grid rows are declared above but only four row
            # heights are listed here — confirm the sizing of the "debug" row is intended.
            'gridTemplateRows': '2vh 4vh 2vh 94vh',
            'gridTemplateColumns': '55fr 45fr',
            'height': '100vh',  # Make the grid fill the height of the viewport
            'gap': '10px',
            'alignItems': 'center',
            'justifyItems': 'center'
        })
    ]
)

In [676]:
'''################# 8.2 catch play button and year slider info ###############'''
@app.callback(
    [Output('interval', 'disabled'),
     Output('interval', 'n_intervals'),
     Output('year-slider', 'value')],
    [Input('play-button', 'n_clicks'),
     Input('interval', 'n_intervals')],
    [State('year-slider', 'value')]
)
def toggle_and_play(n_clicks, n_intervals, current_year):
    """Drive the year animation.

    Returns ``(interval_disabled, interval_n_intervals, slider_year)``:

    * no trigger (initial call): timer off, slider unchanged;
    * Play clicked: timer on, starting from the current year, or from
      ``min_year`` when the slider already sits at ``max_year``;
    * timer tick: advance one year, or switch the timer off once
      ``max_year`` has been reached.
    """
    triggered = dash.callback_context.triggered
    if not triggered:
        return True, 0, current_year

    # id of the component that fired this callback
    source = triggered[0]['prop_id'].split('.')[0]

    if source == 'interval':
        # Tick: step forward while there are years left, otherwise stop at the last one.
        if current_year < max_year:
            return False, n_intervals, current_year + 1
        return True, 0, current_year

    if source == 'play-button':
        # Restart from the beginning when the slider is already at the end.
        start_year = min_year if current_year >= max_year else current_year
        return False, 0, start_year

    return None


In [677]:
'''################# 8.3 visuals responsed to user interation ###############'''
@app.callback(
    [Output('malevo_network', 'figure'),
     Output('tracing_path', 'figure')],
    [Input('year-slider', 'value'),
     Input('FUD-selector', 'value'),
     Input('labelMark-selector', 'value'),
     Input('unlabeled-selector', 'value'),
     Input('weight-selector', 'value'),
     Input('sizeMark-selector', 'value'),
     Input('malevo_network', 'clickData')]
)
def update_graphs(selected_year, fud_mark, label_mark, unlabeled_filter, weight_filter, size_mark, click_data):
    """Rebuild both figures whenever a control changes or a node is clicked.

    Recomputes the node layout for the current filters, traces the
    heaviest-edge path forwards and backwards from the clicked node (if any),
    stores the resulting positions in the module-level ``fixed_positions``
    and returns ``(malevo_fig, tracing_fig)``.
    """
    global fixed_positions

    # Recalculate fixed_positions
    cached_positions, year_to_x_range = compute_dynamic_positions(selected_year, fud_mark, label_mark, unlabeled_filter)
    # compute_dynamic_positions is lru_cache'd, so it hands back the very same
    # dict on every cache hit. Work on a copy: add_traced_node_positions()
    # below inserts entries in place, and without the copy the traced nodes of
    # one click would stay in the cached layout for all later calls.
    fixed_positions = dict(cached_positions)


    clicked_node = None
    forward_path = []
    backward_path = []
    tracing_nodes = []


    # Check whether there is a click event
    if click_data and 'points' in click_data:
        clicked_node = click_data['points'][0].get('customdata')  # the node that was clicked
        if clicked_node in G:
            forward_path, forward_nodes = trace_path(G, clicked_node, direction='forward')
            backward_path, backward_nodes = trace_path(G, clicked_node, direction='backward')
            tracing_nodes = forward_nodes + [clicked_node] + backward_nodes

            fixed_positions = add_traced_node_positions(fixed_positions, tracing_nodes, year_to_x_range)

    # Update the main graph malevo
    malevo_fig = update_malevo_graph(selected_year, fud_mark, label_mark, unlabeled_filter, weight_filter, size_mark, clicked_node, tracing_nodes, forward_path, backward_path,year_to_x_range)

    # Update the subgraph tracing graph (only show content along the tracing path)
    tracing_fig = update_tracing_graph(selected_year, fud_mark, label_mark, unlabeled_filter, weight_filter, size_mark, clicked_node, tracing_nodes, forward_path, backward_path)

    return malevo_fig, tracing_fig


In [678]:
'''################# 8.4 function for malevo graph update ###############'''
def update_malevo_graph(selected_year, fud_mark, label_mark, unlabeled_filter, weight_filter, size_mark, clicked_node, tracing_nodes, forward_path, backward_path,year_to_x_range):
    """Build the main malware-evolution network figure.

    Reads the module-level graph ``G``, the ``fixed_positions`` layout and the
    lookup tables (``proj_to_labels``, ``proj_to_weights``, ``year_size_map``,
    ``LABEL_SIZE_CACHE``, ``edge_weight_map``) defined elsewhere in the notebook.

    Args:
        selected_year: Only nodes/edges whose ``year`` attribute is <= this value are drawn.
        fud_mark: ``"FUD"`` hides nodes whose ``"FUD?"`` label is ``"unlabeled"``;
            ``"not FUD"`` hides nodes labelled ``"FUD"``; other values disable the filter.
        label_mark: Key into ``proj_to_labels`` / ``proj_to_weights`` choosing the
            label family used for colours and label weights.
        unlabeled_filter: ``"no unlabeled"`` hides nodes whose current label is ``"unlabeled"``.
        weight_filter: ``"ALL"`` keeps every edge; otherwise only the edges returned by
            ``get_weight_filtered_edge(weight_filter)`` are kept.
        size_mark: ``"size indicate year"`` sizes nodes by year, anything else by label weight.
        clicked_node: Currently selected node, or a falsy value when nothing is selected.
        tracing_nodes: Nodes on the tracing path; drawn individually with text labels.
        forward_path: ``(u, v)`` edges of the forward trace (drawn blue).
        backward_path: ``(u, v)`` edges of the backward trace (drawn red).
        year_to_x_range: Mapping ``year -> (x_start, x_end)`` used for the year ticks.

    Returns:
        plotly ``go.Figure`` for the main network view.
    """
    pio.templates.default = 'ggplot2'
    malevo_fig = go.Figure()

    label_color_map = get_label_color_map(label_mark)

    if clicked_node:
        # Add a pseudo title "Tracing Path" to the legend
        malevo_fig.add_trace(go.Scatter(
            x=[None], y=[None],
            mode='lines', line=dict(color='rgba(0,0,0,0)', width=0),
            legendgroup="tracing",
            name="<b>Tracing Path</b>",
            showlegend=True
        ))

        # Legend entry for the red Backward Path edges (no real data is drawn)
        malevo_fig.add_trace(go.Scatter(
            x=[None],  y=[None],
            mode='lines', line=dict(color='#FF0000', width=2),
            legendgroup="tracing",
            name='Backward Path',
            showlegend=True))

        # Legend entry for the blue Forward Path edges
        malevo_fig.add_trace(go.Scatter(
            x=[None],  y=[None],
            mode='lines', line=dict(color='#0000FF', width=2),
            name='Forward Path',
            legendgroup="tracing",
            showlegend=True))
    else:
        print("Clicked node not in graph or fixed_positions.")

    if not G.nodes:
        return go.Figure(layout=dict(
            title="No Data Available for the Selected Year",
            height=800,
            width=1000
        ))

    # Checked before any position lookup below: with an empty layout there is
    # nothing to draw and every ``fixed_positions[...]`` access would fail.
    if not fixed_positions:
        print("Warning: there is no this kind of data.")
        return go.Figure()

    filtered_df = get_weight_filtered_edge(weight_filter)
    filtered_edge_set = set(zip(filtered_df['flow_from'], filtered_df['flow_to']))

    # Hashable copies of the traced edges for O(1) membership tests per graph edge.
    forward_edges = {tuple(edge) for edge in forward_path}
    backward_edges = {tuple(edge) for edge in backward_path}

    def node_size_for(node, cur_weight):
        """Marker size for ``node`` according to the active size mode."""
        if size_mark == "size indicate year":
            return year_size_map[G.nodes[node]['year']]
        # size indicates label weight; 15 is the fallback for unknown weights
        return LABEL_SIZE_CACHE[label_mark].get(cur_weight, 15)

    # ---- Batch all ordinary edges into ONE trace to keep the trace count low ----
    # Segments are separated by ``None`` so Plotly does not connect them.
    edge_x, edge_y = [], []
    for u, v, data in G.edges(data=True):
        if not (G.nodes[u]['year'] <= selected_year and G.nodes[v]['year'] <= selected_year):
            continue
        if (u, v) in forward_edges or (u, v) in backward_edges:
            continue                       # Highlighted edges are drawn separately below
        if fud_mark == "FUD":
            if proj_to_labels["FUD?"][u]=="unlabeled" \
                    or proj_to_labels["FUD?"][v]=="unlabeled" :
                continue
        elif fud_mark == "not FUD":
            if proj_to_labels["FUD?"][u]=="FUD" or proj_to_labels["FUD?"][v]=="FUD":
                continue
        if unlabeled_filter == "no unlabeled":
            if proj_to_labels[label_mark][u] == 'unlabeled' \
                or proj_to_labels[label_mark][v] == 'unlabeled':
                continue
        if weight_filter != "ALL" and (u, v) not in filtered_edge_set:
            continue

        x0, y0 = fixed_positions[u]
        x1, y1 = fixed_positions[v]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    malevo_fig.add_trace(go.Scattergl(
        x=edge_x, y=edge_y,
        mode='lines',
        line=dict(color='#888888', width=1),
        hoverinfo='skip',
        showlegend=False,
    ))

    # ---- Batch nodes that share (size, colour) into one trace each ----
    node_bucket = defaultdict(lambda: {'x':[], 'y':[], 'text':[], 'hover':[]})
    for node in G.nodes():
        if G.nodes[node]['year'] > selected_year:
            continue
        if fud_mark == "FUD" and proj_to_labels["FUD?"][node]=="unlabeled":
            continue
        elif fud_mark == "not FUD" and proj_to_labels["FUD?"][node]=="FUD":
            continue
        cur_label  = proj_to_labels[label_mark][node]
        if unlabeled_filter == "no unlabeled" and cur_label =="unlabeled":
            continue
        p_x, p_y = fixed_positions[node]
        cur_weight = proj_to_weights[label_mark][node]
        size_key   = node_size_for(node, cur_weight)
        color_key  = label_color_map.get(cur_label,'gray')

        bucket = node_bucket[(size_key, color_key)]
        bucket['x'].append(p_x)
        bucket['y'].append(p_y)
        bucket['text'].append(node)
        # Trailing <extra></extra> hides the secondary "trace N" hover box,
        # matching the hover style of the highlighted tracing nodes below.
        bucket['hover'].append(
            f"Malware: {node}<br>Year: {G.nodes[node]['year']}<br>Label: {cur_label}<br>LabelWeight: {cur_weight}<extra></extra>"
        )

    for (size_key, color_key), d in node_bucket.items():
        malevo_fig.add_trace(go.Scattergl(
            x=d['x'], y=d['y'],
            mode='markers',
            marker=dict(size=size_key, color=color_key, line=dict(width=0.5,color='black')),
            # customdata carries the node name so click events can identify the node
            customdata=d['text'], hovertemplate=d['hover'],
            showlegend=False
        ))

    if clicked_node:
        # Highlight the edges of the tracing path, one trace per edge
        for u, v in forward_path + backward_path:
            is_visible = G.nodes[u]['year'] <= selected_year and G.nodes[v]['year'] <= selected_year
            x_start,y_start = fixed_positions[u]
            x_end,y_end = fixed_positions[v]
            raw_weight = G.get_edge_data(u, v)['weight']
            weight = edge_weight_map[raw_weight]
            edge_color = '#0000FF' if (u, v) in forward_edges else '#FF0000'

            # Sample 10 evenly spaced points along the edge (presumably so the
            # hover label can trigger anywhere along the line, not only at its ends).
            sub_num_segments = 10
            sub_x_coords = np.linspace(x_start, x_end, sub_num_segments)
            sub_y_coords = np.linspace(y_start, y_end, sub_num_segments)

            malevo_fig.add_trace(go.Scattergl(
                x=sub_x_coords,
                y=sub_y_coords,
                mode='lines',
                line = dict(color=edge_color, width=weight),
                marker=dict(size=3, opacity=0),  # Intermediate points stay transparent
                visible= is_visible,
                showlegend=False,
                hovertemplate=f'<b>Edge:</b> {u} → {v}<br>'  # Start and end nodes
                      f'<b>Weight:</b> {raw_weight}<extra></extra>',
            ))

        # Highlight the tracing nodes; each gets its own trace so it can carry a text label
        for node in tracing_nodes:
            is_visible = G.nodes[node]['year'] <= selected_year
            is_clicked_node = node == clicked_node
            p_x, p_y = fixed_positions[node]

            cur_label  = proj_to_labels[label_mark][node]
            cur_weight = proj_to_weights[label_mark][node]
            node_size = node_size_for(node, cur_weight)
            marker_color = label_color_map.get(cur_label,'gray')

            malevo_fig.add_trace(go.Scatter(
                x=[p_x],
                y=[p_y],
                mode='markers+text',
                marker = dict(size=node_size,
                              color=marker_color,
                              # Only the clicked node gets a black outline
                              line = dict(color='black' if is_clicked_node else None,
                                          width=2 if is_clicked_node else 0)),
                text=node,
                customdata=[node],
                textposition= "top center",
                name=node,
                visible= is_visible,
                hovertemplate=f"Malware: {node}<br>Year: {G.nodes[node]['year']}<br>Label: {cur_label}<br>LabelWeight: {cur_weight}<extra></extra>",
                showlegend=False
                ))

    # Label colour legend: a pseudo-title entry followed by one entry per label
    malevo_fig.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(size=0, color='rgba(0,0,0,0)'),  # hide the pseudo-title marker
        legendgroup="label",
        name="<b>label</b>",
        showlegend=True
    ))
    for label_i, color in label_color_map.items():
        malevo_fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=15, color=color),
            legendgroup="label",
            showlegend=True,
            name=f"{label_i}"))

    # Axis ranges: bounding box of all positioned nodes plus a 5% margin per side
    x_coords = [pos[0] for pos in fixed_positions.values()]
    y_coords = [pos[1] for pos in fixed_positions.values()]

    x_min, x_max = min(x_coords), max(x_coords)
    y_min, y_max = min(y_coords), max(y_coords)
    margin_factor = 0.05
    x_range = [x_min - (x_max - x_min) * margin_factor, x_max + (x_max - x_min) * margin_factor]
    y_range = [y_min - (y_max - y_min) * margin_factor, y_max + (y_max - y_min) * margin_factor]

    malevo_fig.update_layout(
        title=dict(
            text = f"Tracing Malware Evolution Through Code Reuse Analysis (Up to <span style='color:red; font-size:24px;'>{selected_year}</span>)",
            font = dict(size=16),
            ),
        showlegend=True,
        dragmode='pan',
        legend=dict(
            traceorder="grouped",
            x=-0.13,
            y=1,
            xanchor="left",
            yanchor="top"
        ),
        xaxis=dict(showgrid=False, zeroline=False, visible=False, range=x_range),
        yaxis=dict(showgrid=False, zeroline=False, visible=False, range=y_range),
        margin=dict(l=50, r=50, t=50, b=50),
        height=800,
        width=1200,
        font=dict(size=10),
        hovermode="closest"
    )

    # Year tick labels and separators along the bottom edge
    for year, (x_start, x_end) in year_to_x_range.items():
        width = x_end - x_start
        # Always label the first and last year; label the others only when
        # their column is wide enough to avoid overlapping text.
        if year == min_year or year == max_year or width > 0.015:
            malevo_fig.add_annotation(
                x=x_start,
                y=0, yref='paper',        # pinned to the bottom of the plotting area
                text=str(year),
                showarrow=False,
                font=dict(size=10, color='black'),
                xanchor='left',
                yanchor='top'             # text hangs below the y=0 line
            )

            # Vertical dashed line marking where this year's column starts
            malevo_fig.add_shape(
                type="line",
                x0=x_start, x1=x_start,
                y0=y_range[0], y1=y_range[1],
                line=dict(
                    color="gray",
                    width=1,
                    dash="dash"
                ),
                layer="below"
            )

    return malevo_fig


In [679]:
'''################# 8.5 function for tracing graph update ###############'''
def update_tracing_graph(selected_year, fud_mark, label_mark, unlabeled_filter, weight_filter, size_mark, clicked_node, tracing_nodes, forward_path,backward_path):
    """Render the detail view that shows only the tracing path of the clicked node.

    Reads the module-level ``G``, ``fixed_positions``, ``funcs_info`` and the
    label/size lookup tables defined elsewhere in the notebook.

    Args:
        selected_year: Path elements whose ``year`` exceeds this are added but hidden.
        fud_mark, unlabeled_filter, weight_filter: Accepted for signature parity
            with the main-figure builder; not read by this function.
        label_mark: Key selecting the label family for colours and label weights.
        size_mark: ``"size indicate year"`` sizes nodes by year, anything else by label weight.
        clicked_node: The selected node; it is drawn with a black outline.
        tracing_nodes: All nodes on the tracing path.
        forward_path: ``(u, v)`` edges of the forward trace (blue).
        backward_path: ``(u, v)`` edges of the backward trace (red).

    Returns:
        plotly ``go.Figure``; a "No tracing path" placeholder when
        ``tracing_nodes`` is empty.
    """
    pio.templates.default = 'ggplot2'
    tracing_fig = go.Figure()
    label_color_map = get_label_color_map(label_mark)

    # No path at all: return a placeholder right away so the min()/max()
    # computations further down never see an empty sequence.
    if not tracing_nodes:
        placeholder_note = dict(
            text="No tracing path",
            showarrow=False,
            x=0.5, y=0.5, xref='paper', yref='paper',
            font=dict(size=16)
        )
        return go.Figure(
            layout=dict(
                xaxis=dict(visible=False),
                yaxis=dict(visible=False),
                annotations=[placeholder_note]
            )
        )

    # Without a selected node there is nothing further to draw.
    if not clicked_node:
        return tracing_fig

    def padded_range(values, margin_factor):
        """[min, max] of ``values`` widened on both sides by ``margin_factor`` of the span."""
        low, high = min(values), max(values)
        return [low - (high - low) * margin_factor, high + (high - low) * margin_factor]

    # ---- path edges, one trace per edge, with the code-reuse details as hover text ----
    for src, dst in forward_path + backward_path:
        edge_shown = G.nodes[src]['year'] <= selected_year and G.nodes[dst]['year'] <= selected_year
        src_x, src_y = fixed_positions[src]
        dst_x, dst_y = fixed_positions[dst]
        line_width = edge_weight_map[G.get_edge_data(src, dst)['weight']]
        if (src, dst) in forward_path:
            line_color = '#0000FF'
        else:
            line_color = '#FF0000'

        # Intermediate points between the two endpoints
        point_count = 5
        line_xs = np.linspace(src_x, dst_x, point_count)
        line_ys = np.linspace(src_y, dst_y, point_count)

        src_name = format_proj_name(src)
        dst_name = format_proj_name(dst)
        hover_lines = [
            "Project level code reuse:",
            "    proj1                         =>   proj2                          weight",
            f"    {src_name}  =>   {dst_name}   {G[src][dst]['weight']}",
            "",
            "Function level code reuse:"
        ]

        # One three-line group per function-level record of this edge
        for f1, t1, f2, t2, w in funcs_info.get((src, dst), []):
            func1 = format_func_name(f1)
            tag1 = format_tag_name(t1)
            func2 = format_func_name(f2)
            tag2 = format_tag_name(t2)
            hover_lines.extend([
                f"    func1: {func1}  =>   func2: {func2}   weight:{w}",
                f"    tag1: {tag1}  =>   tag2: {tag2}",
                "    ",
            ])

        hover_text = "<br>".join(hover_lines)

        edge_trace = go.Scatter(
            x=line_xs,
            y=line_ys,
            mode='lines',
            line=dict(color=line_color, width=line_width),
            marker=dict(size=3, opacity=0),
            visible=edge_shown,
            showlegend=False,
            # Same text on every sampled point of the edge
            text=[hover_text]*len(line_xs),
            hoverinfo='text'
        )
        tracing_fig.add_trace(edge_trace)

    # ---- path nodes, one labelled trace per node ----
    by_year = size_mark == "size indicate year"
    for node in tracing_nodes:
        node_year = G.nodes[node]['year']
        node_shown = node_year <= selected_year
        outlined = node == clicked_node
        node_x, node_y = fixed_positions[node]

        cur_label  = proj_to_labels[label_mark][node]
        cur_weight = proj_to_weights[label_mark][node]

        # Size follows the year or the label weight (15 when the weight is unknown)
        node_size = year_size_map[G.nodes[node]['year']] if by_year \
            else LABEL_SIZE_CACHE[label_mark].get(cur_weight, 15)

        outline = dict(color='black' if outlined else None,
                       width=2 if outlined else 0)

        node_trace = go.Scatter(
            x=[node_x],
            y=[node_y],
            mode='markers+text',
            marker=dict(size=node_size,
                        color=label_color_map.get(cur_label,'gray'),
                        line=outline),
            text=node+f" ({G.nodes[node]['year']})",
            customdata=[node],
            textposition="top center",
            name=node,
            visible=node_shown,
            hovertemplate=f"Malware: {node}<br>Year: {G.nodes[node]['year']}<br>Label: {cur_label}<br>LabelWeight: {cur_weight}<extra></extra>",
            showlegend=False
        )
        tracing_fig.add_trace(node_trace)

    # ---- layout: frame the traced nodes with a 6.5% margin on each side ----
    path_xs = [fixed_positions[node][0] for node in tracing_nodes]
    path_ys = [fixed_positions[node][1] for node in tracing_nodes]
    sub_x_range = padded_range(path_xs, 0.065)
    sub_y_range = padded_range(path_ys, 0.065)

    tracing_fig.update_layout(
        title=dict(
            text="Tracing Path Detail",
            font=dict(size=16),
        ),
        hoverlabel=dict(font_family="Courier New, monospace"),
        showlegend=True,
        dragmode='pan',
        xaxis=dict(showgrid=False, zeroline=False, visible=False, range=sub_x_range),
        yaxis=dict(showgrid=False, zeroline=False, visible=False, range=sub_y_range),
        margin=dict(l=50, r=50, t=50, b=50),
        height=800,
        font=dict(size=10),
        hovermode="closest"
    )

    return tracing_fig



In [680]:
# # Debug
# @app.callback(
#     Output('debug-info', 'children'),  # Output to debug area
#     [Input('malevo_network', 'clickData')]  # Listen for click events
# )
# def display_debug_info(click_data):
#     debug_info = []
#     for proj_name, (x, y) in fixed_positions.items():
#         debug_info.append(f"Node: {proj_name}, Year: {years[proj_name]}, x: {x}")
#     # print(f"Node: {proj_name}, Year: {years[proj_name]}, x: {x}, y: {y}")
#     if click_data and 'points' in click_data:
#         clicked_node = click_data['points'][0].get('customdata', None)
#         debug_info.append(f"Clicked Node: {clicked_node}")
#         forward_path =[]
#         edge_colors = []
#         if clicked_node in G:
#             debug_info.append(f"'{clicked_node}' is in the graph.")
#             forward_path = trace_path(G, clicked_node, direction='forward')
#             backward_path = trace_path(G, clicked_node, direction='backward')
#
#             for u, v, data in G.edges(data=True):
#
#                 edge_color = 'orange' if (u, v) in forward_path else 'blue' if (u, v) in backward_path else 'black'
#                 edge_colors.append(edge_color)
#             # debug_info.append(f"Forward Path: {forward_path}")
#             debug_info.append(f"Backward Path: {backward_path}")
#             debug_info.append(f"Forward Path: {forward_path}")
#             debug_info.append(f"edge_colors: {edge_colors}")
#         else:
#             debug_info.append(f"'{clicked_node}' is NOT in the graph.")
#     else:
#         debug_info.append("No click data available.")
#
#     return "\n".join(debug_info)


# Run the Dash dashboard and serve the visualization over HTTP
if __name__ == '__main__':

    # Dash default port is 8050; for multiple services, you can use ports 8051 to 8099.
    # NOTE(review): debug=True combined with host='0.0.0.0' makes the debug tooling
    # reachable from every machine that can connect to this port — confirm this is
    # only launched on a trusted network, or switch debug off for shared servers.
    # `app.run` supersedes the deprecated `app.run_server`; fall back to the old
    # name only when the installed Dash release does not provide `run` yet.
    run_app = getattr(app, 'run', None) or app.run_server
    run_app(debug=True, use_reloader=False, host='0.0.0.0', port=8070)

    # Now, you can visit http://localhost:8070 or your server's URL:8070 for interaction or your 
