# Compute exchanged data

In [1]:
import json
from pathlib import Path

import networkx as nx
import numpy as np
from keras.saving.saving_api import load_model
from matplotlib import pyplot as plt

from gossiplearning.config import Config

In [5]:
# Number of previous timesteps used for prediction; get_timeseries_size adds it
# to the number of dataset entries when counting timesteps.
N_TIMESTEPS = 4

# Read the raw JSON first, then validate it into a Config object.
with open("config.json", "r") as f:
    raw_config = json.load(f)

config = Config.model_validate(raw_config)

In [3]:
# Root folders read by the cells below: per-simulation datasets are looked up under
# dataset_base_dir/<simulation>/..., network adjacency lists under network_base_dir/<simulation>/...
dataset_base_dir = Path("data/datasets/1func_10nodes_3k")
network_base_dir = Path("data/networks/10_nodes_3k")

### Centralized exchanged bytes

In [23]:
def get_timeseries_size(dataset: dict[str, np.ndarray]) -> int:
    """Estimate the size in bytes of the raw time series behind a node dataset.

    Each entry of ``X_train`` / ``X_val`` is treated as one timestep. The
    ``N_TIMESTEPS`` leading timesteps are added on top, since they have no
    entry of their own (not enough past data to build one).
    NOTE(review): the previous comment spoke of "N-1" leading timesteps while
    the code adds ``N_TIMESTEPS`` — confirm which one is intended.

    The size of a single timestep is taken from the first ``Y_train`` entry.
    Timestamps are not counted: if nodes agree on the sampling frequency only
    the first timestamp (one int64) has to be sent, which is negligible.

    Args:
        dataset: mapping that provides at least "X_train", "X_val" and "Y_train".

    Returns:
        The number of timesteps multiplied by the bytes of one timestep.
    """
    entries_with_history = len(dataset["X_train"]) + len(dataset["X_val"])
    bytes_per_timestep = dataset["Y_train"][0].nbytes

    return (entries_with_history + N_TIMESTEPS) * bytes_per_timestep

In [30]:
# Centralized scenario: every node ships its whole time series to a master node.
# The cost of one shipment is the dataset size multiplied by the number of hops
# (shortest path length) between the node and the master.
n_simulations = 5

# One value per (simulation, master node) pair, appended in that order.
exchanged_bytes = []

for i in range(n_simulations):
    network = nx.read_adjlist(network_base_dir / str(i) / "adj_list.txt", nodetype=int)

    # Key the hop counts by source node id. Indexing a tuple by position would
    # only be right if the graph iterated its nodes exactly in 0..n-1 order,
    # which the adjacency-list file does not guarantee.
    paths_length = dict(nx.shortest_path_length(network))

    dataset_sizes = []
    for node in range(config.n_nodes):
        # np.load returns a lazily-read archive for .npz files; close it as soon
        # as the size has been computed instead of leaking one handle per node.
        with np.load(dataset_base_dir / str(i) / "4in" / f"node_{node}.npz") as dataset:
            dataset_sizes.append(get_timeseries_size(dataset))

    for master_node in range(config.n_nodes):
        # The master itself contributes 0 bytes (distance to itself is 0).
        simulation_exchanged_data = sum(
            dataset_sizes[node] * paths_length[master_node][node]
            for node in range(config.n_nodes)
        )

        exchanged_bytes.append(simulation_exchanged_data)

In [34]:
# Persist the centralized results as the textual form of the list, followed by a newline.
with open(dataset_base_dir / "exchanged_bytes.txt", "w") as f:
    f.write(f"{exchanged_bytes}\n")

### Gossip exchanged bytes

In [6]:
# A single saved model is enough to measure the payload size: all models are
# stated to have the same size, so the first one of simulation "0" is loaded.
model_path = Path(config.workspace_dir, "0", "models", "0.h5")
model = load_model(str(model_path))

Metal device set to: Apple M1 Pro


In [8]:
# Total bytes of the model parameters, summed over every weight array.
layer_weights = model.get_weights()
model_weights_bytes = sum(weights.nbytes for weights in layer_weights)

# Scale the full model size by config.training.perc_sent_weights to get the
# bytes carried by one exchanged model.
avg_exchanged_model_bytes = model_weights_bytes * config.training.perc_sent_weights

In [11]:
# Gossip scenario: for every experiment of the grid, estimate the bytes exchanged
# in each simulation as (number of logged messages) x (bytes of one exchanged model)
# and write the per-simulation list into the experiment folder.
# NOTE(review): the comparison cells further down read avg_gossip_exchanged_bytes and
# avg_num_of_messages, which this cell does not compute — confirm where they come from.

exp = "grid_1fn_10n3k"
for experiment_idx in range(0, 78):
    experiment_dir = Path(f"experiments/{exp}_{experiment_idx}")
    gossip_exchanged_bytes = []

    # Distinct loop variable: the simulation index must not clobber the experiment index.
    for simulation_idx in range(5):
        history_path = experiment_dir / str(simulation_idx) / "history.json"
        # Close the history file deterministically instead of leaking the handle.
        with history_path.open("r") as history_file:
            history = json.load(history_file)

        n_exchanged_messages = len(history["messages"])
        simulation_exchanged_data = n_exchanged_messages * avg_exchanged_model_bytes

        gossip_exchanged_bytes.append(simulation_exchanged_data)

    with open(experiment_dir / "exchanged_data.txt", "w") as f:
        print(gossip_exchanged_bytes, file=f)

### Comparison

In [10]:
# Bar chart comparing the two scenarios, saved as a PNG under <workspace>/plots.
# NOTE(review): avg_centralized_exchanged_bytes and avg_gossip_exchanged_bytes are not
# assigned in the visible cells — confirm which cell defines them before running.
plt.ioff()

fig, ax = plt.subplots()

bar_labels = ["Centralized", "Gossip"]
bar_heights = [avg_centralized_exchanged_bytes, avg_gossip_exchanged_bytes]
bar_colors = ["tomato", "royalblue"]

ax.bar(bar_labels, height=bar_heights, color=bar_colors)
ax.set_title("Exchanged data comparison")
ax.set_ylabel("Exchanged bytes")

# Create the output folder (and any missing parents) if it does not exist yet.
plots_folder = Path(config.workspace_dir, "plots")
plots_folder.mkdir(parents=True, exist_ok=True)

out_path = plots_folder / "exchanged_data.png"
plt.savefig(out_path, dpi=300)

In [11]:
# Bare expression: in a notebook this displays the current value of avg_num_of_messages.
# NOTE(review): avg_num_of_messages is not assigned in the visible cells — confirm which
# cell defines it.
avg_num_of_messages

14.34

In [12]:
# pandas is not among the notebook's imports, so `pd` would raise NameError here;
# import it in this cell, the only one that needs it.
import pandas as pd

# One row per scenario, with a single "exchanged_bytes" column.
# NOTE(review): avg_centralized_exchanged_bytes and avg_gossip_exchanged_bytes are not
# assigned in the visible cells — confirm which cell defines them.
exchanged_data_df = pd.DataFrame(
    data=[
        {"exchanged_bytes": avg_centralized_exchanged_bytes},
        {"exchanged_bytes": avg_gossip_exchanged_bytes},
    ],
    index=["Centralized", "Gossip"],
)

# Export the comparison table as CSV in the workspace directory.
out_path = Path(config.workspace_dir) / "exchanged_data.csv"

exchanged_data_df.to_csv(out_path)