In [1]:
!pip install --upgrade pandas plotly scikit-learn


Collecting pandas
  Downloading pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Downloading pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.8 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m1.8 MB/s[0m  [33m0:00:06[0ma [36m0:00:01[0mm eta [36m0:00:01[0m
[?25hInstalling collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.3.2
    Uninstalling pandas-2.3.2:
      Successfully uninstalled pandas-2.3.2
Successfully installed pandas-2.3.3


In [2]:
import json
import pandas as pd
import plotly.express as px

In [13]:
# Load log file
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
with open("EXP1_CNN_fashion_mnist_1_logs.json", "r", encoding="utf-8") as f:
    logs = json.load(f)

# Prepare round-level data
# One summary row is built per entry in `logs`. Entries whose "clients_logs"
# list is empty are skipped: the round number, num_rounds and lr are read from
# the first client entry, and the mean duration would divide by zero.
round_columns = [
    "round", "total_examples", "avg_duration",
    "accuracy", "data_mb", "num_rounds", "lr",
]
round_data = []
for round_entry in logs:
    clients = round_entry["clients_logs"]
    if not clients:
        continue
    first_client = clients[0]
    round_data.append({
        "round": first_client["server_round_number"],
        # Sum of the per-client example counts for this round.
        "total_examples": sum(c["num-examples"] for c in clients),
        # Arithmetic mean of the per-client round durations.
        "avg_duration": sum(c["round_duration"] for c in clients) / len(clients),
        # Optional round-level keys: None when the log entry lacks them.
        "accuracy": round_entry.get("round_acc"),
        "data_mb": round_entry.get("total_amount_data_round_mb"),
        "num_rounds": first_client["num_rounds"],
        "lr": first_client["lr"],
    })

# Passing the columns explicitly keeps the expected schema even when no rows
# were collected, so later cells can still reference the columns by name.
df_rounds = pd.DataFrame(round_data, columns=round_columns)

# Prepare client-level data
def _client_row(entry):
    """Flatten one client log entry into a row of the per-client table."""
    rounds_total = entry["num_rounds"]
    # num-examples divided by num_rounds; 0 when num_rounds is not positive.
    examples_share = entry["num-examples"] / rounds_total if rounds_total > 0 else 0
    return {
        "round": entry["server_round_number"],
        "client_id": entry["client_id"],
        "duration": entry["round_duration"],
        "loss": entry["round_loss"],
        "num_examples": entry["num-examples"],
        "avg_examples": examples_share,
    }


# One row per client entry, visiting rounds in the order they appear in `logs`.
client_data = [
    _client_row(client_entry)
    for logged_round in logs
    for client_entry in logged_round["clients_logs"]
]

df_clients = pd.DataFrame(client_data)


Round-based plots

1. Number of examples per round

In [14]:
# Line chart of the summed per-client example counts for each round;
# num_rounds and lr are added to the hover tooltip.
fig1 = px.line(
    data_frame=df_rounds,
    x="round",
    y="total_examples",
    hover_data=["num_rounds", "lr"],
    title="Total Number of Examples per Round",
)
fig1.show()


2. Average client duration per round

In [5]:
# Line chart of the mean client round duration for each round;
# num_rounds and lr are added to the hover tooltip.
fig2 = px.line(
    data_frame=df_rounds,
    x="round",
    y="avg_duration",
    hover_data=["num_rounds", "lr"],
    title="Average Round Duration",
)
fig2.show()


3. Accuracy per round

In [6]:
# Line chart of the logged round accuracy for each round;
# num_rounds and lr are added to the hover tooltip.
fig3 = px.line(
    data_frame=df_rounds,
    x="round",
    y="accuracy",
    hover_data=["num_rounds", "lr"],
    title="Accuracy per Round",
)
fig3.show()


4. Data transmitted per round

In [7]:
# Line chart of the logged amount of data (MB) for each round;
# num_rounds and lr are added to the hover tooltip.
fig4 = px.line(
    data_frame=df_rounds,
    x="round",
    y="data_mb",
    hover_data=["num_rounds", "lr"],
    title="Data Transmitted per Round (MB)",
)
fig4.show()


Client-based plots

1. Total duration per client across all rounds

In [8]:
# Sum each client's round durations over every round it appears in.
# as_index=False keeps client_id as a regular column for plotting.
df_client_duration = df_clients.groupby("client_id", as_index=False)["duration"].sum()

fig5 = px.bar(
    data_frame=df_client_duration,
    x="client_id",
    y="duration",
    title="Total Training Duration per Client",
)
fig5.show()


2. Total loss per client across all rounds

In [9]:
# Sum each client's logged round losses over every round it appears in,
# then turn the client_id index back into a column for plotting.
loss_by_client = df_clients.groupby("client_id")["loss"]
df_client_loss = loss_by_client.sum().reset_index()

fig6 = px.bar(
    data_frame=df_client_loss,
    x="client_id",
    y="loss",
    title="Total Training Loss per Client",
)
fig6.show()


3. Average number of examples per client across all rounds

In [15]:
# Mean number of examples per client over the rounds it appears in.
# The per-row "avg_examples" column is already num-examples / num_rounds, so
# taking its mean would divide a second time and shrink the plotted value by
# a factor of num_rounds. Average the raw "num_examples" column instead, and
# keep the output column name "avg_examples" that the chart expects.
df_client_avg_examples = (
    df_clients.groupby("client_id")["num_examples"]
    .mean()
    .rename("avg_examples")
    .reset_index()
)
fig7 = px.bar(
    df_client_avg_examples, x="client_id", y="avg_examples",
    title="Average Number of Examples per Client"
)
fig7.show()