In [1]:
import pickle

trace_file_path = "../benchmark/dataset/data/device_info/client_behave_trace"  # Adjust if needed

with open(trace_file_path, "rb") as f:
    trace_data = pickle.load(f)

In [2]:
print("Type of loaded object:", type(trace_data))

# Preview first 10 items
print(f"\nFirst {min(10, len(trace_data))} items:")
for i, (key, value) in enumerate(trace_data.items()):
    if i >= 10:
        break
    print(f"\nKey: {key}")
    for k, v in value.items():
        print(f"  {k}: {v}")

Type of loaded object: <class 'collections.OrderedDict'>

First 10 items:

Key: 1
  duration: 972262
  inactive: [59046, 87718, 161219, 1081886]
  finish_time: 1123200
  active: [51490, 60870, 140727, 164520]
  model: LG-K350

Key: 2
  duration: 691032
  inactive: [12823, 658127, 685128, 715053, 765786, 843740, 917249, 917251]
  finish_time: 950400
  active: [10596, 12885, 683099, 713791, 750327, 834323, 901855, 917249]
  model: CPH1909

Key: 3
  duration: 683891
  inactive: [109, 4829, 5790, 6531, 7062, 7103, 8804, 9704, 10470, 10564, 11227, 332404, 417355, 418657, 425945, 428792, 434786, 436048, 436051, 440068, 464612, 504109, 509786, 510945, 510964, 512744, 522375, 542098, 591416, 595495, 598149, 600897, 605927, 612416, 613358, 617562, 629093, 635062, 635649, 637467, 638229, 640019, 640635, 641061, 643812, 644125, 677584, 691196, 764740, 782890, 783373, 783374, 783762, 784356, 786158, 788755, 790541, 790563, 791501, 793749, 794954, 795216, 795307, 797007, 797039, 797048, 797864, 797

In [3]:
print(f"Total clients (keys): {len(trace_data)}")

# Initialize accumulators
active_lengths = []
inactive_lengths = []
durations = []
finish_times = []

for trace in trace_data.values():
    durations.append(trace.get('duration', 0))
    finish_times.append(trace.get('finish_time', 0))
    active_lengths.append(len(trace.get('active', [])))
    inactive_lengths.append(len(trace.get('inactive', [])))

# Print basic statistics
print("\n--- Duration Statistics ---")
print(f"Min: {min(durations)}")
print(f"Max: {max(durations)}")
print(f"Mean: {sum(durations) / len(durations):.2f}")

print("\n--- Finish Time Statistics ---")
print(f"Min: {min(finish_times)}")
print(f"Max: {max(finish_times)}")
print(f"Mean: {sum(finish_times) / len(finish_times):.2f}")

print("\n--- Active/Inactivity Span Counts ---")
print(f"Average # of active intervals per client: {sum(active_lengths) / len(active_lengths):.2f}")
print(f"Average # of inactive intervals per client: {sum(inactive_lengths) / len(inactive_lengths):.2f}")

# Optional: consistency check
inconsistent_clients = sum(1 for i, trace in enumerate(trace_data.values())
                           if len(trace.get('active', [])) != len(trace.get('inactive', [])))
print(f"\nClients with mismatched active/inactive counts: {inconsistent_clients}")

Total clients (keys): 107749

--- Duration Statistics ---
Min: 1
Max: 972262
Mean: 79687.61

--- Finish Time Statistics ---
Min: 86400
Max: 1296000
Mean: 393232.30

--- Active/Inactivity Span Counts ---
Average # of active intervals per client: 20.81
Average # of inactive intervals per client: 20.81

Clients with mismatched active/inactive counts: 0


In [4]:
import pickle

capacity_file_path = "../benchmark/dataset/data/device_info/client_device_capacity"  # Adjust if needed

with open(capacity_file_path, "rb") as f:
    capacity_data = pickle.load(f)

In [5]:
print("Type of loaded object:", type(capacity_data))

# Preview first 10 items
print(f"\nFirst {min(10, len(capacity_data))} items:")
for i, (key, value) in enumerate(capacity_data.items()):
    if i >= 10:
        break
    print(f"\nKey: {key}")
    for k, v in value.items():
        print(f"  {k}: {v}")

Type of loaded object: <class 'dict'>

First 10 items:

Key: 1
  computation: 153.0
  communication: 2209.615982329485

Key: 2
  computation: 39.0
  communication: 18437.311985652217

Key: 3
  computation: 22.0
  communication: 22915.494847737755

Key: 4
  computation: 149.0
  communication: 13507.696000153657

Key: 5
  computation: 29.0
  communication: 6924.407283130328

Key: 6
  computation: 138.0
  communication: 14506.494272424507

Key: 7
  computation: 176.0
  communication: 32545.573620752573

Key: 8
  computation: 44.0
  communication: 42360.068898122656

Key: 9
  computation: 154.0
  communication: 1931.2100827831061

Key: 10
  computation: 64.0
  communication: 3345.713888063672


In [6]:
import statistics

print(f"Total clients (keys): {len(capacity_data)}")

# Initialize accumulators
communications = []
computations   = []

for trace in capacity_data.values():
    # use the correct singular key names
    communications.append(trace.get('communication', 0))
    computations.append(trace.get('computation', 0))

# Print basic statistics
print("\n--- Communication Bandwidth Statistics ---")
print(f"Min:  {min(communications)}")
print(f"Max:  {max(communications)}")
print(f"Mean: {statistics.mean(communications):.2f}")

print("\n--- Computation Speed Statistics (ms per sample) ---")
print(f"Min:  {min(computations)}")
print(f"Max:  {max(computations)}")
print(f"Mean: {statistics.mean(computations):.2f}")

Total clients (keys): 500000

--- Communication Bandwidth Statistics ---
Min:  1016.8808511071711
Max:  204056.74594524835
Mean: 13736.14

--- Computation Speed Statistics (ms per sample) ---
Min:  15.0
Max:  199.0
Mean: 78.02


In [7]:
import pickle

trace_file_path = "../benchmark/dataset/data/device_info/client_behave_trace"  # Adjust if needed

with open(trace_file_path, "rb") as f:
    trace_data = pickle.load(f)

total_active_time = 0
total_time = 0
client_proportions = []

for client_id, client_trace in trace_data.items():
    active_times = sorted(client_trace['active'])
    inactive_times = sorted(client_trace['inactive'])
    finish_time = client_trace['finish_time']

    # Merge events and track their origin
    events = []
    for t in active_times:
        events.append((t, 'active'))
    for t in inactive_times:
        events.append((t, 'inactive'))

    events.sort(key=lambda x: x[0])  # Sort by timestamp

    if not events:
        continue  # Skip if no events

    # Determine initial state based on first event
    first_event_type = events[0][1]
    if first_event_type == 'active':
        is_active = False  # Client starts inactive until first 'active' timestamp
    else:
        is_active = True  # Client starts active until first 'inactive' timestamp

    active_time = 0
    previous_time = 0

    for timestamp, event_type in events:
        if is_active:
            active_time += timestamp - previous_time
        previous_time = timestamp
        is_active = not is_active  # Switch state

    # Handle final interval to finish_time
    if is_active:
        active_time += finish_time - previous_time

    client_total_time = finish_time
    client_proportion = active_time / client_total_time if client_total_time > 0 else 0

    total_active_time += active_time
    total_time += client_total_time
    client_proportions.append(client_proportion)

# Final statistics
average_proportion = total_active_time / total_time if total_time > 0 else 0

print(f"Average active proportion across all clients: {average_proportion:.4f}")

# Optionally, you can print some examples
print("\nSample of individual client proportions:")
for i in range(min(10, len(client_proportions))):
    print(f"Client {i + 1}: {client_proportions[i]:.4f}")

Average active proportion across all clients: 0.2026

Sample of individual client proportions:
Client 1: 0.8656
Client 2: 0.7271
Client 3: 0.7915
Client 4: 0.7011
Client 5: 0.8640
Client 6: 0.8392
Client 7: 0.6711
Client 8: 0.9129
Client 9: 0.9092
Client 10: 0.7888


In [1]:
import pickle

my_clients_file_path = "../clients.pkl"  # Adjust if needed

with open(my_clients_file_path, "rb") as f:
    my_clients_data = pickle.load(f)

In [2]:
print("Type of loaded object:", type(my_clients_data))

# Preview first 10 items
print(f"\nFirst {min(10, len(my_clients_data))} items:")
for i, (key, value) in enumerate(my_clients_data.items()):
    if i >= 10:
        break
    print(f"\nKey: {key}")
    for k, v in value.items():
        print(f"  {k}: {v}")

Type of loaded object: <class 'collections.OrderedDict'>

First 10 items:

Key: 1
  id: 1
  livelab-id: 782
  model: Fire HDX 8.9 (2014)
  osVersion: 4.2.2
  timeZone: Europe/Amsterdam
  batteryLevel: [93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 92, 93, 93, 93, 56, 92, 92, 92, 92, 92, 92, 92, 92, 92, 56, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 57, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 59, 92, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 60, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89

In [7]:
import pickle
from collections import Counter, OrderedDict
from pathlib import Path
import numpy as np

# Load the dataset
file_path = Path("../benchmark/dataset/data/clients.pkl")
with open(file_path, "rb") as f:
    clients = pickle.load(f)

# Check that the file is an OrderedDict
assert isinstance(clients, OrderedDict), "clients.pkl is not an OrderedDict."

# Keys to skip
keys_to_skip = {
    'timestamps-livelab', 'timestamps-carat', 'batteryLevel',
    'rssi', 'active', 'inactive', 'availability', 'id', 'livelab-id'
}

# Initialize structures
key_value_counters = {}
array_value_ranges = {}
id_values = set()

# Get the reference keys (assuming all should have the same keys)
reference_keys = set(next(iter(clients.values())).keys())
inconsistent_clients = []

for client_id, client_data in clients.items():
    current_keys = set(client_data.keys())
    
    # Check if keys are consistent
    if current_keys != reference_keys:
        inconsistent_clients.append(client_id)
    
    # Track ID values
    id_values.add(client_data.get('id'))

    for key, value in client_data.items():
        if key in keys_to_skip:
            if key == 'id':
                continue  # Already handled
            if isinstance(value, (list, np.ndarray)):
                value_array = np.array(value)

                # Skip empty arrays
                if value_array.size == 0:
                    continue

                # Compute min and max directly (works for both numbers and strings)
                current_min = value_array.min()
                current_max = value_array.max()

                if key not in array_value_ranges:
                    array_value_ranges[key] = {'min': current_min, 'max': current_max}
                else:
                    array_value_ranges[key]['min'] = min(array_value_ranges[key]['min'], current_min)
                    array_value_ranges[key]['max'] = max(array_value_ranges[key]['max'], current_max)
            continue

        # Process keys that are NOT skipped
        if isinstance(value, (list, np.ndarray)):
            value_array = np.array(value)

            # Skip empty arrays
            if value_array.size == 0:
                continue

            # Count each distinct element in the array
            if key not in key_value_counters:
                key_value_counters[key] = Counter()
            key_value_counters[key].update(value_array.tolist())

        else:
            # Scalar value: count directly
            if key not in key_value_counters:
                key_value_counters[key] = Counter()
            key_value_counters[key][value] += 1

# Print distinct values and counts
for key, counter in key_value_counters.items():
    print(f"\nKey: {key}")
    for val, count in counter.items():
        print(f"  Value: {val} - Count: {count}")

# Print ranges for skipped array keys
print("\nRanges for skipped array keys:")
for key, range_vals in array_value_ranges.items():
    print(f"  Key: {key} - Range: [{range_vals['min']}, {range_vals['max']}]")

# Check if ID keys are as expected
expected_ids = set(range(1, 12642))
if id_values == expected_ids:
    print("\n✅ ID values correctly span from 1 to 12641.")
else:
    print("\n❌ ID values do not span correctly.")
    missing_ids = expected_ids - id_values
    extra_ids = id_values - expected_ids
    if missing_ids:
        print(f"Missing IDs: {sorted(missing_ids)}")
    if extra_ids:
        print(f"Unexpected IDs: {sorted(extra_ids)}")

# Print inconsistent clients
if inconsistent_clients:
    print("\n❌ Inconsistent keys found in the following clients:")
    print(inconsistent_clients)
else:
    print("\n✅ All clients have consistent keys.")


Key: model
  Value: Fire HDX 8.9 (2014) - Count: 6
  Value: Le S3 - Count: 3
  Value: Yuphoria - Count: 37
  Value: Iconia Smart - Count: 13
  Value: TouchPad - Count: 9
  Value: Fire HD 7 - Count: 3
  Value: Galaxy Note 10.1 N8000 - Count: 28
  Value: Galaxy Tab 10.1 P7510 - Count: 4
  Value: Galaxy Tab 8.9 P7300 - Count: 3
  Value: P7500 Galaxy Tab 10.1 3G - Count: 2
  Value: Aquaris E5s - Count: 1
  Value: I9500 Galaxy S4 - Count: 66
  Value: I9295 Galaxy S4 Active - Count: 223
  Value: I9506 Galaxy S4 - Count: 145
  Value: Galaxy S5 Neo - Count: 108
  Value: Galaxy Note 3 - Count: 415
  Value: I9505 Galaxy S4 - Count: 232
  Value: Galaxy S4 CDMA - Count: 56
  Value: DROID Mini - Count: 64
  Value: DROID Ultra - Count: 201
  Value: Moto X - Count: 192
  Value: Moto G - Count: 67
  Value: I9305 Galaxy S III - Count: 1373
  Value: Galaxy Note II CDMA - Count: 105
  Value: Galaxy J5 (2016) - Count: 103
  Value: Galaxy Note II N7100 - Count: 252
  Value: Galaxy S III I747 - Count: 147


In [2]:
import pickle
import json
from collections import OrderedDict
from pathlib import Path

# Fill in the client IDs you want to inspect
clients_to_load = [1058, 1136, 1517, 1821, 565, 1734, 1821]

# Path to your pickled OrderedDict
file_path = Path("../benchmark/dataset/data/clients.pkl")

# Load the data
with file_path.open("rb") as f:
    clients = pickle.load(f)

assert isinstance(clients, OrderedDict), "Expected an OrderedDict"

# Collect the selected clients
selected_clients = []
for key, client in clients.items():
    if client.get('id') in clients_to_load:
        selected_clients.append(client)

# Save to JSON
output_path = Path("../clients_preview.json")
with output_path.open("w", encoding="utf-8") as f:
    json.dump(selected_clients, f, indent=2)

print(f"✅ Saved {len(selected_clients)} clients to {output_path}")

✅ Saved 6 clients to ../clients_preview.json
