## Monitoring Cluster Performance with the Python SDK

In [None]:
pip install matplotlib ipython aistore

### Creating an AIStore Cluster Instance

In [6]:
from aistore.sdk import Client

# Endpoint of the AIS cluster to monitor. This value targets a deployment
# listening on localhost port 8080; change it to match your cluster.
ais_url = "http://localhost:8080"
# The Client object is the SDK entry point for the cluster at ais_url.
client = Client(ais_url)
# Cluster handle; the cells below call cluster.get_performance() on it.
cluster = client.cluster()

### Performance Metrics
AIStore offers detailed insights into cluster performance through three primary classes: NodeThroughput, NodeLatency, and NodeCounter. Here's a brief overview of each:

- NodeThroughput: Measures data processing rates, focusing on the bandwidth for both data reading (GET) and writing (PUT) operations.
- NodeLatency: Captures delays in data transfer, offering average sizes for GET and PUT operations and detailing the latency of accessing infrequently used (cold) data.
- NodeCounter: Tracks operational events and errors, including counts for various operations (e.g., GET, PUT, DELETE) and specific events like cache evictions or version changes.

In [None]:
# Fetch the current performance metrics from the cluster.
cluster_performance = cluster.get_performance()
# Left as the final bare expression so the notebook renders the dict form
# of the metrics as the cell output.
cluster_performance.as_dict()

### Metrics in Human-Readable Format

In [None]:
# Print one line per target node: its ID followed by the dict form of its
# throughput metrics.
throughput_dict = cluster_performance.throughput
for target_id in throughput_dict:
    throughput = throughput_dict[target_id].as_dict()
    print(f"{target_id}: {throughput}")

### Example - Plotting throughput

In [2]:
import matplotlib.pyplot as plt
from IPython.display import clear_output
import time
import numpy as np

# Module-level sample histories shared by the plotting helpers below:
# plot() appends to them on every call, and plot_live_metrics() empties
# them once its monitoring loop finishes.
get_bw_values = []
put_bw_values = []


def bw_to_mbps(bw_value):
    """Return ``bw_value`` scaled down by a factor of one million.

    Used to express a raw bandwidth reading in MB/s.
    NOTE(review): this presumes the input is in bytes per second -- confirm
    against the SDK's throughput units.
    """
    return bw_value / 1e6


def plot(cluster_performance):
    """Record the latest throughput sample and draw one chart per target node.

    Every call appends a single snapshot to the module-level
    ``get_bw_values`` and ``put_bw_values`` histories, then plots, for each
    target in ``cluster_performance.throughput``, that target's own GET and
    PUT bandwidth (converted with ``bw_to_mbps``) against the sample index.

    Args:
        cluster_performance: Object whose ``throughput`` mapping goes from
            target ID to an entry exposing ``get_bw`` and ``put_bw``.
    """
    throughput_by_target = cluster_performance.throughput
    target_ids = list(throughput_by_target.keys())

    # Keep one {target_id: MB/s} snapshot per call. Appending every target's
    # reading to a single flat list would interleave the nodes' samples, so
    # each chart would show all nodes' data instead of its own.
    get_bw_values.append(
        {tid: bw_to_mbps(throughput_by_target[tid].get_bw) for tid in target_ids}
    )
    put_bw_values.append(
        {tid: bw_to_mbps(throughput_by_target[tid].put_bw) for tid in target_ids}
    )

    # Nothing to draw without targets; a subplot grid needs at least one row.
    if not target_ids:
        return

    # One x-axis shared by all charts: the index of each recorded snapshot.
    time_steps = np.arange(len(get_bw_values))

    # Initialize a figure with one row per target; squeeze=False keeps axs
    # two-dimensional even when there is only a single target.
    _, axs = plt.subplots(
        len(target_ids), 1, figsize=(10, 5 * len(target_ids)), squeeze=False
    )

    for idx, target_id in enumerate(target_ids):
        ax = axs[idx][0]

        # Extract this target's series; a target absent from an earlier
        # snapshot is filled with NaN so it shows up as a gap in the line.
        get_series = [sample.get(target_id, np.nan) for sample in get_bw_values]
        put_series = [sample.get(target_id, np.nan) for sample in put_bw_values]

        # Plot "get" and "put" bandwidth
        ax.plot(time_steps, get_series, label="GET Bandwidth (MB/s)", marker="o")
        ax.plot(time_steps, put_series, label="PUT Bandwidth (MB/s)", marker="x")

        ax.set_title(f"Node {target_id} Throughput Over Time")
        ax.set_xlabel("Time")
        ax.set_ylabel("Bandwidth (MB/s)")
        ax.legend()
    plt.tight_layout()
    plt.show()


def plot_live_metrics(cluster, duration, frequency=2):
    """Repeatedly fetch cluster performance and redraw the throughput charts.

    The loop always draws at least once. Elapsed time is tracked as the sum
    of the sleep intervals only, so time spent fetching metrics and plotting
    is not counted against ``duration``.

    Args:
        cluster: Object providing ``get_performance()``; its result is
            passed to ``plot``.
        duration: Stop once the accumulated sleep time reaches this many
            seconds.
        frequency: Seconds to sleep between consecutive refreshes.

    Raises:
        ValueError: If ``frequency`` is not positive. A zero interval would
            never advance the elapsed-time counter and the loop would not
            terminate.
    """
    if frequency <= 0:
        raise ValueError("frequency must be a positive number of seconds")

    passed = 0
    try:
        while True:
            cluster_performance = cluster.get_performance()
            # Clear the current plot to prepare for the next one
            clear_output(wait=True)
            plot(cluster_performance)

            # Wait before the next update
            time.sleep(frequency)
            passed += frequency

            if passed >= duration:
                break

            plt.clf()
    finally:
        # Reset the shared histories even when the loop is interrupted or a
        # call fails, so the next run does not start with stale samples.
        get_bw_values.clear()
        put_bw_values.clear()

In [None]:
# Refresh the throughput charts every 10 seconds until 60 seconds of
# waiting have accumulated.
plot_live_metrics(cluster, duration=60, frequency=10)