NetApp monitoring using Diamond and https://github.com/SpringerPE/diamond-ontapclustercollector
Example configuration to send metrics to Graphite using OntapClusterCollector and Diamond v4:
properties:
diamond:
collector:
hostname: netappcollector
interval: 60
handlers:
diamond.handler.graphite.GraphiteHandler:
batch: 1
host: "graphite.example.com"
port: 2003
timeout: 15
collectors_config:
OntapClusterCollector:
enabled: True
path_prefix: "netapp"
reconnect: 60
hostname_method: "none"
splay: 15
interval: 45
config: |
[devices]
[[cluster_name]]
ip = X.X.X.X
user = admin
password = XXXXXXXXX
apiversion = 1.15
publish = 1
[[[aggregate=nodes.${node_name}.aggr.${instance_name}]]]
total_transfers = rate_ops
user_reads = rate_ops_reads
user_writes = rate_ops_writes
cp_reads = rate_ops_cp_reads
user_reads_ssd = rate_ops_reads_ssd
user_writes_ssd = rate_ops_writes_ssd
cp_reads_ssd = rate_ops_cp_reads_ssd
user_read_blocks = rate_blocks_user_reads
user_write_blocks = rate_blocks_user_writes
cp_read_blocks = rate_blocks_cp_reads
user_read_blocks_ssd = rate_blocks_user_reads_ssd
user_write_blocks_ssd = rate_blocks_user_writes_ssd
cp_read_blocks_ssd = rate_blocks_cp_reads_ssd
wv_fsinfo_blks_used = blocks_used
wv_fsinfo_blks_total = blocks_total
[[[disk=nodes.${node_name}.aggr.${raid_group}.${instance_name}]]]
disk_speed = rpm
total_transfers = rate_iops
user_reads = rate_ops_reads
user_writes = rate_ops_writes
cp_reads = rate_read_cp
disk_busy = pct_busy
io_pending = avg_iops_pending
io_queued = avg_iops_queued
user_write_blocks = rate_blocks_write
user_write_latency = avg_latency_micros_write
user_read_blocks = rate_blocks_read
user_read_latency = avg_latency_micros_read
cp_read_blocks = rate_blocks_cp_read
cp_read_latency = avg_latency_micros_cp_read
[[[processor=nodes.${node_name}.processor.${instance_name}]]]
processor_busy = pct_busy
processor_elapsed_time = time_elapsed
domain_busy = -
[[[system:node=nodes.${instance_name}.system]]]
nfs_ops = rate_ops_nfs
cifs_ops = rate_ops_cifs
fcp_ops = rate_ops_fcp
iscsi_ops = rate_ops_iscsi
read_ops = rate_ops_read
write_ops = rate_ops_write
iscsi_data_recv = rate_kbytes_iscsi_recv
iscsi_data_sent = rate_kbytes_iscsi_sent
net_data_recv = rate_kbytes_net_recv
net_data_sent = rate_kbytes_net_sent
fcp_data_recv = rate_kbytes_fcp_recv
fcp_data_sent = rate_kbytes_fcp_sent
read_data = rate_kbytes_disk_read
write_data = rate_kbytes_disk_written
ssd_data_read = rate_kbytes_ssd_data_read
ssd_data_written = rate_kbytes_ssd_data_written
total_data = rate_kbytes_total_data
write_latency = avg_latency_ms_write
read_latency = avg_latency_ms_read
total_latency = avg_latency_ms
total_ops = rate_ops
memory = total_memory
idle = pct_cpu_idle
cpu_busy = pct_cpu_busy
cpu_elapsed_time = base_time_cpu_elapsed
avg_processor_busy = pct_processors_all_avg_busy
total_processor_busy = pct_processors_all_total_busy
[[[ip=nodes.${node_name}.ip.${instance_name}]]]
sent_packets = pkts_sent
recv_packets = pkts_recv
recv_bad_checksum = pkts_bad_checksum_recv
reassembly_queue_overflow = pkts_overflow_dropped
fragments_drops = fragments_dropped
[[[fcp_port=nodes.${node_name}.fc.${instance_name}]]]
avg_read_latency = avg_latency_micros_read
avg_write_latency = avg_latency_micros_write
queue_full = delta_queue_full
[[[lif=nodes.${node_name}.net.${instance_name}]]]
recv_packet = rate_pkts_recv
recv_errors = rate_recv_errors
sent_packet = rate_pkts_sent
sent_errors = rate_sent_errors
recv_data = rate_bytes_recv
sent_data = rate_bytes_sent
[[[client=nodes.${node_name}.client.${instance_name}]]]
rx_data = rate_bytes_rx
rx_packets = packets_rx
tx_data = rate_bytes_tx
tx_packets = packets_tx
[[[ext_cache_obj=nodes.${node_name}.ext_cache.${instance_name}]]]
usage = pct_usage_blocks
accesses = cnt_delta_accesses
blocks = cnt_blocks
disk_reads_replaced = rate_disk_replaced_readio
hit = rate_hit_buffers
hit_flushq = rate_flushq_hit_buffers
hit_once = rate_once_hit_buffers
hit_age = rate_age_hit_buffers
miss = rate_miss_buffers
miss_flushq = rate_flushq_miss_buffers
miss_once = rate_once_miss_buffers
miss_age = rate_age_miss_buffers
hit_percent = pct_hit
inserts = rate_inserts_buffers
inserts_flushq = rate_flushq_inserts_buffers
inserts_once = rate_once_inserts_buffers
inserts_age = rate_age_inserts_buffers
reuse_percent = pct_reuse
evicts = rate_evicts_blocks
evicts_ref = rate_ref_evicts_blocks
invalidates = rate_invalidates_blocks
[[[wafl=nodes.${node_name}.${instance_name}]]]
name_cache_hit = rate_cache_hits
total_cp_msecs = cnt_msecs_spent_cp
wafl_total_blk_writes = rate_blocks_written
wafl_total_blk_readaheads = rate_blocks_readaheads
wafl_total_blk_reads = rate_blocks_read
# New object
[[[workload_volume=vservers.${data_object_type}.${data_object_name}]]]
ops = rate_ops
read_data = rate_bytes_read
read_latency = avg_latency_micros_read
read_ops = rate_ops_read
read_io_type_base = -
write_data = rate_bytes_write
write_latency = avg_latency_micros_write
write_ops = rate_ops_write
[[[nfsv4=vservers.${instance_name}.nfsv4]]]
write_avg_latency = avg_latency_micros_write
read_avg_latency = avg_latency_micros_read
total_ops = rate_ops
nfsv4_ops = rate_nfsv4_ops
nfs4_read_throughput = rate_throughput_nfs4_read
nfs4_write_throughput = rate_throughput_nfs4_write
read_percent = pct_read_ops
write_percent = pct_write_ops
[[[nfsv4_1=vservers.${instance_name}.nfsv41]]]
write_avg_latency = avg_latency_micros_write
read_avg_latency = avg_latency_micros_read
total_ops = rate_ops
nfsv4_1_ops = rate_nfsv41_ops
nfs41_read_throughput = rate_throughput_nfs41_read
nfs41_write_throughput = rate_throughput_nfs41_write
read_percent = pct_read_ops
write_percent = pct_write_ops
[[[nfsv3=vservers.${instance_name}.nfsv3]]]
nfsv3_ops = rate_ops_nfsv3
nfsv3_read_ops = rate_ops_nfsv3_read
nfsv3_write_ops = rate_ops_nfsv3_write
read_total = cnt_ops_read
write_total = cnt_ops_write
write_avg_latency = avg_latency_micros_nfsv3_write
read_avg_latency = avg_latency_micros_nfsv3_read
nfsv3_write_throughput = rate_throughput_nfsv3_write
nfsv3_read_throughput = rate_throughput_nfsv3_read
nfsv3_throughput = rate_throughput_nfsv3
nfsv3_dnfs_ops = rate_ops_nfsv3_oracle
[[[iscsi_lif:vserver=vservers.${instance_name}.iscsi]]]
iscsi_read_ops = rate_ops_iscsi_read
iscsi_write_ops = rate_ops_iscsi_write
avg_write_latency = avg_latency_micros_iscsi_write
avg_read_latency = avg_latency_micros_iscsi_read
avg_latency = avg_latency_micros_iscsi
data_in_blocks = cnt_blocks_recv
data_out_blocks = cnt_blocks_sent
[[[cifs=vservers.${instance_name}.cifs]]]
cifs_ops = rate_ops_cifs
cifs_read_ops = rate_ops_cifs_read
cifs_write_ops = rate_ops_cifs_write
cifs_latency = avg_latency_micros_cifs
cifs_write_latency = avg_latency_micros_cifs_write
cifs_read_latency = avg_latency_micros_cifs_read
connected_shares = cnt_cifs_connected_shares
reconnection_requests_total = cnt_cifs_reconnection_requests_total
For bosh-lite, you can quickly create a deployment manifest & deploy the release:
templates/make_manifest warden
bosh -n deploy