In [1]:
import json
import os
import qumulo
from qumulo.rest_client import RestClient

In [2]:
API_HOSTNAME = '10.120.0.34'
API_USERNAME = 'admin'
API_PASSWORD = 'Admin123'

In [3]:
def cluster_login(api_hostname, api_username, api_password):
    """
    Accept api_hostname, api_username and api_password as parameters. Log into
    cluster via Qumulo Rest API. Return rest_client for all future API calls.
    """
    rest_client = RestClient(api_hostname, 8000)
    rest_client.login(api_username, api_password)

    return rest_client

In [4]:
rest_client = cluster_login(API_HOSTNAME, API_USERNAME, API_PASSWORD)

In [5]:
# rest_client.ad.list_ad()

In [6]:
# rest_client.cluster.list_nodes()

In [7]:
rest_client.cluster.list_nodes()[0]['node_status']

'online'

In [8]:
rest_client.node_state.get_node_state()

{'node_id': 1,
 'state': 'ACTIVE',
 'cluster_id': 'cf83e828-7ef7-4368-a75b-3b972d10f2c6'}

In [9]:
# rest_client.cluster.get_cluster_slots_status()

In [10]:
multilinestring = """
this
that
then
your mom
"""

In [11]:
multilinestring

'\nthis\nthat\nthen\nyour mom\n'

In [12]:
print(multilinestring)


this
that
then
your mom



In [13]:
# rest_client.cluster.get_cluster_slots_status()

In [14]:
os.getcwd()

'/Users/rthompson/Documents/Dev/Python/cluster-event-alerts'

In [15]:
# if 'cluster_state.json' in os.listdir():
#     os.rename('cluster_state.json','cluster_state_previous.json')
#     PREVIOUS_EXISTED = True

with open('cluster_state.json', 'w') as f:
    for data in rest_client.cluster.list_nodes():
        f.writelines(data)

In [16]:
# for item in rest_client.cluster.list_nodes():
#     print(item)

In [17]:
# rest_client.cluster.list_nodes()[:2]

In [18]:
rest_client.cluster.list_nodes()[0]

{'id': 1,
 'node_status': 'online',
 'node_name': 'CoffeeTime-1',
 'uuid': '10a1c7aa-fb99-48a1-8dc3-b34b96777742',
 'label': '00:50:56:bf:68:82',
 'model_number': 'QVIRT',
 'serial_number': 'QVIRT',
 'mac_address': '00:50:56:bf:68:82'}

In [19]:
node_relevant_fields = ['id','node_status','node_name','uuid','model_number','serial_number']
drive_relevant_fields = ['id','node_id','slot','state','slot_type','disk_type','disk_model','disk_serial_number','capacity']

In [20]:
newlist = []
for entry in range(len(rest_client.cluster.list_nodes())):
    newdict = {}
    for k,v in rest_client.cluster.list_nodes()[entry].items():
        if k in node_relevant_fields:
            newdict[k] = v
    newlist.append(newdict)

In [21]:
for entry in range(len(rest_client.cluster.list_nodes())):
    print(entry)

0
1
2
3


In [22]:
rest_client.cluster.list_nodes()[1].items()

dict_items([('id', 2), ('node_status', 'online'), ('node_name', 'CoffeeTime-2'), ('uuid', 'cbdea0e3-1659-48af-b15b-e97dbbeefd04'), ('label', '00:50:56:bf:f1:57'), ('model_number', 'QVIRT'), ('serial_number', 'QVIRT'), ('mac_address', '00:50:56:bf:f1:57')])

In [23]:
def get_cluster_time(rest_client):
    """
    Get current cluster time and return as cluster_time.
    """
    
    cluster_time = rest_client.time_config.get_time_status()['time']
    
    return cluster_time

In [47]:
def get_cluster_name(rest_client):
    """
    Query API for cluster name. Return cluster name as string.
    """
    cluster_name = rest_client.cluster.get_cluster_conf()['cluster_name']

    return cluster_name

In [80]:
def get_cluser_uuid(rest_client):
    """
    Query API for cluster UUID number. Return UUID as string.
    """
    
    cluster_uuid = rest_client.node_state.get_node_state()['cluster_id']
    
    return cluster_uuid

In [25]:
def get_qq_version(rest_client):
    """
    Query API for Qumulo Core version. Return version as string.
    """

    qq_version = rest_client.version.version()['revision_id']

    return qq_version

In [26]:
def retrieve_status_of_cluster_nodes(rest_client):
    """
    Accept rest_client object to query via API call to retrieve info/status for
    nodes. Parse through information and record relevant information. Return
    dict object to later dump as json.
    """
    node_relevant_fields = [
        'id',
        'node_status',
        'node_name',
        'uuid',
        'model_number',
        'serial_number',
    ]

    temp_list = []
    for num in range(len(rest_client.cluster.list_nodes())):
        new_dict = {}
        for k,v in rest_client.cluster.list_nodes()[num].items():
            if k in node_relevant_fields:
                new_dict[k] = v
        temp_list.append(new_dict)
    
    status_of_nodes = {}
    status_of_nodes['nodes'] = temp_list

    return status_of_nodes

In [27]:
def retrieve_status_of_cluster_drives(rest_client):
    """
    Accept rest_client object to query via API call to retrieve info/status for
    drives. Parse through information and record relevant information. Return
    dict object to later dump as json.
    """
    drive_relevant_fields = [
        'id',
        'node_id',
        'slot',
        'state',
        'slot_type',
        'disk_type',
        'disk_model',
        'disk_serial_number',
        'capacity',
    ]

    temp_list = []
    for num in range(len(rest_client.cluster.get_cluster_slots_status())):
        new_dict = {}
        for k,v in rest_client.cluster.get_cluster_slots_status()[num].items():
            if k in drive_relevant_fields:
                new_dict[k] = v
        temp_list.append(new_dict)

    status_of_drives = {}
    status_of_drives['drives'] = temp_list

    return status_of_drives

In [28]:
status_of_nodes = retrieve_status_of_cluster_nodes(rest_client)
# status_of_nodes

In [29]:
status_of_drives = retrieve_status_of_cluster_drives(rest_client)
# status_of_drives

In [30]:
def combine_statuses_formatting(status_of_nodes, status_of_drives):
    """
    In order to adhere to proper json formatting, this func will combine the
    two status_of_nodes and status_of_drives dictionary objects into one
    single dictionary object and return this as cluster_status.
    """
    status_of_nodes['drives'] = status_of_drives['drives']    
    cluster_status = status_of_nodes
    
    return cluster_status

In [31]:
cluster_status = combine_statuses_formatting(status_of_nodes, status_of_drives)

In [32]:
def check_for_previous_state(cluster_status):
    """
    If cluster_state.json exists, rename it to cluster_state_previous.json.
    Regardless of this, also create cluster_state.json and write node + drive
    statuses to file. Return boolean for previous_existed.
    """

    if 'cluster_state.json' in os.listdir():
        os.rename('cluster_state.json','cluster_state_previous.json')
        previous_existed = True
    else:
        previous_existed = False

    with open('cluster_state.json', 'w') as f:
        json.dump(cluster_status, f, indent=4)

    return previous_existed

In [33]:
def compare_states():
    """
    Only being ran if previous_existed is true, this func will compare the
    json files for the previous and current cluster state. Return bool for
    whether or not the data has changed. Return bool for if changes were found.
    """    

    file1 = 'cluster_state.json'
    file2 = 'cluster_state_previous.json'

    with open(file1) as f1, open(file2) as f2:
        data1, data2 = json.load(f1), json.load(f2)
        changes = data1 == data2

    # XXX: can potentially remove all of this
    if changes:
        print('Changes found!! Scanning for unhealthy objects.') # XXX: Later remove
    else:
        print('Changes not found! Not scanning for unhealthy objects') # XXX: Later remove

    return changes

In [34]:
previous_existed = check_for_previous_state(cluster_status)

In [35]:
def check_for_unhealthy_objects():
    """ 
    Scan the cluster_state.json file to determine whether or not there are 
    unhealthy objects. If there are unhealthy objects, append the data to
    new dict object called alert_data, which will later be used to populate
    the alert. Also return whether or not cluster is healthy as bool. 
    """
    healthy = True

    with open('cluster_state_TEST.json') as f:  # XXX: Later change value to 'cluster_state.json'
        data = json.load(f)
        alert_data = {}
        counter = 1
        
        # scan through json for offline nodes
        for dictobj in data['nodes']:
            for k,v in dictobj.items():
                if k == 'node_status':
                    if v != 'online':
                        print('ALERT!! UNHEALTHY NODE FOUND.') # XXX: Later remove
                        alert_data[f'Event {counter}'] = dictobj
                        counter += 1
                        healthy = False
        # scan through json for unhealthy drives
        for dictobj in data['drives']:
            for k,v in dictobj.items():
                if k == 'state':
                    if v != 'healthy':
                        print('ALERT!! UNHEALTHY DRIVE FOUND.') # XXX: Later remove 
                        alert_data[f'Event {counter}'] = dictobj   
                        counter += 1
                        healthy = False
            
    if healthy:
        print('No unhealthy changes found.')

    print(f'alert data: {alert_data}') # XXX: later remove

    return alert_data, healthy

In [39]:
alert_data, healthy = check_for_unhealthy_objects()

ALERT!! UNHEALTHY NODE FOUND.
ALERT!! UNHEALTHY DRIVE FOUND.
alert data: {'Event 1': {'id': 2, 'node_status': 'offline', 'node_name': 'CoffeeTime-2', 'uuid': 'cbdea0e3-1659-48af-b15b-e97dbbeefd04', 'model_number': 'QVIRT', 'serial_number': 'QVIRT'}, 'Event 2': {'id': '2.2', 'node_id': 2, 'slot': 2, 'state': 'unhealthy', 'slot_type': 'SSD', 'disk_type': 'SSD', 'disk_model': 'Virtual_disk', 'disk_serial_number': '', 'capacity': '10467934208'}}


In [36]:
json1 = json.dumps('cluster_state.json', sort_keys=True)
json2 = json.dumps('cluster_state_TEST.json', sort_keys=True)
print(f'comparison: {json1 != json2}')

comparison: True


In [37]:
with open('cluster_state.json') as f1, open('cluster_state_TEST.json') as f2:
    data1 = json.load(f1)
    data2 = json.load(f2)
    something = data1 == data2
    print(something)

False


In [43]:
node_event = 'A node has gone offline.'

for item in alert_data:
    for k,v in alert_data[item].items():
        if k == 'node_status':
            print(node_event)

A node has gone offline.


In [73]:
rest_client.cluster.list_node(2)['uuid']

'cbdea0e3-1659-48af-b15b-e97dbbeefd04'

In [101]:
for val in alert_data['Event 2'].items():
    print(val)

('id', '2.2')
('node_id', 2)
('slot', 2)
('state', 'unhealthy')
('slot_type', 'SSD')
('disk_type', 'SSD')
('disk_model', 'Virtual_disk')
('disk_serial_number', '')
('capacity', '10467934208')


In [96]:
qq_version = get_qq_version(rest_client)
cluster_name = get_cluster_name(rest_client)
cluser_uuid = get_cluser_uuid(rest_client)
cluster_time = get_cluster_time(rest_client)
alert_header = '=' * 18 + ' CLUSTER EVENT ALERT! ' + '=' * 18
email_alert = f"""{alert_header}\nUnhealthy object(s) found. See below for info
and engage Qumulo Support in your preferred fashion.

Cluster name: {cluster_name}
Cluster UUID: {cluster_uuid}
Approx. time: {cluster_time}

Event(s) found:
"""

node_event_heading = '=' * 15 + ' A node has gone offline. ' + '=' * 15
drive_event_heading = '=' * 15 + ' A drive is no longer healthy. ' + '=' * 15

for item in alert_data:
    for k,v in alert_data[item].items():
        if k == 'node_status':    # this is a node alert
            email_alert += node_event_heading
            node_alert_text = f"""
            Node number: {alert_data[item]['id']}
            Node status: {alert_data[item]['node_status']}
            Serial Number: {alert_data[item]['serial_number']}
            Node UUID: {alert_data[item]['uuid']}           
            Node Type: {alert_data[item]['model_number']}
            Qumulo Core Version: {qq_version}
            """

            email_alert += node_alert_text + '\n'

        elif k == 'disk_type':    # this is a drive alert
            email_alert += drive_event_heading
            drive_alert_text = f"""
            Node number: {alert_data[item]['node_id']}
            Drive slot: {alert_data[item]['slot']}
            Drive status: {alert_data[item]['state']}
            Slot type: {alert_data[item]['slot_type']}
            Disk type: {alert_data[item]['disk_type']}
            Disk model: {alert_data[item]['disk_model']}
            Disk serial number: {alert_data[item]['disk_serial_number']}
            Disk capacity: {alert_data[item]['capacity']}
            """

            email_alert += drive_alert_text + '\n'

print(email_alert)

Unhealthy object(s) found. See below for info
and engage Qumulo Support in your preferred fashion.

Cluster name: CoffeeTime
Cluster UUID: cf83e828-7ef7-4368-a75b-3b972d10f2c6
Approx. time: 2021-05-15T22:23:08.16670735Z

Event(s) found:
            Node number: 2
            Node status: offline
            Serial Number: QVIRT
            Node UUID: cbdea0e3-1659-48af-b15b-e97dbbeefd04           
            Node Type: QVIRT
            Qumulo Core Version: Qumulo Core 3.3.2
            
            Node number: 2
            Drive slot: 2
            Drive status: unhealthy
            Slot type: SSD
            Disk type: SSD
            Disk model: Virtual_disk
            Disk serial number: 
            Disk capacity: 10467934208
            



In [84]:
print(node_alert_text)


            Cluster name: CoffeeTime
            Node number: 2
            Node status: offline
            Serial Number: QVIRT
            Cluster UUID: cf83e828-7ef7-4368-a75b-3b972d10f2c6
            Node UUID: cbdea0e3-1659-48af-b15b-e97dbbeefd04           
            Node Type: QVIRT
            Qumulo Core Version: Qumulo Core 3.3.2    
            


In [37]:
def generate_alert_email(alert_data, rest_client):
    """
    Generate email alert and return as string
    """

    qq_version = get_qq_version(rest_client)
    cluster_name = get_cluster_name(rest_client)
    cluster_uuid = get_cluser_uuid(rest_client)
    cluster_time = get_cluster_time(rest_client)
    
    alert_header = '=' * 18 + ' CLUSTER EVENT ALERT! ' + '=' * 18
    email_alert = f"""{alert_header}\nUnhealthy object(s) found. See below for info
    and engage Qumulo Support in your preferred fashion.

    Cluster name: {cluster_name}
    Cluster UUID: {cluster_uuid}
    Approx. time: {cluster_time}

    Event(s) found:
    """

    node_event_heading = '=' * 15 + ' A node has gone offline. ' + '=' * 15
    drive_event_heading = '=' * 15 + ' A drive is no longer healthy. ' + '=' * 15

    for item in alert_data:
        for k,v in alert_data[item].items():
            if k == 'node_status':    # this is a node alert
                email_alert += node_event_heading
                node_alert_text = f"""
                Node number: {alert_data[item]['id']}
                Node status: {alert_data[item]['node_status']}
                Serial Number: {alert_data[item]['serial_number']}
                Node UUID: {alert_data[item]['uuid']}           
                Node Type: {alert_data[item]['model_number']}
                Qumulo Core Version: {qq_version}
                """

                email_alert += node_alert_text + '\n'

            elif k == 'disk_type':    # this is a drive alert
                email_alert += drive_event_heading
                drive_alert_text = f"""
                Node number: {alert_data[item]['node_id']}
                Drive slot: {alert_data[item]['slot']}
                Drive status: {alert_data[item]['state']}
                Slot type: {alert_data[item]['slot_type']}
                Disk type: {alert_data[item]['disk_type']}
                Disk model: {alert_data[item]['disk_model']}
                Disk serial number: {alert_data[item]['disk_serial_number']}
                Disk capacity: {alert_data[item]['capacity']}
                """

                email_alert += drive_alert_text + '\n'
    
    return email_alert

In [41]:
def get_email_recipients():
    """
    Pull email recipients from config file.
    """

    email_recipients = []

    return email_recipients

In [42]:
def send_email(email_alert, email_recipients):
    """
    Send an email populated with alert information to all email addresses in
    receipients list specified in config.py.
    """

    pass

In [44]:
print(f'email alert: {email_alert}')

email alert: 
    ALERT HERE. (Node: <FILL ME OUT>)
    Cluster Name:  <FILL ME OUT>
    Serial Number	<FILL ME OUT>
    Cluster UUID	<FILL ME OUT>
    Node UUID	<FILL ME OUT>
    Node Type	<FILL ME OUT>
    Node Inventory	<FILL ME OUT>
    Software Version	<FILL ME OUT>
    


In [43]:
rest_client = cluster_login(API_HOSTNAME, API_USERNAME, API_PASSWORD)
qq_version = get_qq_version(rest_client)
status_of_nodes = retrieve_status_of_cluster_nodes(rest_client)
status_of_drives = retrieve_status_of_cluster_drives(rest_client)
cluster_status = combine_statuses_formatting(status_of_nodes, status_of_drives)
previous_existed = check_for_previous_state(cluster_status)

if previous_existed:
    changes = compare_states()
    if changes:
        alert_data, healthy = check_for_unhealthy_objects()        
else:
    alert_data, healthy = check_for_unhealthy_objects()

if not healthy:
    email_alert = generate_alert_email(alert_data, rest_client)
    email_recipients = get_email_recipients()
    send_email(email_alert, email_recipients)
else:
    print('New unhealthy objects were NOT found. Closing script') # XXX: Remove after testing
    # XXX: Add script close logic?

Changes found!! Scanning for unhealthy objects.
ALERT!! UNHEALTHY NODE FOUND.
ALERT!! UNHEALTHY DRIVE FOUND.
alert data: {'Event 1': {'id': 2, 'node_status': 'offline', 'node_name': 'CoffeeTime-2', 'uuid': 'cbdea0e3-1659-48af-b15b-e97dbbeefd04', 'model_number': 'QVIRT', 'serial_number': 'QVIRT'}, 'Event 2': {'id': '2.2', 'node_id': 2, 'slot': 2, 'state': 'unhealthy', 'slot_type': 'SSD', 'disk_type': 'SSD', 'disk_model': 'Virtual_disk', 'disk_serial_number': '', 'capacity': '10467934208'}}


In [79]:
rest_client.node_state.get_node_state()['cluster_id']

{'node_id': 1,
 'state': 'ACTIVE',
 'cluster_id': 'cf83e828-7ef7-4368-a75b-3b972d10f2c6'}

In [77]:
help(rest_client.node_state)

Help on RestModule in module qumulo.rest_client object:

class RestModule(builtins.object)
 |  Methods defined here:
 |  
 |  __init__(self, client)
 |  
 |  get_node_state(conninfo, credentials)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)



In [97]:
description='This script will generate email alerts when run based \
            on the configuration passed through in --config. This \
            script requires the Qumulo API Tools which can be \
            downloaded using pip or from the cluster itself.'

In [99]:
print(description

This script will generate email alerts when run based             on the configuration passed through in --config. This             script requires the Qumulo API Tools which can be             downloaded using pip or from the cluster itself.


In [102]:
thisthing = {
    "cluster_settings": {
        "cluster_address": "10.120.0.34",
        "username": "admin",
        "password": "Admin123",
        "rest_port": 8000,
    },
    "email_settings": {
        "mail_to": ["rthompson@qumulo.com","rossthompson89@gmail.com"],
    },
}

In [108]:
thisthing['email_settings']['mail_to']

['rthompson@qumulo.com', 'rossthompson89@gmail.com']

In [105]:
email_recipients = []

for email in thisthing['email_settings']['mail_to']:
    email_recipients += email

In [106]:
email_recipients

['r',
 't',
 'h',
 'o',
 'm',
 'p',
 's',
 'o',
 'n',
 '@',
 'q',
 'u',
 'm',
 'u',
 'l',
 'o',
 '.',
 'c',
 'o',
 'm',
 'r',
 'o',
 's',
 's',
 't',
 'h',
 'o',
 'm',
 'p',
 's',
 'o',
 'n',
 '8',
 '9',
 '@',
 'g',
 'm',
 'a',
 'i',
 'l',
 '.',
 'c',
 'o',
 'm']