# Ceph OSD json file format

In [35]:
import json
import glob as glob

## Basic Properties of the JSON Files
1. **Multiple `roots` are allowed**. Different roots might belong to different clusters (`default` cluster, `drain` cluster -- shown in `beesly.json`). `"stray"` nodes should also be taken care of.
2. **The hierarchical structure varies**. It is much better to handle the hierarchical structure as a directory hierarchy. We have already seen the following hierarchical structures:
    - `root -> host`
    - `root -> host -> osd`
    - `root -> rack -> host -> osd`

In [118]:
cd data

[Errno 2] No such file or directory: 'data'
/Users/mike/Documents/GitHub/cs739-osdvisual/data


In [121]:
def _loadJson(path):
    """Parse the json file at ``path`` and return the decoded object."""
    with open(path) as handle:
        return json.load(handle)


# Read Json Data into Pool: one entry per product file, keyed by the path
# that glob returned.
jsonFiles = glob.glob("product/*.json")
dataPool = {path: _loadJson(path) for path in jsonFiles}

In [122]:
# Show which files were loaded: the keys are the paths returned by glob.
print(dataPool.keys())

['product/jim.json.product.json', 'product/jim.product.json', 'product/kelly.product.json', 'product/erin.product.json', 'product/beesly.json.product.json', 'product/beesly.product.json', 'product/kelly.json.product.json', 'product/erin.json.product.json']


In [43]:
def analyzeJsonData(jsonData):
    """
    Print a short structural summary of one decoded OSD json document.

    jsonData: decoded json object; its "nodes" entry is a list of node
              dicts, each with an "id" and optionally a "children" list
              of ids.

    Prints two lines (root ids, maximum child count) and returns None.
    """
    # Read from the argument, not from a notebook-level variable, so the
    # result depends only on what the caller passes in.
    nodes = jsonData["nodes"]

    # 1. Multiple roots are possible
    idset = set(node["id"] for node in nodes)
    childrenset = set()
    for node in nodes:
        childrenset.update(node.get("children", []))
    # Symmetric difference: ids never listed as a child (the roots), plus
    # any child id that has no node entry of its own.
    print("    - root(s): {0}".format(childrenset ^ idset))

    # 2. OSD have no children, other should have children

    # 3. Maximum number of children varies (in this case, 48)
    children_length = [len(node.get("children", [])) for node in nodes]
    # `or [0]` keeps an empty node list from raising inside max().
    print("    - max children in a node: {0}".format(max(children_length or [0])))
    return

    
# Summarise every loaded file; each payload is bound to the notebook-level
# name `data` before it is analysed.
for k, data in dataPool.items():
    print("-- {} --".format(k))
    analyzeJsonData(data)

-- erin.json --
    - root(s): set([-1])
    - max children in a node: 48
-- jim.json --
    - root(s): set([-299, -1])
    - max children in a node: 16
-- beesly.json --
    - root(s): set([-92, -1])
    - max children in a node: 24
-- kelly.json --
    - root(s): set([-1])
    - max children in a node: 14


## Construction 1. A Simple Hierarchical Parser

In [123]:
from copy import deepcopy

def indexableNodes(nodes):
    """
    Build a lookup table from the stringified node id to the node itself.

    nodes: iterable of node dicts, each carrying an "id" entry
    Returns a dict mapping str(node["id"]) to the same node object; when
    two nodes share an id the later one wins.
    """
    lookup = {}
    for entry in nodes:
        lookup[str(entry["id"])] = entry
    return lookup

def getRoots(nodes):
    """
    Return the keys of all entries whose "type" is "root".

    nodes: the indexable nodes representation (key -> node dict)
    Returns a list of keys, in the mapping's iteration order.
    """
    ROOT_TYPE = "root" # ROOT_ID = 10
    found = []
    for key in nodes:
        if nodes[key]["type"] == ROOT_TYPE:
            found.append(key)
    return found


In [124]:
@timeit
def getTree(nodes):
    """
    Wrap the subtree of every root node under one synthetic "_root" node.

    nodes: the indexable nodes representation (string id -> node dict)
    Returns {"name": "_root", "children": [...]} with one converted
    subtree per key reported by getRoots.
    """
    # NOTE(review): `timeit` is not defined in the cells shown here; it is
    # presumably a timing decorator from another cell -- confirm it is
    # defined before this cell runs.
    subtrees = []
    for rootKey in getRoots(nodes):
        subtrees.append(getTree_(rootKey, nodes))
    return {"name": "_root", "children": subtrees}


def getTree_(i, nodes):
    """
    Recursively convert the node stored under key ``i`` into a nested dict.

    i: string-typed index into ``nodes``
    nodes: the indexable nodes representation (string id -> node dict)

    Returns, for a node whose "type" is "osd", the leaf
    {"name", "size", "data"}, where "size" is 1 when the node's "status"
    is "up" and 0 otherwise. Every other node becomes
    {"name", "children", "data"} with one converted subtree per child id.

    Raises KeyError when ``i`` or a referenced child id is missing from
    ``nodes``.
    """
    node = nodes[i]

    if node["type"] == "osd":
        status = 1 if node["status"] == "up" else 0
        return {
            "name": node["name"],
            "size": status,  # temporary indication of the status: 1 = up, 0 = otherwise
            "data": node
        }

    # A non-osd node without a "children" key is treated as having no
    # children, the same way analyzeJsonData reads the field.
    children = node.get("children", [])
    return {
        "name": node["name"],
        # Child ids are stored as numbers while ``nodes`` is keyed by strings.
        "children": [getTree_(str(n), nodes) for n in children],
        "data": node
    }

## Test

In [129]:
cd data

[Errno 2] No such file or directory: 'data'
/Users/mike/Documents/GitHub/cs739-osdvisual/data


In [131]:
# Smoke test: convert one raw file and dump the resulting tree.
# The input is opened in a `with` block so its handle is closed right
# after parsing instead of being left to the garbage collector.
with open("rawdata/beesly.json") as f:
    data = json.load(f)
nodes = indexableNodes(data["nodes"])
t = getTree(nodes)
with open("processed.json", "w+") as f:
    json.dump(t, f)

'getTree'  6.39 ms


## Production

- Source json directory: `rawdata`
- Product json directory: `product`

In [113]:
import os

rawdata_dir = "rawdata"
product_dir = "product"

# Only an already-existing directory is reported as "Exist"; any other
# mkdir failure (e.g. missing permissions) now raises instead of being
# hidden behind a bare except.
if os.path.isdir(product_dir):
    print("Exist product directory")
else:
    os.mkdir(product_dir)
    print("Creat product directory")


# Find all json files in the rawdata directory
jsonFiles = glob.glob("{0}/*.json".format(rawdata_dir))
for fs in jsonFiles:
    # Construct the path to store data:
    # <product_dir>/<input name without extension>.product.json
    basename = os.path.basename(fs)
    fdname = os.path.splitext(basename)[0]
    # Use product_dir rather than a second hard-coded "product/" so the
    # output always lands in the directory prepared above.
    fdpath = os.path.join(product_dir, fdname + ".product.json")
    # Close the input file before converting and writing.
    with open(fs) as f:
        data = json.load(f)
    t = getTree(indexableNodes(data["nodes"]))
    with open(fdpath, "w+") as fd:
        print("Write {0}".format(fs))
        json.dump(t, fd)

Exist product directory
'getTree'  2.53 ms
Write rawdata/jim.json
'getTree'  2.37 ms
Write rawdata/erin.json
'getTree'  0.89 ms
Write rawdata/kelly.json
'getTree'  3.40 ms
Write rawdata/beesly.json
