#DataModelDict Class Demonstration

The DataModelDict class is a utility tool for working with structured data models.  It handles the conversions between equivalent representations of json, XML and Python dictionaries.  It also has a few methods associated with checking the data model type and recursively retrieving elements from the model.

Library imports

In [1]:
#imports
from DataModelDict import DataModelDict

##1. Class Basics 

The DataModelDict is a child class of OrderedDict.  As such, within Python it behaves identically to an OrderedDict.

In addition to initialization consistent with an OrderedDict, the DataModelDict initialization also calls the load() method. This means that a DataModelDict can be initialized directly from json/xml formatted data.  See Section 3 (the load() method) for more information.

In [2]:
#Create an empty DataModelDict
model = DataModelDict()

#Create tiered dictionary for demonstration purposes
model['demo'] = DataModelDict()

#cat1 is a single subelement holding a list of (Python 2 long) integers and a unit of None
model['demo']['cat1'] = DataModelDict()
model['demo']['cat1']['value'] = [100L, 200L, 300L]
model['demo']['cat1']['unit'] = None

#cat2 is a plain list of two DataModelDict subelements, each built from (key, value) pairs
model['demo']['cat2'] = []
model['demo']['cat2'].append(DataModelDict([ ('value', 1.23), ('unit', 'eV') ]))
model['demo']['cat2'].append(DataModelDict([ ('value', 1.29), ('unit', 'eV') ]))

#Show the resulting nested structure
print model

DataModelDict([('demo', DataModelDict([('cat1', DataModelDict([('value', [100L, 200L, 300L]), ('unit', None)])), ('cat2', [DataModelDict([('value', 1.23), ('unit', 'eV')]), DataModelDict([('value', 1.29), ('unit', 'eV')])])]))])


In [3]:
#test iteration
for key in model:
    print key    
print

#test iteritems
for key, value in model.iteritems():
    print key, '->', value
print

#test iteritems on subelement
for key, value in model['demo'].iteritems():
    print key, '->', value 

demo

demo -> DataModelDict([('cat1', DataModelDict([('value', [100L, 200L, 300L]), ('unit', None)])), ('cat2', [DataModelDict([('value', 1.23), ('unit', 'eV')]), DataModelDict([('value', 1.29), ('unit', 'eV')])])])

cat1 -> DataModelDict([('value', [100L, 200L, 300L]), ('unit', None)])
cat2 -> [DataModelDict([('value', 1.23), ('unit', 'eV')]), DataModelDict([('value', 1.29), ('unit', 'eV')])]


##2. Class Methods for General Functionality

The methods here add additional functionality to the model that assist with the handling and manipulation of the structured data.

###append(key, value)
If key is not yet assigned, assigns value to key. If key is already assigned, appends value to the existing value(s), converting the existing value to a list if it is not one already.

In [4]:
print model['demo'].get('cat3', None)

model['demo'].append('cat3', 'first')
print model['demo'].get('cat3', None)

model['demo'].append('cat3', 'second')
print model['demo'].get('cat3', None)

model['demo'].append('cat3', 'third')
print model['demo'].get('cat3', None)

None
first
['first', 'second']
['first', 'second', 'third']


###find(key, **kwargs)

Return a DataModelDict corresponding to a subelement given by key. Additional keyword arguments can be used to refine the search. Returns None if no matching elements are found.  Raises a ValueError if multiple matching elements are found.

In [5]:
#find with one matching subelement
print "model.find('cat1') ->", model.find('cat1')
print

#find with no matching subelement
print "model.find('not-there') ->", model.find('not-there')
print 

#find with multiple matching subelements
print "model.find('cat2') ->", 
try:
    print model.find('cat2')
except ValueError as detail:
     print '    ValueError raised:', detail
print 

#find with additional keyword search refinement
print "model.find('cat2', value=1.23) ->", 
try:
    print model.find('cat2', value=1.23)
except ValueError as detail:
     print '    ValueError raised:', detail

    

model.find('cat1') -> DataModelDict([('cat1', DataModelDict([('value', [100L, 200L, 300L]), ('unit', None)]))])

model.find('not-there') -> None

model.find('cat2') ->     ValueError raised: Multiple matching subelements found for key (and kwargs).

model.find('cat2', value=1.23) -> DataModelDict([('cat2', DataModelDict([('value', 1.23), ('unit', 'eV')]))])


###iterlist(key)

Return an iterator over an element's value(s).  Useful if the element may or may not be a list of values.

In [30]:
#iterlist on a key whose value is a list: each list item is yielded in turn
print "model['demo']['cat1']['value'] ->", model['demo']['cat1']['value']
print "for val in model['demo']['cat1'].iterlist('value'):"
for val in model['demo']['cat1'].iterlist('value'):
    print "    val ->", val
print     

#iterlist on a key whose value is not a list: the single value (here None) is yielded once
print "model['demo']['cat1']['unit'] ->", model['demo']['cat1']['unit']
print "for val in model['demo']['cat1'].iterlist('unit'):"
for val in model['demo']['cat1'].iterlist('unit'):
    print "    val ->", val
print

#iterlist on a key that doesn't exist: nothing is yielded, so the loop body never runs
print "for val in model['demo']['cat1'].iterlist('not-there'):"
for val in model['demo']['cat1'].iterlist('not-there'):
    print "    val ->", val    

model['demo']['cat1']['value'] -> [100, 200, 300]
for val in model['demo']['cat1'].iterlist('value'):
    val -> 100
    val -> 200
    val -> 300

model['demo']['cat1']['unit'] -> None
for val in model['demo']['cat1'].iterlist('unit'):
    val -> None

for val in model['demo']['cat1'].iterlist('not-there'):


###iterlistall(key)

Return an iterator over all subelements at any level with the given key name.

In [7]:
#iterlistall visits every 'value' entry at any depth: the cat1 list items, then each cat2 value
print "for val in model.iterlistall('value'):"
for val in model.iterlistall('value'):
    print "    val ->", val

for val in model.iterlistall('value'):
    val -> 100
    val -> 200
    val -> 300
    val -> 1.23
    val -> 1.29


###list(key)

Return an element's value(s) as a list.  Useful if the element may or may not be a list of values.

In [9]:
#list on a key whose value is already a list: the list is returned
print "model['demo']['cat1'].list('value') ->", model['demo']['cat1'].list('value')   

#list on a key whose value is not a list: the value is wrapped in a one-element list
print "model['demo']['cat1'].list('unit') ->", model['demo']['cat1'].list('unit')   

#list on a key that doesn't exist: an empty list is returned
print "model['demo']['cat1'].list('not-there') ->", model['demo']['cat1'].list('not-there')   

model['demo']['cat1'].list('value') -> [100L, 200L, 300L]
model['demo']['cat1'].list('unit') -> [None]
model['demo']['cat1'].list('not-there') -> []


###listall(key)

Return a list of all subelements at any level with the given key name.

In [6]:
#listall for a key with no occurrences: returns an empty list
print "model.listall('not-there') ->", model.listall('not-there')
print 

#listall for a key with multiple occurrences: values from every level are collected into one list
#NOTE(review): the label text below misspells listall as "lsitall"; left unchanged so it matches the recorded output
print "model.lsitall('value') ->", model.listall('value')

model.listall('not-there') -> []

model.lsitall('value') -> [100L, 200L, 300L, 1.23, 1.29]


##3. Class Methods for Format Conversion

These methods allow for conversions of the data model to and from the different representations.

###json(fp=None, indent=None, separators=(', ', ': '))

Return the DataModelDict in json format. The keyword arguments are:

- __fp__ = file-like object.  If given, then the json will be written to fp instead of returned.

- __indent__ = integer indentation spacing.  Default is None, which will print inline.

- __separators__ = tuple of (item, dictionary) separators. Default is (', ', ': ')

In [10]:
#Default json: inline (single-line) output using the default (', ', ': ') separators
print model.json()

{"demo": {"cat1": {"value": [100, 200, 300], "unit": null}, "cat2": [{"value": 1.23, "unit": "eV"}, {"value": 1.29, "unit": "eV"}], "cat3": ["first", "second", "third"]}}


In [11]:
#json with indent=4: each nesting level is indented by four spaces
print model.json(indent=4)

{
    "demo": {
        "cat1": {
            "value": [
                100, 
                200, 
                300
            ], 
            "unit": null
        }, 
        "cat2": [
            {
                "value": 1.23, 
                "unit": "eV"
            }, 
            {
                "value": 1.29, 
                "unit": "eV"
            }
        ], 
        "cat3": [
            "first", 
            "second", 
            "third"
        ]
    }
}


In [12]:
#json with different separators: (',', ':') drops the space after each comma and colon
print model.json(indent=2, separators= (',', ':'))

{
  "demo":{
    "cat1":{
      "value":[
        100,
        200,
        300
      ],
      "unit":null
    },
    "cat2":[
      {
        "value":1.23,
        "unit":"eV"
      },
      {
        "value":1.29,
        "unit":"eV"
      }
    ],
    "cat3":[
      "first",
      "second",
      "third"
    ]
  }
}


In [13]:
#json on a subelement DataModelDict: only the content of cat1 is written
print model['demo']['cat1'].json(indent=1)

{
 "value": [
  100, 
  200, 
  300
 ], 
 "unit": null
}


In [14]:
#json to file
with open('test.json', 'w') as f:
    model.json(fp=f, indent=2)
    
with open('test.json') as f:
    print f.read()

{
  "demo": {
    "cat1": {
      "value": [
        100, 
        200, 
        300
      ], 
      "unit": null
    }, 
    "cat2": [
      {
        "value": 1.23, 
        "unit": "eV"
      }, 
      {
        "value": 1.29, 
        "unit": "eV"
      }
    ], 
    "cat3": [
      "first", 
      "second", 
      "third"
    ]
  }
}


###xml(fp=None, indent=None, full_document=True)

Return the DataModelDict in xml format.  The keyword arguments are:

- __fp__ = file-like object.  If given, then the xml will be written to fp instead of returned.

- __indent__ = integer indentation spacing.  Default is None, which will print inline.

- __full_document__ = boolean indicating whether the xml string is for the full document (including header) or a subelement.  Default is True.

In [15]:
#default xml: the xml header followed by the whole document on a single line
print model.xml()

<?xml version="1.0" encoding="utf-8"?>
<demo><cat1><value>100</value><value>200</value><value>300</value><unit></unit></cat1><cat2><value>1.23</value><unit>eV</unit></cat2><cat2><value>1.29</value><unit>eV</unit></cat2><cat3>first</cat3><cat3>second</cat3><cat3>third</cat3></demo>


In [16]:
#xml with indent
print model.xml(indent=0)
xml_string = model.xml(indent=0)

<?xml version="1.0" encoding="utf-8"?>
<demo>
<cat1>
<value>100</value>
<value>200</value>
<value>300</value>
<unit></unit>
</cat1>
<cat2>
<value>1.23</value>
<unit>eV</unit>
</cat2>
<cat2>
<value>1.29</value>
<unit>eV</unit>
</cat2>
<cat3>first</cat3>
<cat3>second</cat3>
<cat3>third</cat3>
</demo>


In [17]:
#xml with different indent: indent=4 indents each nesting level by four spaces
print model.xml(indent=4)

<?xml version="1.0" encoding="utf-8"?>
<demo>
    <cat1>
        <value>100</value>
        <value>200</value>
        <value>300</value>
        <unit></unit>
    </cat1>
    <cat2>
        <value>1.23</value>
        <unit>eV</unit>
    </cat2>
    <cat2>
        <value>1.29</value>
        <unit>eV</unit>
    </cat2>
    <cat3>first</cat3>
    <cat3>second</cat3>
    <cat3>third</cat3>
</demo>


In [18]:
#xml of subelement: full_document=False leaves out the xml header line
print model['demo']['cat1'].xml(full_document=False)

<value>100</value><value>200</value><value>300</value><unit></unit>


In [19]:
#xml to file
with open('test.xml', 'w') as f:
    model.xml(f, indent=2)
    
with open('test.xml') as f:
    print f.read()

<?xml version="1.0" encoding="utf-8"?>
<demo>
  <cat1>
    <value>100</value>
    <value>200</value>
    <value>300</value>
    <unit></unit>
  </cat1>
  <cat2>
    <value>1.23</value>
    <unit>eV</unit>
  </cat2>
  <cat2>
    <value>1.29</value>
    <unit>eV</unit>
  </cat2>
  <cat3>first</cat3>
  <cat3>second</cat3>
  <cat3>third</cat3>
</demo>


###load(model, parse_float=None, parse_int=None)

The load() method reads in a string or file-like object in json or xml format.  All dictionary levels are made DataModelDict objects.  Keyword values (True, False, None, nan, inf, -inf, and json equivalents) are also interpreted. Keyword Arguments are:

- __model__ = string, unicode or file-like object containing the json/xml formatted data. Required.

- __parse_float__ = numeric data type to convert floating point numbers to. Default is float.

- __parse_int__ = numeric data type to convert integer numbers to.  Note that parse_int conversion is attempted before parse_float, so if the parse_int type can interpret floats then all values will be of parse_int. Default is int.



In [20]:
#print original dictionary (it now also holds the cat3 values appended earlier)
print model

DataModelDict([('demo', DataModelDict([('cat1', DataModelDict([('value', [100L, 200L, 300L]), ('unit', None)])), ('cat2', [DataModelDict([('value', 1.23), ('unit', 'eV')]), DataModelDict([('value', 1.29), ('unit', 'eV')])]), ('cat3', ['first', 'second', 'third'])]))])


In [21]:
#save as json and xml strings (default inline formatting) for use with load()
json_string = model.json()
xml_string = model.xml()
#show both strings, separated by a blank line
print json_string
print 
print xml_string

{"demo": {"cat1": {"value": [100, 200, 300], "unit": null}, "cat2": [{"value": 1.23, "unit": "eV"}, {"value": 1.29, "unit": "eV"}], "cat3": ["first", "second", "third"]}}

<?xml version="1.0" encoding="utf-8"?>
<demo><cat1><value>100</value><value>200</value><value>300</value><unit></unit></cat1><cat2><value>1.23</value><unit>eV</unit></cat2><cat2><value>1.29</value><unit>eV</unit></cat2><cat3>first</cat3><cat3>second</cat3><cat3>third</cat3></demo>


In [22]:
#clear model (not necessary. just to show that values are gone)
model = DataModelDict()
print model
print 

#load json string: keys and string values come back as unicode, integers as plain ints
model.load(json_string)
print model
print 

#clear model (not necessary. just to show that values are gone)
model = DataModelDict()
print model
print

#load xml string: produces the same content as loading the json string
model.load(xml_string)
print model

DataModelDict()

DataModelDict([(u'demo', DataModelDict([(u'cat1', DataModelDict([(u'value', [100, 200, 300]), (u'unit', None)])), (u'cat2', [DataModelDict([(u'value', 1.23), (u'unit', u'eV')]), DataModelDict([(u'value', 1.29), (u'unit', u'eV')])]), (u'cat3', [u'first', u'second', u'third'])]))])

DataModelDict()

DataModelDict([(u'demo', DataModelDict([(u'cat1', DataModelDict([(u'value', [100, 200, 300]), (u'unit', None)])), (u'cat2', [DataModelDict([(u'value', 1.23), (u'unit', u'eV')]), DataModelDict([(u'value', 1.29), (u'unit', u'eV')])]), (u'cat3', [u'first', u'second', u'third'])]))])


In [23]:
#import json string values with long integers and complex floats
#(integers are converted with long, floating point numbers with complex)
model.load(json_string, parse_int=long, parse_float=complex)
print model
print

#import xml string values with long integers and complex floats
#(same conversions as above, applied to the xml representation)
model.load(xml_string, parse_int=long, parse_float=complex)
print model

DataModelDict([(u'demo', DataModelDict([(u'cat1', DataModelDict([(u'value', [100L, 200L, 300L]), (u'unit', None)])), (u'cat2', [DataModelDict([(u'value', (1.23+0j)), (u'unit', u'eV')]), DataModelDict([(u'value', (1.29+0j)), (u'unit', u'eV')])]), (u'cat3', [u'first', u'second', u'third'])]))])

DataModelDict([(u'demo', DataModelDict([(u'cat1', DataModelDict([(u'value', [100L, 200L, 300L]), (u'unit', None)])), (u'cat2', [DataModelDict([(u'value', (1.23+0j)), (u'unit', u'eV')]), DataModelDict([(u'value', (1.29+0j)), (u'unit', u'eV')])]), (u'cat3', [u'first', u'second', u'third'])]))])


In [24]:
#clear model (not necessary. just to show that values are gone)
model = DataModelDict()
print model
print 

#load json file: load() is given the open file object instead of a string
with open('test.json') as f:
    model.load(f)
print model
print 

#clear model (not necessary. just to show that values are gone)
model = DataModelDict()
print model
print

#load xml file: load() is given the open file object instead of a string
with open('test.xml') as f:
    model.load(f)
print model

DataModelDict()

DataModelDict([(u'demo', DataModelDict([(u'cat1', DataModelDict([(u'value', [100, 200, 300]), (u'unit', None)])), (u'cat2', [DataModelDict([(u'value', 1.23), (u'unit', u'eV')]), DataModelDict([(u'value', 1.29), (u'unit', u'eV')])]), (u'cat3', [u'first', u'second', u'third'])]))])

DataModelDict()

DataModelDict([(u'demo', DataModelDict([(u'cat1', DataModelDict([(u'value', [100, 200, 300]), (u'unit', None)])), (u'cat2', [DataModelDict([(u'value', 1.23), (u'unit', u'eV')]), DataModelDict([(u'value', 1.29), (u'unit', u'eV')])]), (u'cat3', [u'first', u'second', u'third'])]))])


###key_to_html(key)

Return a new DataModelDict where all recursive elements with a given key are converted to strings (useful when html included in xml).

In [25]:
#convert cat1 to a string: in the new model its content is replaced by the equivalent xml text
model2 = model.key_to_html('cat1')
print model2.json(indent=2)
print   

{
  "demo": {
    "cat1": "<value>100</value><value>200</value><value>300</value><unit></unit>", 
    "cat2": [
      {
        "value": 1.23, 
        "unit": "eV"
      }, 
      {
        "value": 1.29, 
        "unit": "eV"
      }
    ], 
    "cat3": [
      "first", 
      "second", 
      "third"
    ]
  }
}



In [26]:
#convert cat2 to a string: the two cat2 subelements are combined into a single xml text value
model2 = model.key_to_html('cat2')
print model2.json(indent=2)
print

{
  "demo": {
    "cat1": {
      "value": [
        100, 
        200, 
        300
      ], 
      "unit": null
    }, 
    "cat2": "<value>1.23</value><unit>eV</unit><value>1.29</value><unit>eV</unit>", 
    "cat3": [
      "first", 
      "second", 
      "third"
    ]
  }
}



__Note:__ When a model containing an html string is converted to xml, the markup characters in that string are escaped (e.g. `<` becomes `&lt;`).

In [27]:
#the string stored under cat2 is escaped (&lt; and &gt;) when the model is written as xml
print model2.xml(indent=2)

<?xml version="1.0" encoding="utf-8"?>
<demo>
  <cat1>
    <value>100</value>
    <value>200</value>
    <value>300</value>
    <unit></unit>
  </cat1>
  <cat2>&lt;value&gt;1.23&lt;/value&gt;&lt;unit&gt;eV&lt;/unit&gt;&lt;value&gt;1.29&lt;/value&gt;&lt;unit&gt;eV&lt;/unit&gt;</cat2>
  <cat3>first</cat3>
  <cat3>second</cat3>
  <cat3>third</cat3>
</demo>


File removal to keep Notebook directory clean.

In [28]:
import os

#delete the temporary files written by the json and xml file examples
for leftover in ('test.json', 'test.xml'):
    os.remove(leftover)