In [1]:
# Schema: leaf values are the expected types, nested dicts are nested schemas.
template = {
    'user_id': int,
    'name': {'first': str, 'last': str},
    'bio': {
        'dob': {'year': int, 'month': int, 'day': int},
        'birthplace': {'country': str, 'city': str},
    },
}

# Record that conforms to the schema.
john = {
    'user_id': 100,
    'name': {'first': 'John', 'last': 'Cleese'},
    'bio': {
        'dob': {'year': 1939, 'month': 11, 'day': 27},
        'birthplace': {'country': 'United Kingdom',
                       'city': 'Weston-super-Mare'},
    },
}

# Record with no 'city' entry under bio.birthplace.
eric = {
    'user_id': 101,
    'name': {'first': 'Eric', 'last': 'Idle'},
    'bio': {
        'dob': {'year': 1943, 'month': 3, 'day': 29},
        'birthplace': {'country': 'United Kingdom'},
    },
}

# Record whose bio.dob.month is a string instead of an int.
michael = {
    'user_id': 102,
    'name': {'first': 'Michael', 'last': 'Palin'},
    'bio': {
        'dob': {'year': 1943, 'month': 'May', 'day': 5},
        'birthplace': {'country': 'United Kingdom', 'city': 'Sheffield'},
    },
}

# Same as john plus an extra 'house' entry under bio.birthplace.
bad_john = {
    'user_id': 100,
    'name': {'first': 'John', 'last': 'Cleese'},
    'bio': {
        'dob': {'year': 1939, 'month': 11, 'day': 27},
        'birthplace': {'country': 'United Kingdom',
                       'city': 'Weston-super-Mare',
                       'house': 'Big house'},
    },
}

In [19]:
def match_keys(data, valid, path):
    """Check that `data` has exactly the same keys as `valid`.

    Only the key sets are compared; values are never inspected.

    Args:
        data: dictionary being validated.
        valid: dictionary whose keys are the expected keys.
        path: dotted prefix used to name offending keys in the message.

    Returns:
        (True, None) when both key sets are equal, otherwise
        (False, message), where message lists the missing and/or extra keys
        as `path.key`, each group in sorted order.
    """
    data_keys = data.keys()
    valid_keys = valid.keys()

    extra_keys = data_keys - valid_keys
    missing_keys = valid_keys - data_keys

    if not (missing_keys or extra_keys):
        return True, None

    def describe(label, keys):
        # Sorted so the message does not depend on set iteration order.
        return label + ', '.join(sorted(path + '.' + str(key) for key in keys))

    parts = []
    if missing_keys:
        parts.append(describe('missing keys: ', missing_keys))
    if extra_keys:
        parts.append(describe('extra keys: ', extra_keys))
    # Join only the parts that exist so the message carries no stray
    # leading or trailing space.
    return False, ' '.join(parts)

In [20]:
# Data and template share the same keys; the value types are deliberately
# wrong to show that match_keys compares keys only.
# `t` is reused by the next few cells.
t = {'a': int, 'b': int, 'c': int, 'd': {}}
d = {'a': 'wrong type', 'b': 100, 'c': 200, 'd': {'wrong', 'type'}}
is_ok, err_msg = match_keys(d, t, 'some.path')
print(is_ok, err_msg)

True None


In [21]:
# 'd' is absent from the data, so it should be reported as missing.
d = {'a': None, 'b': None, 'c': None}
is_ok, err_msg = match_keys(d, t, 'some.path')
print(is_ok, err_msg)

False missing keys: some.path.d 


In [22]:
# 'e' is not a key of the template, so it should be reported as extra.
d = {'a': None, 'b': None, 'c': None, 'd': None, 'e': None}
is_ok, err_msg = match_keys(d, t, 'some.path')
print(is_ok, err_msg)

False  extra keys: some.path.e


In [23]:
# One missing key ('d') and one extra key ('e') at the same time.
d = {'a': None, 'b': None, 'c': None, 'e': None}
is_ok, err_msg = match_keys(d, t, 'some.path')
print(is_ok, err_msg)

False missing keys: some.path.d extra keys: some.path.e


In [24]:
# Two missing keys ('c', 'd') and two extra keys ('e', 'f').
d = {'a': None, 'b': None, 'e': None, 'f': None}
is_ok, err_msg = match_keys(d, t, 'some.path')
print(is_ok, err_msg)

False missing keys: some.path.d, some.path.c extra keys: some.path.e, some.path.f


In [25]:
def match_types(data, template, path):
    """Check that each value in `data` has the type `template` asks for.

    Only one level is inspected: where the template value is itself a dict,
    the data value merely has to be a dict; its contents are not examined.
    The check uses isinstance, so instances of subclasses are accepted.

    Args:
        data: dictionary being validated.
        template: dictionary mapping each key to an expected type, or to a
            nested dict template.
        path: dotted prefix used to name the offending key in the message.

    Returns:
        (True, None) when every template key passes, otherwise
        (False, message) describing the first mismatch found.
    """
    for key, value in template.items():
        # A nested template only requires "some dict" at this level.
        template_type = dict if isinstance(value, dict) else value
        # A bare object() stands in for an absent key, so a missing key is
        # reported as "found object" unless the template asks for object.
        data_value = data.get(key, object())
        if not isinstance(data_value, template_type):
            # str(key) keeps non-string keys from breaking the concatenation,
            # matching how match_keys formats its paths.
            err_msg = ('incorrect type: ' + path + '.' + str(key) +
                       ' -> expected ' + template_type.__name__ +
                       ', found ' + type(data_value).__name__)
            return False, err_msg
    return True, None
            

In [26]:
# Types line up. match_types looks at one level only, so the contents of the
# dict under 'c' are not compared with the nested template {'d': int}.
t = {'a': int, 'b': str, 'c': {'d': int}}
d = {'a': 100, 'b': 'test', 'c': {'some': 'value'}}
match_types(d, t, 'some.path')

(True, None)

In [27]:
# 'c' maps to a nested template, so a dict is required; a string is supplied.
d = {'a': 100, 'b': 'test', 'c': 'unexpected'}
match_types(d, t, 'some.path')

(False, 'incorrect type: some.path.c -> expected dict, found str')

In [28]:
# 'b' should be a str but holds an int.
d = {'a': 100, 'b': 200, 'c': {'some': 'value'}}
match_types(d, t, 'some.path')

(False, 'incorrect type: some.path.b -> expected str, found int')

In [32]:
def recurse_validate(data, template, path):
    """Validate `data` against `template`, descending into nested templates.

    At each level the keys are checked first (match_keys), then the value
    types (match_types); only then are nested dict templates visited.
    Validation stops at the first failure.

    Args:
        data: dictionary being validated.
        template: dictionary describing the expected keys and types.
        path: dotted path of this level, used as the prefix in messages.

    Returns:
        (True, None) when everything matches, otherwise (False, message)
        for the first failure encountered.
    """
    is_ok, err_msg = match_keys(data, template, path)
    if not is_ok:
        return False, err_msg

    is_ok, err_msg = match_types(data, template, path)
    if not is_ok:
        return False, err_msg

    # Walk nested templates in the template's own key order. Collecting the
    # keys into a set first would make the first reported nested error depend
    # on hash ordering, which can differ from run to run.
    for key, sub_template in template.items():
        if not isinstance(sub_template, dict):
            continue
        sub_path = path + '.' + str(key)
        # data[key] exists and is a dict here: both checks above passed.
        is_ok, err_msg = recurse_validate(data[key], sub_template, sub_path)
        if not is_ok:
            return False, err_msg
    return True, None

In [34]:
# john conforms to the template at every level.
is_ok, err_msg = recurse_validate(john, template, 'root')
print(is_ok, err_msg)

True None


In [35]:
# eric has no 'city' under bio.birthplace.
is_ok, err_msg = recurse_validate(eric, template, 'root')
print(is_ok, err_msg)

False missing keys: root.bio.birthplace.city 


In [36]:
# michael's bio.dob.month is the string 'May' rather than an int.
is_ok, err_msg = recurse_validate(michael, template, 'root')
print(is_ok, err_msg)

False incorrect type: root.bio.dob.month -> expected int, found str


In [37]:
# bad_john carries an extra 'house' key under bio.birthplace.
is_ok, err_msg = recurse_validate(bad_john, template, 'root')
print(is_ok, err_msg)

False  extra keys: root.bio.birthplace.house


In [38]:
def validate(data, template):
    """Validate `data` against `template`, starting from an empty root path.

    Thin entry point that spares callers from supplying the initial path.

    Returns:
        Whatever recurse_validate returns for the top-level dictionaries.
    """
    root_path = ''
    outcome = recurse_validate(data, template, root_path)
    return outcome

In [39]:
# (record, display label) pairs consumed by the validation loops.
persons = (
    (john, 'John'),
    (eric, 'Eric'),
    (michael, 'Michael'),
    (bad_john, 'Bad John'),
)

In [40]:
# Validate every record and print one summary line per person.
for person, name in persons:
    is_ok, err_msg = validate(person, template)
    print(f'{name}: valid={is_ok}: {err_msg}')

John: valid=True: None
Eric: valid=False: missing keys: .bio.birthplace.city 
Michael: valid=False: incorrect type: .bio.dob.month -> expected int, found str
Bad John: valid=False:  extra keys: .bio.birthplace.house


In [41]:
class SchemaError(Exception):
    """Base exception for data that does not conform to a template."""

In [42]:
def validate(data, template):
    """Validate `data` against `template`, raising instead of returning a flag.

    Raises:
        SchemaError: carrying the message reported by recurse_validate when
            validation fails.
    """
    passed, reason = recurse_validate(data, template, '')
    if passed:
        return
    raise SchemaError(reason)

In [43]:
# validate now raises on failure, so this loop stops at the first invalid
# record with an uncaught SchemaError.
for person, name in persons:
    validate(person, template)


SchemaError: missing keys: .bio.birthplace.city 

In [45]:
# Same loop with the exception handled. The try wraps the whole loop, so
# validation still stops at the first invalid record.
try:
    for person, name in persons:
        validate(person, template)
except SchemaError as ex:
    print('Validation failed', str(ex))

Validation failed missing keys: .bio.birthplace.city 


In [46]:
class SchemaKeyMismatch(SchemaError):
    """Schema failure caused by missing and/or extra keys."""

class SchemaTypeMismatch(SchemaError, TypeError):
    """Schema failure caused by a value of the wrong type.

    Also derives from TypeError, so handlers written for either SchemaError
    or TypeError will catch it.
    """

In [51]:
def match_keys(data, valid, path):
    """Raise unless `data` has exactly the same keys as `valid`.

    Only the key sets are compared; values are never inspected.

    Args:
        data: dictionary being validated.
        valid: dictionary whose keys are the expected keys.
        path: dotted prefix used to name offending keys in the message.

    Returns:
        None when both key sets are equal.

    Raises:
        SchemaKeyMismatch: when keys are missing and/or extra; the message
            lists them as `path.key`, each group in sorted order.
    """
    data_keys = data.keys()
    valid_keys = valid.keys()

    extra_keys = data_keys - valid_keys
    missing_keys = valid_keys - data_keys

    if not (missing_keys or extra_keys):
        return

    def describe(label, keys):
        # Sorted so the message does not depend on set iteration order.
        return label + ', '.join(sorted(path + '.' + str(key) for key in keys))

    parts = []
    if missing_keys:
        parts.append(describe('missing keys: ', missing_keys))
    if extra_keys:
        parts.append(describe('extra keys: ', extra_keys))
    # Join only the parts that exist so the message carries no stray
    # leading or trailing space.
    raise SchemaKeyMismatch(' '.join(parts))


In [52]:
def match_types(data, template, path):
    """Raise unless each value in `data` has the type `template` asks for.

    Only one level is inspected: where the template value is itself a dict,
    the data value merely has to be a dict; its contents are not examined.
    The check uses isinstance, so instances of subclasses are accepted.

    Args:
        data: dictionary being validated.
        template: dictionary mapping each key to an expected type, or to a
            nested dict template.
        path: dotted prefix used to name the offending key in the message.

    Returns:
        None when every template key passes.

    Raises:
        SchemaTypeMismatch: for the first value whose type does not match.
    """
    for key, value in template.items():
        # A nested template only requires "some dict" at this level.
        template_type = dict if isinstance(value, dict) else value
        # A bare object() stands in for an absent key, so a missing key is
        # reported as "found object" unless the template asks for object.
        data_value = data.get(key, object())
        if not isinstance(data_value, template_type):
            # str(key) keeps non-string keys from breaking the concatenation,
            # matching how match_keys formats its paths.
            err_msg = ('incorrect type: ' + path + '.' + str(key) +
                       ' -> expected ' + template_type.__name__ +
                       ', found ' + type(data_value).__name__)
            raise SchemaTypeMismatch(err_msg)
 
            

In [53]:
def recurse_validate(data, template, path):
    """Validate `data` against `template`, descending into nested templates.

    At each level the keys are checked first (match_keys), then the value
    types (match_types); only then are nested dict templates visited.
    Nothing is caught here, so whatever those checks raise propagates to
    the caller and validation stops at the first failure.

    Args:
        data: dictionary being validated.
        template: dictionary describing the expected keys and types.
        path: dotted path of this level, used as the prefix in messages.
    """
    match_keys(data, template, path)
    match_types(data, template, path)

    # Walk nested templates in the template's own key order. Collecting the
    # keys into a set first would make the first reported nested error depend
    # on hash ordering, which can differ from run to run.
    for key, sub_template in template.items():
        if not isinstance(sub_template, dict):
            continue
        sub_path = path + '.' + str(key)
        # data[key] exists and is a dict here: both checks above passed.
        recurse_validate(data[key], sub_template, sub_path)

In [54]:
def validate(data, template):
    """Validate `data` against `template`, starting from an empty root path.

    Returns None; exceptions raised by recurse_validate are not caught here
    and propagate to the caller.
    """
    root_path = ''
    recurse_validate(data, template, root_path)

In [55]:
# Conforming record: validate completes without raising.
validate(john, template)

In [56]:
# Missing key: a SchemaKeyMismatch propagates out of the cell.
validate(eric, template)

SchemaKeyMismatch: missing keys: .bio.birthplace.city 

In [57]:
# Wrong value type: a SchemaTypeMismatch propagates out of the cell.
validate(michael, template)

SchemaTypeMismatch: incorrect type: .bio.dob.month -> expected int, found str

In [58]:
# Nothing is raised for a conforming record, so nothing is printed.
try:
    validate(john, template)
except SchemaError as ex:
    print(ex)

In [59]:
# SchemaKeyMismatch derives from SchemaError, so the base-class handler
# catches it.
try:
    validate(eric, template)
except SchemaError as ex:
    print(ex)

missing keys: .bio.birthplace.city 


In [62]:
# Handle only the specific subclass raised for a type mismatch.
try:
    validate(michael, template)
except SchemaTypeMismatch as ex:
    print(ex)

incorrect type: .bio.dob.month -> expected int, found str


In [70]:
# except clauses are tried top to bottom and the first match wins.
try:
    validate(bad_john, template)
# Uncomment the two handlers below for subclass-specific handling; they must
# stay above the SchemaError handler to take effect.
# except SchemaKeyMismatch as ex:
#     print('handling a key mismatch exception', ex)
# except SchemaTypeMismatch as ex:
#     print('handling a type mismatch exception', ex)
except SchemaError as ex:
    print('handling some general schema exception', ex)
# SchemaTypeMismatch is also a TypeError, but the SchemaError clause above
# would catch it first; this clause only sees other TypeErrors.
except TypeError as ex:
    print('handling a general type exception', ex)

handling some general schema exception  extra keys: .bio.birthplace.house
