# Imports

In [None]:
from functools import wraps
import re 
import time

# Topics

## Extract All Names from json (or dict) object like the following 

In [4]:
data = {
    'data': [
        {
            'name': 'bob',
            'age': 60,
            'spouse': {'name': 'mary', 'age': 60},
            'children': [
                {
                    'name': 'tom', 
                    'age': 32,
                    'spouse': {
                        'name': 'beth',
                        'age': 32
                    },
                    'children': [
                        {'name': 'rocky', 'age':4},
                        {'name': 'fifi', 'age':2},
                    ]
                },
                {
                    'name': 'jerry', 'age': 30,
                    'spouse': None, 'children': None
                },
            ]
        }
    ]
}

In [5]:
def timeit(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        print(f'{func.__name__} took {time.perf_counter() - start} seconds')
        return result
    return wrapper

In [6]:
@timeit
def find_by_regex(data):
    dict_Str = str(data)
    matches = re.findall(r"'name': '([^']*)'", dict_Str)
    return matches

find_by_regex(data)

find_by_regex took 0.0001328999933321029 seconds


['bob', 'mary', 'tom', 'beth', 'rocky', 'fifi', 'jerry']

In [7]:
@timeit
def find(data, output=[]):

    if isinstance(data,dict):
        # if data is a dictionary, first try finding 'name'
        if 'name' in data:
            output.append(data['name'])
        
        # recursively find() all values 
        for value in data.values():
            find(value, output=output)
    
    if isinstance(data,list):
        # if data is a list, recursively find() all elements
        for value in data:
            find(value, output=output)

    return output

In [8]:
find(data,[])

find took 2.7999922167509794e-06 seconds
find took 1.8999999156221747e-06 seconds
find took 1.0999938240274787e-06 seconds
find took 1.2999953469261527e-06 seconds
find took 1.8699996871873736e-05 seconds
find took 8.999923011288047e-07 seconds
find took 8.999923011288047e-07 seconds
find took 8.999923011288047e-07 seconds
find took 7.999915396794677e-07 seconds
find took 1.7099999240599573e-05 seconds
find took 1.0999938240274787e-06 seconds
find took 8.00006091594696e-07 seconds
find took 1.550000160932541e-05 seconds
find took 7.999915396794677e-07 seconds
find took 8.999923011288047e-07 seconds
find took 1.3899989426136017e-05 seconds
find took 4.290000651963055e-05 seconds
find took 8.600000001024455e-05 seconds
find took 7.00005330145359e-07 seconds
find took 7.999915396794677e-07 seconds
find took 2.1000014385208488e-06 seconds
find took 8.00006091594696e-07 seconds
find took 2.890000178012997e-05 seconds
find took 0.00014229999214876443 seconds
find took 0.0005042999982833862 s

['bob', 'mary', 'tom', 'beth', 'rocky', 'fifi', 'jerry']

In [9]:
@timeit
def find2(data):
    queue = [data]
    output = []
    while len(queue) > 0:
        current = queue.pop(0)

        if type(current) == dict:
            if 'name' in current:
                output.append(current['name'])
        
            queue.extend(list(current.values()))
        
        if type(current) == list:
            queue.extend(current)
    
    return output

find2(data)

find2 took 3.609999839682132e-05 seconds


['bob', 'mary', 'tom', 'jerry', 'beth', 'rocky', 'fifi']