In [1]:
import os
import json
import csv
import io
from pathlib import Path
from typing import Iterable
from types import SimpleNamespace

from icecream import ic
from dotenv import load_dotenv
from benedict import benedict
from pydantic import BaseModel, Field, EmailStr

In [2]:
department = """
{
  "employees": [
    {
      "id": 1,
      "name": "Alice",
      "contact": {
        "email": "alice@example.com",
        "phone": "123-456-7890"
      }
    },
    {
      "id": 2,
      "name": "Bob",
      "contact": {
        "email": "bob@example.com",
        "phone": "987-654-3210"
      }
    }
  ],
  "department": {
    "name": "Engineering",
    "floor": 5,
    "manager": {
      "name": "Eve",
      "id": 101
    }
  },
  "location": "New York",
  "open_positions": 3,
  "is_hiring": true
}
"""

department_dict = json.loads(department)

In [3]:
employee_1_phone = department_dict["employees"][0]["contact"]["phone"]
print(f"Phone of employee one: {employee_1_phone}")

Phone of employee one: 123-456-7890


What if we want to use dot notation to access the keys of a dictionary? Can we?

Below, I will demonstrate three different ways to achieve this:

1. Using SimpleNamespace (works with JSON files)

2. Using Pydantic by decomposing the structure

3. Using benedict, a library that puts dictionaries on steroids — this will be explored further

### SimpleNamespace

To access elements via dot notation we will exploit the recursive nature of the `object_hook` parameter of the `json.load` / `json.loads` functions.

In [4]:
s = SimpleNamespace(a=1, b=2, c=SimpleNamespace(a2="a", b2="b", c2="c"))
print(s.a)
print(s.c)
print(s.c.a2)

1
namespace(a2='a', b2='b', c2='c')
a


In [5]:
# Let's illustrate how object_hook works
dummy_json = """
{
    "a": 1,
    "b": 2,
    "c": {
        "a2": "a",
        "b2": "b",
        "c2": "c"
    }
}
"""


def print_input(value):
    """Echo the object handed over by the JSON decoder.

    Used as an ``object_hook`` purely to show what the hook receives. Nothing
    is returned, so the decoder ends up with ``None`` in place of each object.
    """
    print(value)
    return None


json.loads(dummy_json, object_hook=print_input)

{'a2': 'a', 'b2': 'b', 'c2': 'c'}
{'a': 1, 'b': 2, 'c': None}


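`json.loads` calls the function given in **object_hook** once for every JSON object (dictionary) it decodes, innermost first, and uses the function's return value in place of that dictionary. That is why the nested dictionary is printed before the outer one, and why `c` shows up as `None` in the second line: `print_input` returns nothing.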

In [6]:
dummy_dict = json.loads(dummy_json, object_hook=lambda d: SimpleNamespace(**d))
print(dummy_dict.c.a2)

a


In [7]:
# The same approach on the departments json
department_dict = json.loads(department, object_hook=lambda d: SimpleNamespace(**d))
print(department_dict.employees[0].contact.phone)

123-456-7890


Let's try to replicate the recursive behavior of the `object_hook` parameter so we can add dot-notation (attribute) access to dictionaries

In [8]:
from types import SimpleNamespace
from typing import Any


def recursive_simplenamespace(data: dict) -> SimpleNamespace:
    """Turn a (possibly nested) dictionary into nested ``SimpleNamespace`` objects.

    Mirrors what ``json.loads(..., object_hook=...)`` does, but for a dictionary
    that already lives in memory. Only ``dict`` and ``list`` values are descended
    into; every other value is stored unchanged.

    :param data: dictionary whose keys become attribute names
    :return: namespace exposing the keys of ``data`` as attributes
    """

    def _convert(value: Any) -> Any:
        # Lists are rebuilt element by element so dictionaries inside them are converted too.
        if isinstance(value, list):
            return list(map(_convert, value))
        if isinstance(value, dict):
            return recursive_simplenamespace(value)
        # Anything else is a leaf and is kept as it is.
        return value

    attributes = {}
    for name, raw_value in data.items():
        attributes[name] = _convert(raw_value)
    return SimpleNamespace(**attributes)


department_dict = json.loads(department)
department_simplenamespace = recursive_simplenamespace(department_dict)
print(department_simplenamespace)

namespace(employees=[namespace(id=1, name='Alice', contact=namespace(email='alice@example.com', phone='123-456-7890')), namespace(id=2, name='Bob', contact=namespace(email='bob@example.com', phone='987-654-3210'))], department=namespace(name='Engineering', floor=5, manager=namespace(name='Eve', id=101)), location='New York', open_positions=3, is_hiring=True)


In [9]:
l = [1, 2, 3]
s = {1, 2, 3}


def gen(n):
    """Yield the integers from 0 up to (but not including) ``n``, one at a time."""
    yield from range(n)


g = gen(3)

ic(isinstance(l, Iterable))
ic(isinstance(s, Iterable))
ic(isinstance(g, Iterable))

ic| isinstance(l, Iterable): True
ic| isinstance(s, Iterable): True
ic| isinstance(g, Iterable): True


True

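Notice that a JSON string supports only a limited set of data types (its arrays always become lists). If we want to apply `recursive_simplenamespace` to an arbitrary Python dictionary, whose values may be any iterable such as a generator, we need to make the function a little more generic.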

In [10]:
def employee_gen():
    """Yield the two sample employee records, one after the other."""
    yield {"id": 1, "name": "Alice", "contact": {"email": "alice@example.com", "phone": "123-456-7890"}}
    yield {"id": 2, "name": "Bob", "contact": {"email": "bob@example.com", "phone": "987-654-3210"}}


department_dict = {
    "employees": employee_gen(),
    "department": {"name": "Engineering", "floor": 5, "manager": {"name": "Eve", "id": 101}},
    "location": "New York",
    "open_positions": 3,
    "is_hiring": True,
}


print(recursive_simplenamespace(department_dict))

namespace(employees=<generator object employee_gen at 0x000001BD7CC597E0>, department=namespace(name='Engineering', floor=5, manager=namespace(name='Eve', id=101)), location='New York', open_positions=3, is_hiring=True)


In [11]:
from types import SimpleNamespace


def recursive_simplenamespace(data: dict) -> SimpleNamespace:
    """Turn a (possibly nested) mapping into nested ``SimpleNamespace`` objects.

    Generic version of the helper: besides ``dict`` and ``list`` it also handles
    any other mapping type and any other iterable (tuples, sets, generators, ...).

    * mappings become ``SimpleNamespace`` objects (recursively)
    * other iterables are consumed and become lists of converted items
    * ``str``, ``bytes`` and ``bytearray`` are iterable but are kept whole
    * every other value is stored unchanged

    :param data: mapping whose keys become attribute names
    :return: namespace exposing the keys of ``data`` as attributes
    """
    # Imported locally so this cell does not depend on a later import cell.
    from collections.abc import Mapping

    def _convert(value: Any) -> Any:
        """
        Helper that only recurses when the value is a container.

        :param value: any value found inside ``data``
        :return: the value itself, a SimpleNamespace, or a list of converted items
        """
        # Check Mapping before Iterable: every mapping is iterable, but iterating
        # it yields only the keys and would silently drop the values.
        if isinstance(value, Mapping):
            return recursive_simplenamespace(value)
        if isinstance(value, Iterable) and not isinstance(value, (str, bytes, bytearray)):
            return [_convert(item) for item in value]
        return value

    converted = {k: _convert(v) for k, v in data.items()}
    return SimpleNamespace(**converted)


department_simplenamespace = recursive_simplenamespace(department_dict)
print(department_simplenamespace)

namespace(employees=[namespace(id=1, name='Alice', contact=namespace(email='alice@example.com', phone='123-456-7890')), namespace(id=2, name='Bob', contact=namespace(email='bob@example.com', phone='987-654-3210'))], department=namespace(name='Engineering', floor=5, manager=namespace(name='Eve', id=101)), location='New York', open_positions=3, is_hiring=True)


Caveat  !!!

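SimpleNamespace cannot offer dot access for keys that are not valid Python identifiers, e.g. keys that start with a digit or contain a dot. Such keys are still stored, but they can only be reached with `getattr`.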

In [12]:
data = {"first.name": "Alice", "age": 30}
ns = json.loads(json.dumps(data), object_hook=lambda d: SimpleNamespace(**d))
print(ns.age)  # Works
try:
    print(ns.first.name)  # AttributeError: 'SimpleNamespace' object has no attribute 'first'
except AttributeError as e:
    print(e)
    print(getattr(ns, "first.name", "N/A"))

30
'types.SimpleNamespace' object has no attribute 'first'
Alice


### Pydantic

We will take advantage of how pydantic handles custom (nested model) data types.

In [13]:
# Schema of the department JSON, decomposed into one model per nested object.
# Inner models are declared first so the outer ones can use them as field types.


class Manager(BaseModel):
    # Exposed as `id_`, populated from the "id" key through the alias
    id_: int = Field(alias="id")
    name: str


class DepartmentInfo(BaseModel):
    name: str
    floor: int
    # Nested object -> nested model, which is what enables department.manager.name
    manager: Manager


class Contact(BaseModel):
    email: EmailStr
    phone: str


class Employee(BaseModel):
    # Exposed as `id_`, populated from the "id" key through the alias
    id_: int = Field(alias="id")
    name: str
    contact: Contact


class Department(BaseModel):
    # Only these three top-level keys are modelled; the JSON's "open_positions"
    # and "is_hiring" keys have no matching field here.
    employees: list[Employee]
    department: DepartmentInfo
    location: str


# Parse and validate the raw JSON string in one step
department_model = Department.model_validate_json(department)

In [14]:
department_model.employees

[Employee(id_=1, name='Alice', contact=Contact(email='alice@example.com', phone='123-456-7890')),
 Employee(id_=2, name='Bob', contact=Contact(email='bob@example.com', phone='987-654-3210'))]

In [15]:
department_model.department.manager.name

'Eve'

In [16]:
data = {
    "first.name": "Alice",
    "age": 30,
}


class Data(BaseModel):
    # "first.name" is not a valid attribute name, so the value is exposed as
    # `first_name` and read from the "first.name" key through the alias
    first_name: str = Field(alias="first.name")
    age: int


m = Data.model_validate(data)
m.first_name

'Alice'

`Takeaways`

- With pydantic there is a built-in solution for keys that contain dots: field aliases (the case where a key starts with a digit is not covered here).
- The disadvantage is that we must know the schema of the dict/json in advance so we can define our models.


### Benedict

`Dictionaries on steroids`

In [17]:
bene_department = benedict(department)
ic(isinstance(bene_department, dict))  # subclass of dict
ic(type(bene_department))
print()

ic| isinstance(bene_department, dict): True
ic| type(bene_department): <class 'benedict.dicts.benedict'>





In [18]:
ic(bene_department.employees[0].id)  # keyattr
ic(bene_department["employees[0].id"])  # keypath
ic(bene_department[["employees[0]", "id"]])  # keylist
print()

ic| bene_department.employees[0].id: 1
ic| bene_department["employees[0].id"]: 1
ic| bene_department[["employees[0]", "id"]]: 1





Now, what happens if the key itself contains a dot?

In [19]:
data = {"a": {"a.1": 1, "a.2": 2}, "b": {"b.1": 1, "b.2": 2}}

try:
    benedict(data)
except ValueError as e:
    print(e)

Key should not contain keypath separator '.', found: 'a.1'.


For this issue benedict provides a workaround that partially restores the functionality: a custom keypath separator.

In [20]:
bene_data = benedict(data, keypath_separator="|")
ic(bene_data["a|a.1"])
ic(bene_data[["a", "a.1"]])
ic(bene_data.a["a.1"])
print()

ic| bene_data["a|a.1"]: 1
ic| bene_data[["a", "a.1"]]: 1
ic| bene_data.a["a.1"]: 1





In [21]:
data = {"a": {"a1": 1, "a2": 2}, "b": {"b1": 1, "b2": 2}}

bene_data = benedict(
    data,
    keyattr_enabled=False,
)
try:
    bene_data.a
except AttributeError as e:
    print(e)

'benedict' object has no attribute 'a'


Mutations and Shared-Reference


In [22]:
bene_data = benedict(data)
bene_data.a.a1 = 10

In [23]:
bene_data.a.a1

10

In [24]:
data["a"]["a1"]

10

Notice that a benedict wraps the original dictionary by reference instead of copying it: the mutation made through `bene_data` is visible in `data` as well.

In [25]:
bene_data = benedict(data)
bene_data["a.a1"] = 1
ic(bene_data.a.a1)
ic(data["a"]["a1"])

ic| bene_data.a.a1: 1
ic| data["a"]["a1"]: 1


1

Other ways to get benedict dictionaries

`From url`

In [26]:
def load_env_file(directory_name: str, filename: str):
    """Locate ``<directory_name>/<filename>`` in an ancestor directory and load it.

    The search starts in the directory of the current file. When ``__file__`` is
    not defined (e.g. inside a notebook) it starts in the parent of the working
    directory instead. From there it walks upwards, checking every directory up
    to and including the filesystem root. The first match is loaded with
    ``load_dotenv(..., override=True)``.

    :param directory_name: name of the directory expected to hold the env file
    :param filename: name of the env file
    :raises FileNotFoundError: if none of the visited directories contains the file
    """
    try:
        start = Path(__file__).resolve().parent
    except NameError:
        # NOTE(review): this starts one level above the working directory, so a
        # match directly inside the working directory is not considered —
        # confirm that is intended.
        start = Path.cwd().resolve().parent

    # `parents` ends at the filesystem root, so the walk always terminates.
    # (Comparing a Path against the string `Path.root` is never equal, which
    # previously made the loop spin forever when the file did not exist.)
    for directory in (start, *start.parents):
        candidate = directory / directory_name / filename
        if candidate.exists():
            load_dotenv(dotenv_path=candidate, override=True)
            return
    raise FileNotFoundError(f"{directory_name}/{filename} not found in {start} or any of its parents")


# Load the env file first so the API key is available in os.environ
load_env_file("configs", "scraper.env")
# .get() yields None when the variable is missing; the f-string below would then embed "None"
api_key = os.environ.get("SCRAPEOPS_API_KEY")

# benedict fetches the URL and parses the response body as JSON.
# NOTE(review): the key is placed right after "?" without a parameter name — confirm the
# stored value already has the "name=value" form the endpoint expects.
fake_headers = benedict(f"https://headers.scrapeops.io/v1/user-agents?{api_key}", format="json")
fake_headers.result

['Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
 'Mozilla/5.0 (iPhone; CPU iPhone OS 18_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Mobile/15E148 Safari/604.1',
 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15',
 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15',
 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
 'Mozilla/

`From csv`

In [27]:
# Sample data: the header row first, then one row per person
data = [
    ["id", "name", "email"],
    [1, "Alice", "alice@example.com"],
    [2, "Bob", "bob@example.com"],
    [3, "Charlie", "charlie@example.com"],
]

# Render the rows as CSV text into an in-memory text stream
csv_buffer = io.StringIO()
writer = csv.writer(csv_buffer)
writer.writerows(data)

# Persist the rendered text; newline="" writes the csv module's line endings untouched
with Path("output.csv").open(mode="w", newline="", encoding="utf-8") as f:
    f.write(csv_buffer.getvalue())

In [28]:
# The parsed rows are stored as a list under the "values" key
csv_file = benedict.from_csv("output.csv")
ic(csv_file)
csv_file["values"]  # csv_file.values cannot be used here: it clashes with the built-in dict.values method

ic| csv_file: {'values': [{'id': '1', 'name': 'Alice', 'email': 'alice@example.com'}, {'id': '2', 'name': 'Bob', 'email': 'bob@example.com'}, {'id': '3', 'name': 'Charlie', 'email': 'charlie@example.com'}]}


[{'id': '1', 'name': 'Alice', 'email': 'alice@example.com'},
 {'id': '2', 'name': 'Bob', 'email': 'bob@example.com'},
 {'id': '3', 'name': 'Charlie', 'email': 'charlie@example.com'}]

In [29]:
type(csv_file["values"][0])

benedict.dicts.benedict

When it comes to creating benedict objects from various sources, I recommend it only for very simple cases because:

1.  url: you cannot specify timeouts or headers
2.  csv: lines are loaded eagerly
3.  json: you cannot use advanced deserialization

Finally, there are a lot more sources that benedict can handle which are not covered.


#### Benedict Library's selling points

- traverse
- filter
- match
- flatten

`traverse`

In [33]:
print(department)


{
  "employees": [
    {
      "id": 1,
      "name": "Alice",
      "contact": {
        "email": "alice@example.com",
        "phone": "123-456-7890"
      }
    },
    {
      "id": 2,
      "name": "Bob",
      "contact": {
        "email": "bob@example.com",
        "phone": "987-654-3210"
      }
    }
  ],
  "department": {
    "name": "Engineering",
    "floor": 5,
    "manager": {
      "name": "Eve",
      "id": 101
    }
  },
  "location": "New York",
  "open_positions": 3,
  "is_hiring": true
}



In [39]:
def print_input(_dict, key, value):
    """Print the container, key and value of one traversal step, then a separator line."""
    report = f"{_dict=}\n {key=}\n {value=}"
    print(report, end="\n**********\n")


bene_department = benedict(department)
bene_department.traverse(print_input)  # Notice the recursive behaviour of this method

_dict={'employees': [{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}], 'department': {'name': 'Engineering', 'floor': 5, 'manager': {'name': 'Eve', 'id': 101}}, 'location': 'New York', 'open_positions': 3, 'is_hiring': True}
 key='employees'
 value=[{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}]
**********
_dict=[{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}]
 key=0
 value={'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}
**********
_dict={'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890

Below is a link to the original implementation of the `traverse` method:

[link] <https://github.com/fabiocaccamo/python-benedict/blob/main/benedict/core/traverse.py?>


In the last section of this jupyter notebook, I will provide another way to implement this kind of recursive behaviour.

`filter`

In [49]:
starwars = benedict("https://swapi.py4e.com/api/people", format="json")

In [51]:
starwars.results[0]  # Let's say we want to keep starwars whose height is above 175 cm

{'name': 'Luke Skywalker', 'height': '172', 'mass': '77', 'hair_color': 'blond', 'skin_color': 'fair', 'eye_color': 'blue', 'birth_year': '19BBY', 'gender': 'male', 'homeworld': 'https://swapi.py4e.com/api/planets/1/', 'films': ['https://swapi.py4e.com/api/films/1/', 'https://swapi.py4e.com/api/films/2/', 'https://swapi.py4e.com/api/films/3/', 'https://swapi.py4e.com/api/films/6/', 'https://swapi.py4e.com/api/films/7/'], 'species': ['https://swapi.py4e.com/api/species/1/'], 'vehicles': ['https://swapi.py4e.com/api/vehicles/14/', 'https://swapi.py4e.com/api/vehicles/30/'], 'starships': ['https://swapi.py4e.com/api/starships/12/', 'https://swapi.py4e.com/api/starships/22/'], 'created': '2014-12-09T13:50:51.644000Z', 'edited': '2014-12-20T21:17:56.891000Z', 'url': 'https://swapi.py4e.com/api/people/1/'}

In [54]:
def predicate(key, value):
    """Keep an item only when its key is one of the wanted field names."""
    wanted_keys = ("name", "height", "mass")
    return key in wanted_keys


[d.filter(predicate) for d in starwars.results]  # Useful to keep the keys you want

[{'name': 'Luke Skywalker', 'height': '172', 'mass': '77'},
 {'name': 'C-3PO', 'height': '167', 'mass': '75'},
 {'name': 'R2-D2', 'height': '96', 'mass': '32'},
 {'name': 'Darth Vader', 'height': '202', 'mass': '136'},
 {'name': 'Leia Organa', 'height': '150', 'mass': '49'},
 {'name': 'Owen Lars', 'height': '178', 'mass': '120'},
 {'name': 'Beru Whitesun lars', 'height': '165', 'mass': '75'},
 {'name': 'R5-D4', 'height': '97', 'mass': '32'},
 {'name': 'Biggs Darklighter', 'height': '183', 'mass': '84'},
 {'name': 'Obi-Wan Kenobi', 'height': '182', 'mass': '77'}]

In [55]:
def predicate(key, value):
    """Keep an item only when its value is one of the wanted character names."""
    wanted_values = ("Luke Skywalker", "C-3PO")
    return value in wanted_values


[d.filter(predicate) for d in starwars.results]  # Not so useful ...

[{'name': 'Luke Skywalker'}, {'name': 'C-3PO'}, {}, {}, {}, {}, {}, {}, {}, {}]

`match`

In [60]:
print(starwars.dump())

{
    "count": 87,
    "next": "https://swapi.py4e.com/api/people/?page=2",
    "previous": null,
    "results": [
        {
            "birth_year": "19BBY",
            "created": "2014-12-09T13:50:51.644000Z",
            "edited": "2014-12-20T21:17:56.891000Z",
            "eye_color": "blue",
            "films": [
                "https://swapi.py4e.com/api/films/1/",
                "https://swapi.py4e.com/api/films/2/",
                "https://swapi.py4e.com/api/films/3/",
                "https://swapi.py4e.com/api/films/6/",
                "https://swapi.py4e.com/api/films/7/"
            ],
            "gender": "male",
            "hair_color": "blond",
            "height": "172",
            "homeworld": "https://swapi.py4e.com/api/planets/1/",
            "mass": "77",
            "name": "Luke Skywalker",
            "skin_color": "fair",
            "species": [
                "https://swapi.py4e.com/api/species/1/"
            ],
            "starships": [
       

In [57]:
starwars.match("count")

[87]

In [61]:
starwars.match("results[*].name")

['Luke Skywalker',
 'C-3PO',
 'R2-D2',
 'Darth Vader',
 'Leia Organa',
 'Owen Lars',
 'Beru Whitesun lars',
 'R5-D4',
 'Biggs Darklighter',
 'Obi-Wan Kenobi']

In [66]:
starwars.match("*.name")  # this will fetch now all the values whose key is name.

['Luke Skywalker',
 'C-3PO',
 'R2-D2',
 'Darth Vader',
 'Leia Organa',
 'Owen Lars',
 'Beru Whitesun lars',
 'R5-D4',
 'Biggs Darklighter',
 'Obi-Wan Kenobi']

Let me show the difference between "results[\*].name" and  "*.name"

In [75]:
starwars.name = {"name": "demo for match method"}

In [76]:
starwars.match("results[*].name")  # it iterates the list of starwars.results

['Luke Skywalker',
 'C-3PO',
 'R2-D2',
 'Darth Vader',
 'Leia Organa',
 'Owen Lars',
 'Beru Whitesun lars',
 'R5-D4',
 'Biggs Darklighter',
 'Obi-Wan Kenobi']

In [77]:
starwars.match("*.name")  # It will iterate through all the collections of starwars dict

['demo for match method',
 'Luke Skywalker',
 'C-3PO',
 'R2-D2',
 'Darth Vader',
 'Leia Organa',
 'Owen Lars',
 'Beru Whitesun lars',
 'R5-D4',
 'Biggs Darklighter',
 'Obi-Wan Kenobi']

`flatten`

In [80]:
# Let's go back to our department dictionary

print(bene_department.dump())

{
    "department": {
        "floor": 5,
        "manager": {
            "id": 101,
            "name": "Eve"
        },
        "name": "Engineering"
    },
    "employees": [
        {
            "contact": {
                "email": "alice@example.com",
                "phone": "123-456-7890"
            },
            "id": 1,
            "name": "Alice"
        },
        {
            "contact": {
                "email": "bob@example.com",
                "phone": "987-654-3210"
            },
            "id": 2,
            "name": "Bob"
        }
    ],
    "is_hiring": true,
    "location": "New York",
    "open_positions": 3
}


In [86]:
# Notice that flatten is not applied to the dictionaries inside the list
print(bene_department.flatten(separator="_").dump())

{
    "department_floor": 5,
    "department_manager_id": 101,
    "department_manager_name": "Eve",
    "department_name": "Engineering",
    "employees": [
        {
            "contact": {
                "email": "alice@example.com",
                "phone": "123-456-7890"
            },
            "id": 1,
            "name": "Alice"
        },
        {
            "contact": {
                "email": "bob@example.com",
                "phone": "987-654-3210"
            },
            "id": 2,
            "name": "Bob"
        }
    ],
    "is_hiring": true,
    "location": "New York",
    "open_positions": 3
}


### Bonus section


The goal of this section is to show one way to apply recursion, especially when the function has to descend into containers, e.g. dictionaries, lists and sets.

Moreover, I will apply the same architecture to add recursive behaviour to the flatten function. With that, I think it will become clearer that this pattern is reusable.

In [148]:
from functools import singledispatch
from collections.abc import Mapping, Callable


def traverse(dict_, callback):
    """Walk ``dict_`` recursively and call ``callback(parent, key, value)`` for every entry.

    Each entry is reported before its children. For items of a list, tuple, set
    or frozenset the "key" is the position produced by ``enumerate`` and the
    "parent" is that collection. Strings and bytes are reported as plain values.

    The dispatcher is also stored as ``traverse.apply_callback`` so it can be
    inspected from outside.

    :param dict_: the mapping to walk
    :param callback: callable invoked as ``callback(parent, key, value)``
    :return: None
    """

    @singledispatch
    def apply_callback(value, key, dict_):
        # Fallback for leaf values: report them, there is nothing to descend into.
        return callback(dict_, key, value)

    def _visit_mapping(value, key, dict_):
        callback(dict_, key, value)
        for child_key, child_value in value.items():
            apply_callback(child_value, child_key, value)

    def _visit_collection(value, key, dict_):
        callback(dict_, key, value)  # here dict_ may itself be a list-like parent
        for position, element in enumerate(value):
            apply_callback(element, position, value)

    apply_callback.register(Mapping, _visit_mapping)
    # Iterable is not used as dispatch type because str and bytes are iterables too.
    for collection_type in (list, tuple, set, frozenset):
        apply_callback.register(collection_type, _visit_collection)

    traverse.apply_callback = apply_callback

    for top_key, top_value in dict_.items():
        apply_callback(top_value, top_key, dict_)


def print_input(_dict, key, value):
    """Report one traversal step (parent container, key, value) and end it with a row of asterisks."""
    step_report = f"{_dict=}\n {key=}\n {value=}"
    print(step_report, end="\n**********\n")


traverse(bene_department, print_input)

_dict={'employees': [{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}], 'department': {'name': 'Engineering', 'floor': 5, 'manager': {'name': 'Eve', 'id': 101}}, 'location': 'New York', 'open_positions': 3, 'is_hiring': True}
 key='employees'
 value=[{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}]
**********
_dict=[{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}]
 key=0
 value={'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}
**********
_dict={'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890

In [149]:
# The assignment targets the benedict class itself, so it is not limited to one instance.
benedict.traverse = traverse  # overwrite the original traverse method
bene_department = benedict.from_json(department)
bene_department.traverse(print_input)

_dict={'employees': [{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}], 'department': {'name': 'Engineering', 'floor': 5, 'manager': {'name': 'Eve', 'id': 101}}, 'location': 'New York', 'open_positions': 3, 'is_hiring': True}
 key='employees'
 value=[{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}]
**********
_dict=[{'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}, {'id': 2, 'name': 'Bob', 'contact': {'email': 'bob@example.com', 'phone': '987-654-3210'}}]
 key=0
 value={'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890'}}
**********
_dict={'id': 1, 'name': 'Alice', 'contact': {'email': 'alice@example.com', 'phone': '123-456-7890

In [150]:
# Proof that I have overwritten the original method
bene_department.traverse.apply_callback

<function __main__.traverse.<locals>.apply_callback(value, key, dict_)>

In [124]:
def flatten(data: dict, storage: dict | None = None, sep="_") -> dict:
    if storage is None:
        storage = {}

    @singledispatch
    def execute(value, key, **kwargs):
        storage[key] = value

    @execute.register
    def _(value: Mapping, key, sep=sep, order=None):
        for nested_key, nested_value in value.items():
            if key:
                new_key = f"{key}{sep}{nested_key}"
            else:
                new_key = nested_key

            if order:
                new_key = f"{new_key}{sep}{order}"

            execute(nested_value, new_key)

    # Cannot use Iterable because str and bytes are iterables too...
    @execute.register(list)
    @execute.register(tuple)
    @execute.register(set)
    @execute.register(frozenset)
    def _(value, key, sep=sep, **kwargs):
        for idx, item in enumerate(value, start=1):
            execute(item, key, sep=sep, order=idx)

    execute(data, "")
    return storage

In [122]:
dummy_dict = {
    "a": 1,
    "b": 2,
    "c": {"a2": "a", "b2": "b", "c2": {"a3": "aa", "b3": "bb"}},
    "persons": [{"name": "bob", "age": 22}, {"name": "alice", "age": 23}],
}
flatten(dummy_dict, sep="|")

{'a': 1,
 'b': 2,
 'c|a2': 'a',
 'c|b2': 'b',
 'c|c2|a3': 'aa',
 'c|c2|b3': 'bb',
 'persons|name|1': 'bob',
 'persons|age|1': 22,
 'persons|name|2': 'alice',
 'persons|age|2': 23}

### Strengths of this Implementation
1. **Uses singledispatch elegantly**

This allows recursion to behave differently depending on the type — and keeps the logic organized and extensible.

2. **Avoids the Iterable trap**

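 Dispatching on `Iterable` is avoided because it would also match `str`, `bytes`, and `dict`, which can cause incorrect recursion or even infinite recursion (iterating a one-character string yields that same string again).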

Handles sets, tuples, and lists equally

Registering list, tuple, set, and frozenset ensures consistency across common container types.

3. **Recursion is explicitly controlled by type**

This makes the call stack easier to debug and avoids over-generalization.

4. **Supports custom separators and ordering (index-based)**

Useful for nested lists and maintaining key uniqueness.