# Dictionaries and Sets

## Modern dict Syntax

### dict Comprehensions

In [1]:
dial_codes = [
    (880, 'Bangladesh'),
    (55, 'Brazil'),
    (57, 'Colombia'),
    (86, 'China'),
    (91, 'India'),
    (62, 'Indonesia'),
    (81, 'Japan'),
    (234, 'Nigeria'),
    (92, 'Pakistan'),
    (7, 'Russia'),
    (1, 'United States'),
]

country_dial = {country: code for code, country in dial_codes}
country_dial

{'Bangladesh': 880,
 'Brazil': 55,
 'Colombia': 57,
 'China': 86,
 'India': 91,
 'Indonesia': 62,
 'Japan': 81,
 'Nigeria': 234,
 'Pakistan': 92,
 'Russia': 7,
 'United States': 1}

In [2]:
{code: country.upper()
 for country, code in sorted(country_dial.items())
    if code<70
 }

{55: 'BRAZIL',
 57: 'COLOMBIA',
 62: 'INDONESIA',
 7: 'RUSSIA',
 1: 'UNITED STATES'}

### Unpacking Mappings

In [3]:
def dump(**kwargs):
    """Return the keyword arguments received, collected into a dict.

    Handy for observing how ``**`` unpacking in a call merges several
    mappings and explicit keywords into a single ``kwargs`` dict.
    """
    received = kwargs
    return received

In [4]:
dump( **{'x':1}, y=2, **{'z':3} )

{'x': 1, 'y': 2, 'z': 3}

In [5]:
{'a': 0, **{'x': 1}, 'y': 2, **{'z': 3, 'x': 4}}

{'a': 0, 'x': 4, 'y': 2, 'z': 3}

### Merging Mappings with |

In [6]:
d1 = {'a': 1, 'b': 3}
d2 = {'a': 2, 'b': 4, 'c': 6}
d1 | d2 # union

{'a': 2, 'b': 4, 'c': 6}

In [7]:
d1

{'a': 1, 'b': 3}

In [8]:
d1 |= d2 # d1 <- d1∪d2
d1

{'a': 2, 'b': 4, 'c': 6}

## Pattern Matching with Mappings

In [9]:
def get_creators(record: dict) -> list:
    match record:
        case {'type':'book', 'api':2, 'authors':[*names]}:
            return names
        case {'type':'book', 'api':1, 'author':name}:
            return [name]
        case {'type':'book'}:
            raise ValueError(f"Invalid 'book' record: {record!r}")
        case {'type':'movie', 'director':name}:
            return [name]
        case _:
            raise ValueError(f'Invalid record: {record!r}')

In [10]:
b1 = dict(api=1, author='Douglas Hofstadter', type='book', title='Gödel, Escher, Bach')
get_creators( b1 )

['Douglas Hofstadter']

In [11]:
from collections import OrderedDict
b2 = OrderedDict(api=2, type='book',
                 title='Python in a Nutshell',
                 authors='Martelli Ravenscroft Holden'.split(),
                 )
get_creators(b2)

['Martelli', 'Ravenscroft', 'Holden']

In [12]:
try:
    get_creators({'type': 'book', 'pages': 770})
except Exception as e:
    print(f"{e=}")

e=ValueError("Invalid 'book' record: {'type': 'book', 'pages': 770}")


In [13]:
try:
    get_creators('Spam, spam, spam')
except Exception as e:
    print(f"{e=}")

e=ValueError("Invalid record: 'Spam, spam, spam'")


In [14]:
food = dict(category='ice cream', flavor='vanilla', cost=199)
match food:
    case {'category': 'ice cream', **details}:
        print(f'Ice cream details: {details}')

Ice cream details: {'flavor': 'vanilla', 'cost': 199}


## Standard API of Mapping Types

In [15]:
from collections import abc

In [16]:
my_dict = {}
isinstance(my_dict, abc.Mapping)

True

In [17]:
isinstance(my_dict, abc.MutableMapping)

True

In [18]:
try:
    tt = (1, 2, (30, 40))
    print(hash(tt))
except Exception as e:
    print(f"{e=}")

-3907003130834322577


In [19]:
try:
    # A tuple is hashable only if every item in it is hashable; the nested
    # list makes this one unhashable.
    tl = (1, 2, [30, 40])
    # Hash the tuple defined just above, so the TypeError is actually raised
    # and reported by the handler below.
    print(hash(tl))
except Exception as e:
    print(f"{e=}")

e=TypeError("unhashable type: 'list'")


In [20]:
try:
    tfz = (1, 2, frozenset([30, 40]))
    print(hash(tfz))
except Exception as e:
    print(f"{e=}")

5149391500123939311


### Overview of Common Mapping Methods

The way `d.update(m)` handles its first argument `m` is a prime example of _duck typing_: it first checks whether `m` has a keys method and, if it does, assumes it is a mapping. Otherwise, `update()` falls back to iterating over `m`, assuming its items are `(key, value)` pairs. The constructor for most Python mappings uses the logic of `update()` internally, which means they can be initialized from other mappings or from any iterable object producing `(key, value)` pairs.

A subtle mapping method is `setdefault()`. It avoids redundant key lookups when we need to update the value of an item in place. The next section shows how to use it.

### Inserting or Updating Mutable Values

## Automatic Handling of Missing Keys

### `defaultdict`: Another Take on Missing Keys

In [27]:
"""Build and index mapping word->list of occurrences"""
import collections
import re
import sys

WORD_RE = re.compile(r'\w+')

index = collections.defaultdict(list)
print(f"{sys.argv=}", file=sys.stderr)

# if len(sys.argv)>1:
#   with open(sys.argv[1], encoding='utf-8') as fp:
#       for line_no, line in enumerate(fp, 1):
#           for match in WORD_RE.finditer(line):
#               word = match.group()
#               column_no = match.start+1
#               location = (line_no, column_no)
#               index[word].append(location)
            
#   # display in alphabetical order
#   for word in sorted(index, key=str.upper):
#       print(word, index[word])
# else:
#   print(f"No arguments given, exiting...", file=sys.stderr)

sys.argv=['/opt/conda/lib/python3.11/site-packages/ipykernel_launcher.py', '-f', '/home/jovyan/.local/share/jupyter/runtime/kernel-d2609c84-8179-44d2-8f72-f78d560ad2ac.json']


### The `__missing__` Method

In [28]:
class StrKeyDict0(dict):
    """A dict that retries failed non-string lookups using ``str(key)``.

    Keys are expected to be stored as strings; ``d[4]`` falls back to
    ``d['4']`` when ``4`` itself is not present.
    """

    def __missing__(self, key):
        """Handle a failed ``self[key]`` lookup.

        Non-string keys are retried as ``str(key)``. A string key that
        reaches this point is genuinely absent, so ``KeyError`` is raised;
        without that guard the retry would recurse forever.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]`` or *default* if the lookup raises ``KeyError``.

        Going through ``self[key]`` lets ``__missing__`` take part in the
        lookup.
        """
        try:
            value = self[key]
        except KeyError:
            return default
        else:
            return value

    def __contains__(self, key):
        """Return True if *key* or ``str(key)`` is one of the stored keys."""
        # Test against the keys view: ``key in self`` would call this method
        # again and recurse.
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored

In [29]:
d = StrKeyDict0( [('2', 'two'), ('4', 'four')] )
d

{'2': 'two', '4': 'four'}

In [37]:
try:
    print(f"{d['2']=}")
except KeyError as e:
    print(f"{e=}")

d['2']='two'


In [35]:
try:
    print(f"{d[4]=}")
except KeyError as e:
    print(f"{e=}")

d[4]='four'


In [36]:
try:
    print(f"{d[1]=}")
except KeyError as e:
    print(f"{e=}")

e=KeyError('1')


In [38]:
try:
    print(f"{d.get('2')=}")
except KeyError as e:
    print(f"{e=}")

d.get('2')='two'


In [39]:
try:
    print(f"{d.get(4)=}")
except KeyError as e:
    print(f"{e=}")

d.get(4)='four'


In [40]:
try:
    print(f"{d.get(1, 'N/A')=}")
except KeyError as e:
    print(f"{e=}")

d.get(1, 'N/A')='N/A'


In [41]:
2 in d

True

In [42]:
1 in d

False

In [43]:
'4' in d

True

## Variations of dict

### collections.OrderedDict

Now that the built-in dict also keeps the keys ordered since Python 3.6, the most common reason to use `OrderedDict` is writing code that is backward compatible with earlier Python versions. 

### collections.ChainMap

A `ChainMap` instance holds a list of mappings that can be searched as one. The lookup is performed on each input mapping in the order it appears in the constructor call, and succeeds as soon as the key is found in one of those mappings. 

In [44]:
d1 = dict(a=1, b=3)
d2 = dict(a=2, b=4, c=6)

from collections import ChainMap
chain = ChainMap(d1, d2)
chain['a']

1

In [45]:
chain['c']

6

The `ChainMap` instance does not copy the input mappings, but holds references to them. Updates or insertions to a ChainMap only affect the first input mapping.

In [46]:
chain['c'] = -1
d1

{'a': 1, 'b': 3, 'c': -1}

In [47]:
d2

{'a': 2, 'b': 4, 'c': 6}

### collections.Counter

A mapping that holds an integer count for each key. Updating an existing key adds to its count. This can be used to count instances of hashable objects or as a multiset (discussed later in this section). `Counter` implements the `+` and `-` operators to combine tallies, and other useful methods such as `most_common([n])`, which returns an ordered list of tuples with the `n` most common items and their counts.

In [48]:
ct = collections.Counter('abracadabra')
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [49]:
ct.update('aaaaazzz')
ct

Counter({'a': 10, 'z': 3, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [50]:
ct.most_common(3)

[('a', 10), ('z', 3), ('b', 2)]

### shelve.Shelf

The `shelve` module in the standard library provides persistent storage for a mapping of string keys to Python objects serialized in the `pickle` binary format. The curious name of shelve makes sense when you realize that pickle jars are stored on shelves.

### Subclassing UserDict instead of dict

It’s better to create a new mapping type by extending `collections.UserDict` rather than dict. We realize that when we try to extend our `StrKeyDict0` from the example before to make sure that any keys added to the mapping are stored as `str`.

In [52]:
import collections

class StrKeyDict(collections.UserDict):
    """A mapping that stores every key as ``str`` and looks keys up likewise.

    Items live in the ``self.data`` dict supplied by ``UserDict``; the
    methods below work on it directly.
    """

    def __missing__(self, key):
        """Retry a failed non-string lookup as ``str(key)``.

        A string key that reaches this point is absent, so ``KeyError`` is
        raised instead of retrying again.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Return True if ``str(key)`` is a stored key."""
        text_key = str(key)
        return text_key in self.data

    def __setitem__(self, key, item):
        """Store *item* under ``str(key)``."""
        text_key = str(key)
        self.data[text_key] = item

## Immutable Mappings

The mapping types provided by the standard library are all mutable, but you may need to prevent users from changing a mapping by accident.

In [59]:
from types import MappingProxyType

d = {1:'A'}
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [60]:
try:
    print(f"{d_proxy[1]=}")
except TypeError as e:
    print(f"{e=}")

d_proxy[1]='A'


In [61]:
try:
    d_proxy[2] = 'x'
    print(f"{d_proxy[2]=}")
except TypeError as e:
    print(f"{e=}")

e=TypeError("'mappingproxy' object does not support item assignment")


In [62]:
try:
    d[2] = 'B'
    print(f"{d_proxy=}")
except TypeError as e:
    print(f"{e=}")

d_proxy=mappingproxy({1: 'A', 2: 'B'})


In [63]:
try:
    print(f"{d_proxy[2]=}")
except TypeError as e:
    print(f"{e=}")

d_proxy[2]='B'


## Dictionary Views

The dict instance methods `.keys()`, `.values()`, and `.items()` return instances of classes called `dict_keys`, `dict_values`, and `dict_items`, respectively. These dictionary views are read-only projections of the internal data structures used in the `dict` implementation. They avoid the memory overhead of the equivalent Python 2 methods that returned lists duplicating data already in the target `dict`, and they also replace the old methods that returned iterators.

In [64]:
d = dict(a=10, b=20, c=30)
values = d.values()
values

dict_values([10, 20, 30])

In [65]:
len(values)

3

In [66]:
list(values)

[10, 20, 30]

In [67]:
reversed(values)

<dict_reversevalueiterator at 0x7f3f19bd44a0>

In [69]:
try:
    values[0]
except Exception as e:
    print(f"{e=}")

e=TypeError("'dict_values' object is not subscriptable")


In [70]:
d['z'] = 99
d

{'a': 10, 'b': 20, 'c': 30, 'z': 99}

In [71]:
values

dict_values([10, 20, 30, 99])

In [73]:
try:
    values_class = type( {}.values() )
    v = values_class()
except Exception as e:
    print(f"{e=}")

e=TypeError("cannot create 'dict_values' instances")


## Set Theory

A set is a collection of unique objects. A basic use case is removing duplication

In [74]:
l = ['spam', 'spam', 'eggs', 'spam', 'bacon', 'eggs']
set(l)

{'bacon', 'eggs', 'spam'}

In [75]:
list(set(l))

['spam', 'bacon', 'eggs']

In [76]:
dict.fromkeys(l).keys()

dict_keys(['spam', 'eggs', 'bacon'])

In [77]:
list( dict.fromkeys(l).keys() )

['spam', 'eggs', 'bacon']

Set elements must be hashable. The `set` type is not hashable, so you can’t build a `set` with nested `set` instances. But `frozenset` is hashable, so you can have frozenset elements inside a `set`.

### Set Literals

The syntax of `set` literals —`{1}`, `{1, 2}`, etc— looks exactly like the math notation, with one important exception: there's no literal notation for the empty `set`, so we must remember to write `set()`.

In [78]:
s = {1}
type(s)

set

In [79]:
s

{1}

In [80]:
s.pop()

1

In [81]:
s

set()

In [82]:
frozenset( range(10) )

frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

### Set Comprehensions

In [83]:
from unicodedata import name

{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

### Practical Consequences of How Sets Work

The `set` and `frozenset` types are both implemented with a hash table

In [86]:
s1 = {i for i in range(0,10)}
print(f"{s1=}\n{len(s1)=}")

s1={0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
len(s1)=10


In [87]:
s2 = {j for j in range(5,15)}
print(f"{s2=}\n{len(s2)=}")

s2={5, 6, 7, 8, 9, 10, 11, 12, 13, 14}
len(s2)=10


In [88]:
print(f"{s1^s2=}\n{len(s1^s2)=}")

s1^s2={0, 1, 2, 3, 4, 10, 11, 12, 13, 14}
len(s1^s2)=10


In [89]:
print(f"{s1.pop()=}")

s1.pop()=0


In [90]:
print(f"{s1=}\n{len(s1)=}")

s1={1, 2, 3, 4, 5, 6, 7, 8, 9}
len(s1)=9


In [91]:
d1 = dict(a=1, b=2, c=3, d=4)
d2 = dict(b=20, d=40, e=50)
d1.keys() & d2.keys()

{'b', 'd'}

In [92]:
s = {'a', 'e', 'i'}
d1.keys() & s

{'a'}

In [93]:
d1.keys() | s

{'a', 'b', 'c', 'd', 'e', 'i'}