# Chapter 3. Dictionaries and Sets

In [3]:
from collections import abc

my_dict = {}
isinstance(my_dict, abc.Mapping)

True

## setdefault

In [None]:
# Baseline ("redundant") version: build an index that maps each word to the
# list of (line, column) positions where it occurs, using dict.get.
# NOTE(review): under a Jupyter kernel sys.argv[1] is usually a kernel option
# rather than a text file path — confirm how this cell is meant to be run.
import sys
import re

WORD_RE = re.compile(r'\w+')  # a "word" is a run of \w characters

index = {}
# The file to index is named by the first command-line argument.
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # 0-based match offset -> 1-based column
            location = (line_no, column_no)
            # Three steps per occurrence: fetch the word's list (or a fresh
            # empty one), append to it, then store it back under the key.
            occurrences = index.get(word, [])
            occurrences.append(location)
            index[word] = occurrences

In [None]:
# Build the word -> [(line, column), ...] index with dict.setdefault, which
# fetches the word's list or inserts a new empty one in a single call.
import sys
import re  # was misspelled `rs`, which made the cell fail on a fresh kernel

WORD_RE = re.compile(r'\w+')  # a "word" is a run of \w characters

index = {}
# The file to index is named by the first command-line argument.
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # 0-based match offset -> 1-based column
            location = (line_no, column_no)
            # setdefault returns the list already stored for `word`, or stores
            # and returns the new [] — so the append always lands in the index.
            index.setdefault(word, []).append(location)

In [None]:
# Build the word -> [(line, column), ...] index with collections.defaultdict:
# a missing word gets a new empty list created for it on first access.
import sys
import re  # was misspelled `rs`, which made the cell fail on a fresh kernel
import collections

WORD_RE = re.compile(r'\w+')  # a "word" is a run of \w characters

index = collections.defaultdict(list)  # list() supplies the value for missing keys
# The file to index is named by the first command-line argument.
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # 0-based match offset -> 1-based column
            location = (line_no, column_no)
            # index[word] creates the empty list on first sight of `word`.
            index[word].append(location)

# The `__missing__` Method

In [4]:
class StrKeyDict0(dict):
    """dict subclass whose lookups retry a missing non-str key as str(key)."""

    def __missing__(self, key):
        """Handle a failed self[key]: retry with str(key), or raise KeyError.

        A key that is already a str has no further fallback, so it raises
        KeyError; this also stops the retry from recursing forever.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return self[key] (with the str fallback), or default if that fails."""
        try:
            value = self[key]
        except KeyError:
            return default
        else:
            return value

    def __contains__(self, key):
        """Report whether key, or its str() form, is one of the stored keys."""
        # Test against keys() rather than `in self`, which would call
        # __contains__ again.
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored

In [5]:
d = StrKeyDict0([('2', 'two'), ('4', 'four')])
d['2']

'two'

In [6]:
d[4]

'four'

In [7]:
d[1]

KeyError: '1'

In [8]:
d.get('2')

'two'

In [9]:
d.get(1, 'N/A')

'N/A'

# Collections

## collections.Counter

In [10]:
import collections

ct = collections.Counter('abracadabra')
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [11]:
ct.update('aaaaazzz')
ct

Counter({'a': 10, 'b': 2, 'r': 2, 'c': 1, 'd': 1, 'z': 3})

In [13]:
ct.most_common(2)

[('a', 10), ('z', 3)]

## collections.UserDict
> The main reason it's preferable to subclass `UserDict` rather than `dict` is that the built-in has some implementation shortcuts that end up forcing us to override methods that we can just inherit from `UserDict` with no problems.

In [17]:
class MyClass(dict):
    """Empty subclass of the built-in dict."""


# An instance of a dict subclass is itself a dict.
my_class = MyClass()
isinstance(my_class, dict)

True

In [18]:
# UserDict is not a subclass of dict; it wraps a real dict that it keeps
# in its `data` attribute.
import collections


class MyClass(collections.UserDict):
    """Empty subclass of collections.UserDict."""


my_class = MyClass()
isinstance(my_class, dict)

False

In [19]:
import collections

class StrKeyDict(collections.UserDict):
    """UserDict that stores every key as a str and looks keys up by str(key)."""

    def __missing__(self, key):
        """Handle a failed self[key]: retry with str(key), or raise KeyError.

        A key that is already a str has no further fallback, so it raises
        KeyError instead of retrying.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Report whether str(key) is stored; all stored keys are str."""
        text_key = str(key)
        return text_key in self.data

    def __setitem__(self, key, item):
        """Store item under str(key) in the wrapped dict."""
        text_key = str(key)
        self.data[text_key] = item

# types.MappingProxyType
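`MappingProxyType` wraps a mapping in a read-only view: the proxy rejects item assignment, but changes made to the underlying dict remain visible through it.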

In [20]:
from types import MappingProxyType

d = {1: 'A'}
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [21]:
d_proxy[1]

'A'

In [22]:
d_proxy[2] = 'x'

TypeError: 'mappingproxy' object does not support item assignment

In [23]:
d[2] = 'B'
d_proxy

mappingproxy({1: 'A', 2: 'B'})

In [24]:
d_proxy[2]

'B'

# Set Theory

In [25]:
# A list with duplicates; building a set from it keeps one copy of each item.
# The values match the recorded outputs of this section: set(l) shows only
# 'eggs' and 'spam', and the intersection with ['beef', 'spam', 'eggs'] has 2 items.
l = ['spam', 'spam', 'eggs', 'spam']
set(l)

{'eggs', 'spam'}

In [27]:
list(set(l))

['eggs', 'spam']

In [29]:
r = ['beef', 'spam', 'eggs']
len(set(l) & set(r))

2

In [30]:
len(set(l).intersection(r))

2

## set Literals

In [32]:
s = {1, 2, 3}
type(s)

set

In [33]:
frozenset(range(10))

frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

In [34]:
from unicodedata import name
{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}