## defaultdict
subclass of dict
* It automatically assigns a default value to missing keys using a specified default_factory
* eliminates the need for manual key existence checks

In [1]:
from collections import defaultdict
from icecream import ic
ic.configureOutput(prefix='', outputFunction=print)
print = ic

#### 1. Creating defaultdict

In [2]:
d= defaultdict(int) # Create a defaultdict with int as default_factory
print(d["missing_key"])

# creation of nested defaultdict
nested = defaultdict(lambda: defaultdict(int))
nested["a"]["b"] += 1
print(nested)
# show all keys and values
print(dict(nested))


d["missing_key"]: 0
nested: defaultdict(<function <lambda> at 0x000002552C3A0540>,
                    {'a': defaultdict(<class 'int'>, {'b': 1})})
dict(nested): {'a': defaultdict(<class 'int'>, {'b': 1})}


{'a': defaultdict(int, {'b': 1})}

#### 2. Default factory types

In [3]:
d= defaultdict(int) # default is int 0
d['a']+=1
print(d)

d= defaultdict(list) # default is empty list [] 
d['a'].append(1)
d['a'].append(2)
print(d)

d= defaultdict(set) # default is empty set {}
d['a'].add(1)
d['a'].add(2)
print(d)

def default_value():
    return 'default value'

d= defaultdict(default_value) # default is 'default value'
print(d['a'])

d= defaultdict(lambda: 'default value') # default is 'default value'
print(d['a'])


d: defaultdict(<class 'int'>, {'a': 1})
d: defaultdict(<class 'list'>, {'a': [1, 2]})
d: defaultdict(<class 'set'>, {'a': {1, 2}})
d['a']: 'default value'
d['a']: 'default value'


'default value'

#### 3. Methods and attributes

In [4]:
# Access keys, values and items
d = defaultdict(int, {'a': 1, 'b': 2}) 
print(d.keys())
print(d.values())
print(d.items())

#  Updating the dictionary
d = defaultdict(int)
d.update({'a': 2, 'b': 4})
print(d)

#  check if a key exists
d = defaultdict(int)
print('a' in d)
d['a'] = 1
print('a' in d)

#  delete a key
d = defaultdict(int, {'a': 1, 'b': 2})
del d['a']
print(d)

# clear the dictionary
d = defaultdict(int, {'a': 1, 'b': 2})
d.clear()
print(d)


d.keys(): dict_keys(['a', 'b'])
d.values(): dict_values([1, 2])
d.items(): dict_items([('a', 1), ('b', 2)])
d: defaultdict(<class 'int'>, {'a': 2, 'b': 4})
'a' in d: False
'a' in d: True
d: defaultdict(<class 'int'>, {'b': 2})
d: defaultdict(<class 'int'>, {})


defaultdict(int, {})

#### 4. Difference with regular dict

In [5]:
d = defaultdict(int)
print(d['missing_key'])

d ={}
print(d['missing_key']) # KeyError: 'missing_key'




d['missing_key']: 0


KeyError: 'missing_key'

#### 5. Usecases

##### counting elements

In [6]:
data = ['apple', 'banana', 'apple', 'orange', 'banana']
d = defaultdict(int)
for item in data:
    d[item] += 1
print(d)

d: defaultdict(<class 'int'>, {'apple': 2, 'banana': 2, 'orange': 1})


defaultdict(int, {'apple': 2, 'banana': 2, 'orange': 1})

##### Grouping Items by Key

In [7]:
data = [('a', 1), ('b', 2), ('a', 3), ('b', 4)]
d = defaultdict(list)
for key, value in data:
    d[key].append(value)
print(d)

d: defaultdict(<class 'list'>, {'a': [1, 3], 'b': [2, 4]})


defaultdict(list, {'a': [1, 3], 'b': [2, 4]})

##### Creating an Adjacency List for Graphs

In [8]:
edges = [('A', 'B'), ('A', 'C'), ('B', 'D'), ('C', 'D')]
graph = defaultdict(list)
for start, end in edges:
    graph[start].append(end)

print(graph)

graph: defaultdict(<class 'list'>, {'A': ['B', 'C'], 'B': ['D'], 'C': ['D']})


defaultdict(list, {'A': ['B', 'C'], 'B': ['D'], 'C': ['D']})

##### Categorizing Data

In [9]:
data = [('fruit', 'apple'), ('fruit', 'banana'), ('veg', 'carrot')]
categories = defaultdict(list)
for category, item in data:
    categories[category].append(item)
print(categories) 

categories: defaultdict(<class 'list'>, {'fruit': ['apple', 'banana'], 'veg': ['carrot']})


defaultdict(list, {'fruit': ['apple', 'banana'], 'veg': ['carrot']})

#### Word Frequency in Text

In [10]:
text = "this is a test this is only a test"
word_counts = defaultdict(int)
for word in text.split():
    word_counts[word] += 1
print(word_counts) 

word_counts: defaultdict(<class 'int'>, {'this': 2, 'is': 2, 'a': 2, 'test': 2, 'only': 1})


defaultdict(int, {'this': 2, 'is': 2, 'a': 2, 'test': 2, 'only': 1})

##### Indexing Words by Their First Letter

In [11]:
words = ['apple', 'banana', 'cherry', 'avocado', 'blueberry']
index = defaultdict(list)
for word in words:
    index[word[0]].append(word)
print(index) 

index: defaultdict(<class 'list'>,
                   {'a': ['apple', 'avocado'],
                    'b': ['banana', 'blueberry'],
                    'c': ['cherry']})


defaultdict(list,
            {'a': ['apple', 'avocado'],
             'b': ['banana', 'blueberry'],
             'c': ['cherry']})