In [1]:
from collections import defaultdict, Counter

In [2]:
sentence = 'the quick brown fox jumps over the lazy dog'

counter = defaultdict(int)
for c in sentence:
    counter[c] += 1

print(counter)

defaultdict(<class 'int'>, {'t': 2, 'h': 2, 'e': 3, ' ': 8, 'q': 1, 'u': 2, 'i': 1, 'c': 1, 'k': 1, 'b': 1, 'r': 2, 'o': 4, 'w': 1, 'n': 1, 'f': 1, 'x': 1, 'j': 1, 'm': 1, 'p': 1, 's': 1, 'v': 1, 'l': 1, 'a': 1, 'z': 1, 'y': 1, 'd': 1, 'g': 1})


In [4]:
sentence = 'the quick brown fox jumps over the lazy dog'
counter = Counter()
for c in sentence:
    counter[c] += 1
    
print(counter)

Counter({' ': 8, 'o': 4, 'e': 3, 't': 2, 'h': 2, 'u': 2, 'r': 2, 'q': 1, 'i': 1, 'c': 1, 'k': 1, 'b': 1, 'w': 1, 'n': 1, 'f': 1, 'x': 1, 'j': 1, 'm': 1, 'p': 1, 's': 1, 'v': 1, 'l': 1, 'a': 1, 'z': 1, 'y': 1, 'd': 1, 'g': 1})


# Counter has a specialized constructor

In [7]:
sentence = 'the quick brown fox jumps over the lazy dog'
counter = Counter(sentence)
print(counter)


Counter({' ': 8, 'o': 4, 'e': 3, 't': 2, 'h': 2, 'u': 2, 'r': 2, 'q': 1, 'i': 1, 'c': 1, 'k': 1, 'b': 1, 'w': 1, 'n': 1, 'f': 1, 'x': 1, 'j': 1, 'm': 1, 'p': 1, 's': 1, 'v': 1, 'l': 1, 'a': 1, 'z': 1, 'y': 1, 'd': 1, 'g': 1})


# Find top ten used words in a text

In [9]:
import re

sentence = '''
his module implements pseudo-random number generators for various distributions.

For integers, there is uniform selection from a range. For sequences, there is uniform selection of a random element, 
a function to generate a random permutation of a list in-place, and a function for random sampling without replacement.

On the real line, there are functions to compute uniform, normal (Gaussian), lognormal, negative exponential, gamma, 
and beta distributions. For generating distributions of angles, the von Mises distribution is available.

Almost all module functions depend on the basic function random(), which generates a random float uniformly in the
semi-open range [0.0, 1.0). Python uses the Mersenne Twister as the core generator. It produces 53-bit precision floats 
and has a period of 2**19937-1. The underlying implementation in C is both fast and threadsafe. 
The Mersenne Twister is one of the most extensively tested random number generators in existence. 
However, being completely deterministic, 
it is not suitable for all purposes, and is completely unsuitable for cryptographic purposes.
'''

words = re.split('\W', sentence)
word_count = Counter(words)
print(word_count.most_common(10))

[('', 45), ('a', 8), ('random', 7), ('is', 7), ('the', 7), ('of', 5), ('and', 5), ('for', 4), ('in', 4), ('distributions', 3)]


# Iterate through the counter

In [10]:
counter = Counter('abba')
for c in counter.elements():
    print(c)

a
a
b
b


# Add two counters together

In [12]:
c1 = Counter(a=1, b=2, c=3)
c2 = Counter(b=1, c=2, d=3)

c1.update(c2)
print(c1)

Counter({'c': 5, 'b': 3, 'd': 3, 'a': 1})


# Subtract

In [13]:
c1 = Counter(a=1, b=2, c=3)
c2 = Counter(b=1, c=2, d=3)

c1.subtract(c2)
print(c1)

Counter({'a': 1, 'b': 1, 'c': 1, 'd': -3})


In [14]:
c1 = Counter(a=1, b=2, c=3)
letter_string = 'abc'
c1.subtract(letter_string)
print(c1)

Counter({'c': 2, 'b': 1, 'a': 0})


## Subtract - a practical example

In [15]:
import random
random.seed(0)

widgets = ['battery', 'charger', 'cable', 'case', 'keyboard', 'mouse']

orders = [(random.choice(widgets), random.randint(1, 5)) for _ in range(100)]
refunds = [(random.choice(widgets), random.randint(1, 3)) for _ in range(20)]

sold_counter = Counter()
refund_counter = Counter()

for order in orders:
    sold_counter[order[0]] += order[1]

for refund in refunds:
    refund_counter[refund[0]] += refund[1]
    
net_counter = sold_counter - refund_counter
print(net_counter)

Counter({'keyboard': 58, 'battery': 54, 'mouse': 39, 'case': 36, 'cable': 30, 'charger': 30})


In [25]:
list(repeat(*orders[0]))

['mouse', 'mouse', 'mouse']

In [29]:
from itertools import repeat, chain

widgets = ['battery', 'charger', 'cable', 'case', 'keyboard', 'mouse']

orders = [(random.choice(widgets), random.randint(1, 5)) for _ in range(100)]
refunds = [(random.choice(widgets), random.randint(1, 3)) for _ in range(20)]

sold_counter = Counter(chain.from_iterable(repeat(*order) for order in orders))
refunds_counter = Counter(chain.from_iterable(repeat(*refund) for refund in refunds))

net_counter = sold_counter - refund_counter
print(net_counter)

Counter({'keyboard': 65, 'cable': 58, 'charger': 38, 'case': 36, 'battery': 35, 'mouse': 29})


## If we dont want to use **counter**, we can do this: 

In [32]:
net_sales = {}
for order in orders:
    key = order[0]
    cnt = order[1]
    net_sales[key] = net_sales.get(key, 0) + cnt
    
for refund in refunds:
    key = refund[0]
    cnt = refund[1]
    net_sales[key] = net_sales.get(key, 0) - cnt

# eliminate non-positive values (to mimic what - does for Counters)
net_sales = {k: v for k, v in net_sales.items() if v > 0}

# we now have to sort the dictionary
# this means sorting the keys based on the values
sorted_net_sales = sorted(net_sales.items(), key=lambda t: t[1], reverse=True)

# Top three
print(sorted_net_sales)

[('cable', 65), ('keyboard', 64), ('charger', 41), ('battery', 35), ('case', 35), ('mouse', 29)]
