# 1.12. Determining the Most Frequently Occurring Items in a Sequence

In [7]:
from collections import Counter # Counter objects can be fed any sequence of hashable input items

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]
word_counts = Counter(words)
print(word_counts)
top_three = word_counts.most_common(3)
print(top_three)

# increment the count manually
morewords = ['why','are','you','not','looking','in','my','eyes']
for word in morewords:
    word_counts[word] += 1

print(word_counts['eyes'])

word_counts.update(morewords)

Counter({'eyes': 8, 'the': 5, 'look': 4, 'my': 3, 'into': 3, 'around': 2, 'not': 1, "you're": 1, "don't": 1, 'under': 1})
[('eyes', 8), ('the', 5), ('look', 4)]
9


# 1.13. Sorting a List of Dictionaries by a Common Key

In [12]:
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
from operator import itemgetter # itemgetter() function can also accept multiple keys

rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=lambda r: (r['uid']))
rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))


print(rows_by_fname)
print(rows_by_uid)
print(rows_by_lfname)

[{'uid': 1004, 'fname': 'Big', 'lname': 'Jones'}, {'uid': 1003, 'fname': 'Brian', 'lname': 'Jones'}, {'uid': 1002, 'fname': 'David', 'lname': 'Beazley'}, {'uid': 1001, 'fname': 'John', 'lname': 'Cleese'}]
[{'uid': 1001, 'fname': 'John', 'lname': 'Cleese'}, {'uid': 1002, 'fname': 'David', 'lname': 'Beazley'}, {'uid': 1003, 'fname': 'Brian', 'lname': 'Jones'}, {'uid': 1004, 'fname': 'Big', 'lname': 'Jones'}]
[{'uid': 1002, 'fname': 'David', 'lname': 'Beazley'}, {'uid': 1001, 'fname': 'John', 'lname': 'Cleese'}, {'uid': 1004, 'fname': 'Big', 'lname': 'Jones'}, {'uid': 1003, 'fname': 'Brian', 'lname': 'Jones'}]


# 1.14. Sorting Objects Without Native Comparison Support

In [17]:
class User:
    def __init__(self, user_id):
        self.user_id = user_id 
    
    def __repr__(self):
        return 'User({})'.format(self.user_id)

users = [User(23), User(3), User(99)]
sorted(users, key=lambda u: u.user_id)

from operator import attrgetter 
sorted(users, key=attrgetter('user_id'))

[User(3), User(23), User(99)]

# 1.15. Grouping Records Together Based on a Field

In [23]:
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
from operator import itemgetter
from itertools import groupby

# Sort by the desired field first 
rows.sort(key=itemgetter('date'))
print(rows)

# Iterate in groups
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print('  ',i)

from collections import defaultdict 
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)
print(rows_by_date)
for r in rows_by_date['07/01/2012']:
    print(r)

[{'date': '07/01/2012', 'address': '5412 N CLARK'}, {'date': '07/01/2012', 'address': '4801 N BROADWAY'}, {'date': '07/02/2012', 'address': '5800 E 58TH'}, {'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}, {'date': '07/02/2012', 'address': '1060 W ADDISON'}, {'date': '07/03/2012', 'address': '2122 N CLARK'}, {'date': '07/04/2012', 'address': '5148 N CLARK'}, {'date': '07/04/2012', 'address': '1039 W GRANVILLE'}]
07/01/2012
   {'date': '07/01/2012', 'address': '5412 N CLARK'}
   {'date': '07/01/2012', 'address': '4801 N BROADWAY'}
07/02/2012
   {'date': '07/02/2012', 'address': '5800 E 58TH'}
   {'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}
   {'date': '07/02/2012', 'address': '1060 W ADDISON'}
07/03/2012
   {'date': '07/03/2012', 'address': '2122 N CLARK'}
07/04/2012
   {'date': '07/04/2012', 'address': '5148 N CLARK'}
   {'date': '07/04/2012', 'address': '1039 W GRANVILLE'}
defaultdict(<class 'list'>, {'07/01/2012': [{'date': '07/01/2012', 'address': '5412 N CLARK'}, {'

# 1.16. Filtering Sequence Elements

In [32]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
[n for n in mylist if n>0]

[n for n in mylist if n<0]

# generator expression to prduce the filtered values iteratively
pos = (n for n in mylist if n>0)
for x in pos:
    print(x)

values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
    try:
        x = int(val)
        return True 
    except ValueError:
        return False 

ivals = list(filter(is_int, values)) # filter() create an iterator
print(ivals)

mylist = [1, 4, -5, 10, -7, 2, 3, -1]
import math 
[math.sqrt(n) for n in mylist if n > 0]

addresses = [
'5412 N CLARK',
'5148 N CLARK',
'5800 E 58TH',
'2122 N CLARK'
'5645 N RAVENSWOOD',
'1060 W ADDISON',
'4801 N BROADWAY',
'1039 W GRANVILLE',
]

counts = [0, 3, 10, 4, 1, 7, 6, 1]

from itertools import compress 
more5 = [n > 5 for n in counts]
list(compress(addresses, more5))

# filter(), compress() normally returns an iterator.

1
4
10
2
3
['1', '2', '-3', '4', '5']


['5800 E 58TH', '4801 N BROADWAY', '1039 W GRANVILLE']

# 1.17. Extracting a Subset of a Dictionary 

In [35]:
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}

# Make a dictionary of all prices over 200
p1 = { key:value for key, value in prices.items() if value > 200}

# Make a dictionary of tech stocks 
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = { key:value for key, value in prices.items() if key in tech_names }
print(p2)


{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}


# 1.18. Mapping Names to Sequence Elements


In [42]:
from collections import namedtuple
Subscriber = namedtuple('Subscriber',['addr', 'joined'])
sub = Subscriber('jonesy@exmaple.com', '2012-10-19')
sub.addr

from collections import namedtuple # namedtuple is immutable 
Stock = namedtuple('Stock', ['name', 'shares','price'])
def compute_cost(records):
    total = 0.0 
    for rec in records:
        s = Stock(*rec)
        total += s.shares * s.price
    return total
s = Stock('ACME', 100, 123.45)
s = s._replace(shares=75)
s

Stock(name='ACME', shares=75, price=123.45)

# 1.19. Transforming and Reducing Data at the Same Time

In [53]:
nums = [1,2,3,4,5]
s = sum(x*x for x in nums)

# Determine if any .py files exist in a directory 
import os 
files = os.listdir('.')
if any(name.endswith('.py') for name in files):
    print('There be python!')
else:
    print('Sorry, no python.')
    
# Output a tuple as CSV
s = ('ACME', 50, 123.45)
print(','.join(str(x) for x in s))

# Data reduction across fields of a data structure 
portfolio = [
    {'name':'GOOG', 'shares':50},
    {'name':'YHOO', 'shares':75},
    {'name':'AOL', 'shares':20},
    {'name':'SCOX', 'shares':65}
]
min_shares = min(s['shares'] for s in portfolio)
min_shares

# Alternative: Returns {'name':'AOL', 'shares':20}
min_shares = min(portfolio, key=lambda s: s['shares'])
min_shares

There be python!
ACME,50,123.45


{'name': 'AOL', 'shares': 20}

# 1.20. Combining Multiple Mappings into a Single Mapping

In [65]:
a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }
from collections import ChainMap # ChainMap takes multiple mappings and makes them logically appear as one 
# duplicate keys, the values from the first mapping get used
c = ChainMap(a,b)
print(c['x']) 
print(c['z'])
len(c)
list(c.keys())
list(c.values())

# Operations that mutate the mapping always affect the first mapping listed 
c['z'] = 10
c['w'] = 40 
del c['x']

values = ChainMap() # uses the original dictionaries
values['x'] = 1 
# Adding a new mapping 
values = values.new_child() 
values['x'] = 2
values = values.new_child()
values['x'] = 3
# Discard last mapping
values = values.parents 
values['x']

a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }
merged = dict(b)
merged.update(a)
merged



1
3


{'x': 1, 'y': 2, 'z': 3}