# groupby 

`itertools.groupby` groups records together based on the value of a given field.

In [1]:
# Sample records: one dict per entry, each with an 'address' and a 'date' (MM/DD/YYYY string).
rows = [
    dict(address='5412 N CLARK', date='07/01/2012'),
    dict(address='5148 N CLARK', date='07/04/2012'),
    dict(address='5800 E 58TH', date='07/02/2012'),
    dict(address='2122 N CLARK', date='07/03/2012'),
    dict(address='5645 N RAVENSWOOD', date='07/02/2012'),
    dict(address='1060 W ADDISON', date='07/02/2012'),
    dict(address='4801 N BROADWAY', date='07/01/2012'),
    dict(address='1039 W GRANVILLE', date='07/04/2012'),
]

In [2]:
from operator import itemgetter
from itertools import groupby

In [3]:
# In-place sort by the 'date' string. The values are MM/DD/YYYY text, so the ordering is
# lexicographic; it matches chronological order here because every record is from 2012.
rows.sort(key=lambda record: record['date'])

In [4]:
# Display the records after the in-place sort: they are now ordered by their 'date' string.
rows

[{'address': '5412 N CLARK', 'date': '07/01/2012'},
 {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
 {'address': '5800 E 58TH', 'date': '07/02/2012'},
 {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
 {'address': '1060 W ADDISON', 'date': '07/02/2012'},
 {'address': '2122 N CLARK', 'date': '07/03/2012'},
 {'address': '5148 N CLARK', 'date': '07/04/2012'},
 {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]

In [17]:
# groupby only merges *consecutive* records with equal keys, so sort by the same key right
# here instead of relying on another cell having already sorted `rows` in place.
# sorted() is stable, so records sharing a date keep their existing relative order.
by_date = itemgetter('date')

print(" DATE ".center(10, '-'))
for date, items in groupby(sorted(rows, key=by_date), key=by_date):
    print(f"{date}")
    # `items` is a lazy sub-iterator tied to the groupby object; consume it before
    # advancing to the next group.
    for i in items:
        print(f" -> {i}")

-- DATE --
07/01/2012
 -> {'address': '5412 N CLARK', 'date': '07/01/2012'}
 -> {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
 -> {'address': '5800 E 58TH', 'date': '07/02/2012'}
 -> {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
 -> {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
 -> {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
 -> {'address': '5148 N CLARK', 'date': '07/04/2012'}
 -> {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


*groupby* only groups consecutive items that share the same key, so it's important to sort the records by that key first.

# compress

In [1]:
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]

# counts[i] is the value paired with addresses[i]; the two lists are matched by position.
counts = [0, 3, 10, 4, 1, 7, 6, 1]

# compress() stops silently at the shorter of its two inputs, so fail loudly if the
# parallel lists ever drift out of sync.
assert len(addresses) == len(counts), "addresses and counts must have the same length"

We want to keep only the addresses whose corresponding `counts` value is greater than 5.

In [2]:
# Boolean selector list: True at each position where the count exceeds 5.
more5 = [count > 5 for count in counts]

In [3]:
# Display the mask: one True/False per entry of counts, True where the count is greater than 5.
more5

[False, False, True, False, False, True, True, False]

In [4]:
from itertools import compress

In [5]:
# Keep each address whose matching selector in more5 is True; list() materializes the lazy result.
list(compress(data=addresses, selectors=more5))

['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']

**compress** returns an iterator, so wrap the call in **list** to materialize the results as a list.

# islice 

`islice` lets us take a slice of any iterable, just as we would slice a list.

In [1]:
# Sample list 1..7; ordinary slice syntax works here because `a` is a list.
a = list(range(1, 8))
a[:3]

[1, 2, 3]

In [3]:
from itertools import islice

In [5]:
# islice is lazy: evaluating it on its own only displays the iterator object, not the sliced values.
islice(a, 3)

<itertools.islice at 0x7fbc381db040>

In [10]:
# Iterating the islice yields only the first three items of `a`, mirroring a[:3].
for value in islice(a, 3):
    # The "=" specifier echoes the expression text ("value = 1"), so the loop
    # variable's name is part of the printed output.
    print(f"{value = }")         # "=" specifier: only with python 3.8+

value = 1
value = 2
value = 3


# takewhile 

In [12]:
from itertools import takewhile

In [14]:
# Lazily yield items of `a` until the first one that is not below 6; nothing after it is produced.
tw = takewhile(lambda item: item < 6, a)

In [15]:
# `tw` is a one-shot iterator: this loop consumes it, so looping over it again prints
# nothing until `tw` is recreated.
for value in tw:
    print(f"{value = }")         # "=" specifier: only with python 3.8+

value = 1
value = 2
value = 3
value = 4
value = 5


# product 

In [1]:
from itertools import product

In [6]:
# Cartesian product of range(5) with itself: one {'x', 'y'} dict per pair, with x varying slowest.
# Note: despite its name, my_dictionary is a list of dicts.
my_dictionary = [{'x': x, 'y': y} for x, y in product(range(5), repeat=2)]

In [7]:
# Display the generated list of {'x': ..., 'y': ...} dicts (25 entries for the 5 x 5 product).
my_dictionary

[{'x': 0, 'y': 0},
 {'x': 0, 'y': 1},
 {'x': 0, 'y': 2},
 {'x': 0, 'y': 3},
 {'x': 0, 'y': 4},
 {'x': 1, 'y': 0},
 {'x': 1, 'y': 1},
 {'x': 1, 'y': 2},
 {'x': 1, 'y': 3},
 {'x': 1, 'y': 4},
 {'x': 2, 'y': 0},
 {'x': 2, 'y': 1},
 {'x': 2, 'y': 2},
 {'x': 2, 'y': 3},
 {'x': 2, 'y': 4},
 {'x': 3, 'y': 0},
 {'x': 3, 'y': 1},
 {'x': 3, 'y': 2},
 {'x': 3, 'y': 3},
 {'x': 3, 'y': 4},
 {'x': 4, 'y': 0},
 {'x': 4, 'y': 1},
 {'x': 4, 'y': 2},
 {'x': 4, 'y': 3},
 {'x': 4, 'y': 4}]