TRANSFORM

In [2]:
import collections.abc as collections_abc
import dataclasses
import itertools
import random
import typing

import faker
import more_itertools

In [3]:
def print_iterable(items: collections_abc.Iterable[typing.Any]) -> None:
    """Print each element of ``items`` on a separate line, in iteration order.

    The iterable is walked exactly once, so passing an iterator or generator
    leaves it exhausted afterwards.
    """
    for element in items:
        print(element)

In [4]:
@dataclasses.dataclass
class User:
    """A user together with the full list of their email addresses."""

    # Identifier of the user.
    id: int
    # Every email address attached to this user.
    emails: list[str]

In [5]:
@dataclasses.dataclass
class UserEmail:
    """A single (user, email) pair: the flattened, one-row-per-address form of ``User``."""

    # Identifier of the user owning the address.
    user_id: int
    # One email address of that user.
    email: str

In [6]:
def gen_fake_users() -> collections_abc.Iterator[User]:
    """Lazily produce five fake users with ids 0 through 4.

    Every user gets three or four Faker-generated addresses; the count is
    drawn per user with ``random.randrange(3, 5)``.
    """
    email_factory = faker.Faker()
    for user_id in range(5):
        email_count = random.randrange(3, 5)
        addresses = [email_factory.email() for _ in range(email_count)]
        yield User(id=user_id, emails=addresses)

In [7]:
def gen_fake_user_emails() -> collections_abc.Iterator[UserEmail]:
    """Lazily produce fake ``UserEmail`` rows for user ids 0 through 4.

    Rows come out grouped by ascending ``user_id``; each id gets three or four
    rows, the count being drawn per user with ``random.randrange(3, 5)``.
    """
    email_factory = faker.Faker()
    for owner_id in range(5):
        email_count = random.randrange(3, 5)
        for _ in range(email_count):
            address = email_factory.email()
            yield UserEmail(user_id=owner_id, email=address)

UNPACK

In [8]:
def transform_user_to_user_email(users: collections_abc.Iterable[User]) -> collections_abc.Iterator[UserEmail]:
    """Unpack users into one ``UserEmail`` row per address.

    Rows are produced lazily, user by user, keeping both the order of the
    users and the order of each user's ``emails`` list.
    """
    for account in users:
        yield from (
            UserEmail(user_id=account.id, email=address)
            for address in account.emails
        )

In [9]:
# Materialize the generator into a list so the same users can be printed here
# and passed to the unpacking step in the next cell.
users = list(gen_fake_users())
print_iterable(users)

User(id=0, emails=['dana74@example.net', 'nlong@example.org', 'fitzgeralddaisy@example.net'])
User(id=1, emails=['drakebrianna@example.net', 'rcampbell@example.com', 'amber48@example.net', 'nguyenseth@example.com'])
User(id=2, emails=['wrodriguez@example.org', 'christina22@example.net', 'ljohnson@example.com', 'christopher65@example.org'])
User(id=3, emails=['lesterandrew@example.com', 'kylie52@example.net', 'sheena52@example.net'])
User(id=4, emails=['todd89@example.net', 'walshashley@example.com', 'robinsontricia@example.org', 'hernandezantonio@example.net'])


In [10]:
# Unpack every user into one UserEmail row per address. The result is a
# generator, so printing it also consumes it.
user_emails = transform_user_to_user_email(users)
print_iterable(user_emails)

UserEmail(user_id=0, email='dana74@example.net')
UserEmail(user_id=0, email='nlong@example.org')
UserEmail(user_id=0, email='fitzgeralddaisy@example.net')
UserEmail(user_id=1, email='drakebrianna@example.net')
UserEmail(user_id=1, email='rcampbell@example.com')
UserEmail(user_id=1, email='amber48@example.net')
UserEmail(user_id=1, email='nguyenseth@example.com')
UserEmail(user_id=2, email='wrodriguez@example.org')
UserEmail(user_id=2, email='christina22@example.net')
UserEmail(user_id=2, email='ljohnson@example.com')
UserEmail(user_id=2, email='christopher65@example.org')
UserEmail(user_id=3, email='lesterandrew@example.com')
UserEmail(user_id=3, email='kylie52@example.net')
UserEmail(user_id=3, email='sheena52@example.net')
UserEmail(user_id=4, email='todd89@example.net')
UserEmail(user_id=4, email='walshashley@example.com')
UserEmail(user_id=4, email='robinsontricia@example.org')
UserEmail(user_id=4, email='hernandezantonio@example.net')


SQUASH

In [11]:
def transform_user_email_to_user(user_emails: collections_abc.Iterable[UserEmail]) -> collections_abc.Iterator[User]:
    """Squash consecutive ``UserEmail`` rows sharing a ``user_id`` into ``User`` objects.

    Only adjacent rows are grouped: if the input is not already grouped by
    ``user_id``, the same id produces several ``User`` objects. Emails keep
    their input order, and an empty input yields nothing.

    Args:
        user_emails: Rows to squash, expected to arrive grouped by ``user_id``.

    Yields:
        One ``User`` per run of adjacent rows with the same ``user_id``.
    """
    # groupby opens a new group each time the key changes, so no manual
    # "current user" bookkeeping is needed to detect the end of a run.
    for user_id, rows in itertools.groupby(user_emails, key=lambda row: row.user_id):
        yield User(id=user_id, emails=[row.email for row in rows])

In [12]:
# Fresh UserEmail rows; gen_fake_user_emails emits them grouped by user_id,
# which is the shape the squash step in the next cell works on.
user_emails = list(gen_fake_user_emails())
print_iterable(user_emails)

UserEmail(user_id=0, email='wthompson@example.com')
UserEmail(user_id=0, email='guzmanmichelle@example.com')
UserEmail(user_id=0, email='lanetimothy@example.com')
UserEmail(user_id=1, email='anthonychavez@example.com')
UserEmail(user_id=1, email='sarahcox@example.com')
UserEmail(user_id=1, email='veronicaharris@example.com')
UserEmail(user_id=1, email='myerswilliam@example.com')
UserEmail(user_id=2, email='davidfoley@example.net')
UserEmail(user_id=2, email='janetrussell@example.org')
UserEmail(user_id=2, email='curryautumn@example.org')
UserEmail(user_id=3, email='lance86@example.com')
UserEmail(user_id=3, email='christophergonzalez@example.com')
UserEmail(user_id=3, email='james32@example.org')
UserEmail(user_id=3, email='vanessa17@example.net')
UserEmail(user_id=4, email='joelspencer@example.net')
UserEmail(user_id=4, email='garciamatthew@example.com')
UserEmail(user_id=4, email='larrymahoney@example.com')


In [13]:
# Squash adjacent rows that share a user_id back into one User each.
users = transform_user_email_to_user(user_emails)
print_iterable(users)

User(id=0, emails=['wthompson@example.com', 'guzmanmichelle@example.com', 'lanetimothy@example.com'])
User(id=1, emails=['anthonychavez@example.com', 'sarahcox@example.com', 'veronicaharris@example.com', 'myerswilliam@example.com'])
User(id=2, emails=['davidfoley@example.net', 'janetrussell@example.org', 'curryautumn@example.org'])
User(id=3, emails=['lance86@example.com', 'christophergonzalez@example.com', 'james32@example.org', 'vanessa17@example.net'])
User(id=4, emails=['joelspencer@example.net', 'garciamatthew@example.com', 'larrymahoney@example.com'])


CHUNK

In [14]:
# Split 95 numbers into lists of 20; the final chunk holds the remaining 15.
chunked_items = more_itertools.chunked(iterable=range(95), n=20)
print_iterable(chunked_items)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
[40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
[60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
[80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94]


In [15]:
# ichunked yields chunk objects (_IChunk) instead of lists, so printing the
# chunks shows their repr rather than their contents.
chunked_items = more_itertools.ichunked(iterable=range(95), n=20)
print_iterable(chunked_items)

<more_itertools.more._IChunk object at 0x7f0c544759f0>
<more_itertools.more._IChunk object at 0x7f0c544753f0>
<more_itertools.more._IChunk object at 0x7f0c544759f0>
<more_itertools.more._IChunk object at 0x7f0c544753f0>
<more_itertools.more._IChunk object at 0x7f0c544759f0>


CHAIN

In [16]:
# Chunk seven numbers into groups of three; the last chunk is shorter.
chunked = more_itertools.chunked(iterable=range(7), n=3)
print_iterable(chunked)

[0, 1, 2]
[3, 4, 5]
[6]


In [17]:
# Flatten the chunks back into one stream. The * unpacking reads every chunk
# out of `chunked` up front, before chain() is even called.
chunked = more_itertools.chunked(iterable=range(7), n=3)
items = itertools.chain(*chunked)
print_iterable(items)

0
1
2
3
4
5
6


In [18]:
# Same flattening over ichunked chunks; the * unpacking still exhausts the
# outer iterator before chaining starts.
chunked = more_itertools.ichunked(iterable=range(7), n=3)
items = itertools.chain(*chunked)
print_iterable(items)

0
1
2
3
4
5
6


In [19]:
# chain.from_iterable receives the chunk iterator itself rather than unpacked
# arguments, so chunks are pulled one at a time while the chain advances.
chunked = more_itertools.ichunked(iterable=range(7), n=3)
items = itertools.chain.from_iterable(chunked)
print_iterable(items)

0
1
2
3
4
5
6


FILTER

In [20]:
def filter_even_id(users: collections_abc.Iterable[User]) -> collections_abc.Iterator[User]:
    """Filter out users with an even ``id``, lazily yielding the odd-id ones.

    The relative order of the surviving users is unchanged.
    """
    for candidate in users:
        if candidate.id % 2 != 0:
            yield candidate

In [21]:
# New batch of fake users, kept as a list so it can be printed here and
# filtered in the next cell.
users = list(gen_fake_users())
print_iterable(users)

User(id=0, emails=['andrea61@example.net', 'dalton54@example.org', 'taylorkelly@example.com'])
User(id=1, emails=['andrealee@example.org', 'collinsvictor@example.com', 'joshua59@example.com'])
User(id=2, emails=['james85@example.com', 'micheleestes@example.org', 'carlos28@example.com'])
User(id=3, emails=['htaylor@example.net', 'devon02@example.net', 'smithmichelle@example.org', 'lspencer@example.net'])
User(id=4, emails=['kevin68@example.com', 'rachel14@example.com', 'bellchelsea@example.org', 'lisachambers@example.com'])


In [22]:
# filter_even_id drops the even ids, so only the odd-id users are printed.
filtered_users = filter_even_id(users)
print_iterable(filtered_users)

User(id=1, emails=['andrealee@example.org', 'collinsvictor@example.com', 'joshua59@example.com'])
User(id=3, emails=['htaylor@example.net', 'devon02@example.net', 'smithmichelle@example.org', 'lspencer@example.net'])


In [23]:
T = typing.TypeVar("T")


def safe_next(iterator: collections_abc.Iterator[T]) -> typing.Optional[T]:
    """Advance ``iterator`` by one step without letting ``StopIteration`` escape.

    Returns the next item, or ``None`` once the iterator is exhausted. An item
    that is itself ``None`` therefore cannot be told apart from exhaustion.
    """
    return next(iterator, None)

MERGE ITERATORS

In [24]:
def merge_emails(
    email_iter1: collections_abc.Iterator[UserEmail],
    email_iter2: collections_abc.Iterator[UserEmail],
) -> collections_abc.Iterator[UserEmail]:
    """Merge two email streams into one, ordered by ``user_id``.

    Only the current head of each stream is compared, so the output is ordered
    only if both inputs are already ascending by ``user_id``. When both heads
    have the same ``user_id``, the item from ``email_iter2`` is emitted first.

    Args:
        email_iter1: First stream of emails, ascending by ``user_id``.
        email_iter2: Second stream of emails, ascending by ``user_id``.

    Yields:
        Every item of both inputs exactly once, in merged order.
    """
    # Private end-of-stream marker: unlike None it can never be mistaken for a
    # real item, and it avoids relying on the truthiness of the items.
    exhausted = object()
    stream1 = iter(email_iter1)
    stream2 = iter(email_iter2)
    email1 = next(stream1, exhausted)
    email2 = next(stream2, exhausted)

    while email1 is not exhausted and email2 is not exhausted:
        if email2.user_id > email1.user_id:
            yield email1
            email1 = next(stream1, exhausted)
        else:
            yield email2
            email2 = next(stream2, exhausted)

    # At most one side still has data. Its head was already pulled out of the
    # stream, so it must be emitted before draining the rest; otherwise that
    # item would be lost.
    if email1 is not exhausted:
        yield email1
        yield from stream1
    if email2 is not exhausted:
        yield email2
        yield from stream2

In [25]:
# Two independent generators; each one yields its emails in ascending
# user_id order, which is what the head-by-head merge below compares on.
emails1 = gen_fake_user_emails()
emails2 = gen_fake_user_emails()

In [26]:
# Interleave both streams by user_id and print the merged result.
emails = merge_emails(emails1, emails2)
print_iterable(emails)

UserEmail(user_id=0, email='nicole02@example.com')
UserEmail(user_id=0, email='whitney33@example.com')
UserEmail(user_id=0, email='erin65@example.org')
UserEmail(user_id=0, email='rmiller@example.org')
UserEmail(user_id=0, email='johnsonstephanie@example.net')
UserEmail(user_id=0, email='steve33@example.net')
UserEmail(user_id=0, email='matthewmiller@example.org')
UserEmail(user_id=1, email='whall@example.net')
UserEmail(user_id=1, email='robertflores@example.org')
UserEmail(user_id=1, email='amy79@example.org')
UserEmail(user_id=1, email='slee@example.org')
UserEmail(user_id=1, email='marcday@example.com')
UserEmail(user_id=1, email='susan02@example.net')
UserEmail(user_id=1, email='bestes@example.com')
UserEmail(user_id=2, email='fwhite@example.org')
UserEmail(user_id=2, email='oliverkeith@example.net')
UserEmail(user_id=2, email='anndiaz@example.com')
UserEmail(user_id=2, email='ywest@example.net')
UserEmail(user_id=2, email='victorgonzalez@example.net')
UserEmail(user_id=2, email='

MERGE ITEMS IN ITERATORS

In [27]:
def merge_users(
    user_iter1: collections_abc.Iterator[User],
    user_iter2: collections_abc.Iterator[User],
) -> collections_abc.Iterator[User]:
    """Merge two user streams by ``id``, combining users present in both.

    Only the current head of each stream is compared, so the output is ordered
    only if both inputs are already ascending by ``id``. When both heads have
    the same ``id``, a single new ``User`` is emitted whose ``emails`` are those
    of the first stream followed by those of the second.

    Args:
        user_iter1: First stream of users, ascending by ``id``.
        user_iter2: Second stream of users, ascending by ``id``.

    Yields:
        Users in merged order; users with differing ids are passed through
        unchanged, users with equal ids are combined into one.
    """
    # Private end-of-stream marker: unlike None it can never be mistaken for a
    # real item, and it avoids relying on the truthiness of the items.
    exhausted = object()
    stream1 = iter(user_iter1)
    stream2 = iter(user_iter2)
    user1 = next(stream1, exhausted)
    user2 = next(stream2, exhausted)

    while user1 is not exhausted and user2 is not exhausted:
        if user1.id > user2.id:
            yield user2
            user2 = next(stream2, exhausted)
        elif user1.id < user2.id:
            yield user1
            user1 = next(stream1, exhausted)
        else:
            yield User(id=user1.id, emails=user1.emails + user2.emails)
            user1 = next(stream1, exhausted)
            user2 = next(stream2, exhausted)

    # At most one side still has data. Its head was already pulled out of the
    # stream, so it must be emitted before draining the rest; otherwise that
    # user would be lost.
    if user1 is not exhausted:
        yield user1
        yield from stream1
    if user2 is not exhausted:
        yield user2
        yield from stream2

In [28]:
# Two independent user streams, each carrying ids 0 through 4 in ascending
# order, so every id appears in both.
users1 = gen_fake_users()
users2 = gen_fake_users()

In [29]:
# Merge both user streams; users sharing an id are combined into one User.
# The result holds User objects, so it gets its own name instead of reusing
# `emails` from the email-merging cell.
merged_users = merge_users(users1, users2)
print_iterable(merged_users)

User(id=0, emails=['katherine21@example.net', 'kenneth97@example.net', 'castroann@example.com', 'vyang@example.com', 'eblair@example.org', 'osbornraymond@example.com', 'joycechristopher@example.net'])
User(id=1, emails=['onguyen@example.com', 'colemananna@example.com', 'bradleyjeffrey@example.com', 'adam05@example.com', 'johnsonmatthew@example.com', 'lorrainefloyd@example.net'])
User(id=2, emails=['greenkenneth@example.net', 'jason86@example.net', 'carolyngardner@example.net', 'bmiller@example.org', 'esparzaamy@example.org', 'moorerachael@example.com', 'kristopher73@example.org', 'jennifertaylor@example.net'])
User(id=3, emails=['hannah88@example.net', 'kaitlyndean@example.org', 'sandra26@example.net', 'matthewriddle@example.org', 'michael67@example.net', 'alambert@example.org', 'jeffreyjohnson@example.com', 'dproctor@example.org'])
User(id=4, emails=['fwalsh@example.org', 'rcabrera@example.com', 'jacob12@example.org', 'michaelpatterson@example.org', 'ramseygary@example.org', 'johnsonp

In [30]:
# NOTE(review): both outputs are left as `...` (Ellipsis) placeholders —
# presumably an exercise to split iter_in into an even and an odd stream;
# confirm before relying on these names.
iter_in = range(0, 200)

iter_even_out = ...
iter_odd_out = ...

QUESTIONS?
