# Iterators
- help to clean up code
- can be used in place of hand-written for-loops

In [None]:
# Wrap the weekday list in an explicit iterator and pull the first five
# names from it one at a time with next().
days = [
    'Sunday',
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
]
i = iter(days)
for _ in range(5):
    print(next(i))

Sunday
Monday
Tuesday
Wednesday
Thursday


## enumerate

In [None]:
# Number the weekdays starting at 1 instead of the default 0.
for position, day_name in enumerate(days, 1):
    print(position, day_name)

1 Sunday
2 Monday
3 Tuesday
4 Wednesday
5 Thursday
6 Friday
7 Saturday


## zip

In [None]:
# English weekday names and their German counterparts, in matching order.
# NOTE(review): 'Donerstag' is misspelled (German: 'Donnerstag'); it is kept
# as-is here so the code still matches the recorded cell outputs.
days = [
    'Sunday', 'Monday', 'Tuesday', 'Wednesday',
    'Thursday', 'Friday', 'Saturday',
]
day_ger = [
    'Sonntag', 'Montag', 'Dienstag', 'Mittwoch',
    'Donerstag', 'Freitag', 'Samstag',
]
# zip() pairs the n-th English name with the n-th German name.
for pair in zip(days, day_ger):
    print(pair)

('Sunday', 'Sonntag')
('Monday', 'Montag')
('Tuesday', 'Dienstag')
('Wednesday', 'Mittwoch')
('Thursday', 'Donerstag')
('Friday', 'Freitag')
('Saturday', 'Samstag')


In [None]:
numbers = [1, 2, 3]
letters = ['a', 'b', 'c']
l = list(zip(numbers, letters))   # pairs the two lists into a list of tuples
d = dict(zip(numbers, letters))   # pairs the two lists into a dict (number -> letter)
print(l, d, sep='\n')


[(1, 'a'), (2, 'b'), (3, 'c')]
{1: 'a', 2: 'b', 3: 'c'}


In [None]:
# enumerate() adds a running index to each (English, German) pair from zip().
for i, (english, german) in enumerate(zip(days, day_ger)):
    print(i, english, '=', german, 'in german')

0 Sunday = Sonntag in german
1 Monday = Montag in german
2 Tuesday = Dienstag in german
3 Wednesday = Mittwoch in german
4 Thursday = Donerstag in german
5 Friday = Freitag in german
6 Saturday = Samstag in german


# itertools

In [None]:
import itertools

In [None]:
# itertools.cycle repeats the items of a collection endlessly.
seq1 = ['Joe', 'Jana', 'Joseph']
cycle1 = itertools.cycle(seq1)
# One call more than there are names: the last call wraps around to the start.
for _ in range(len(seq1) + 1):
    print(next(cycle1))

Joe
Jana
Joseph
Joe


In [None]:
# itertools.count yields an endless arithmetic sequence: here 100, 110, 120, ...
count1 = itertools.count(start=100, step=10)
for _ in range(5):
    print(next(count1))

100
110
120
130
140


In [None]:
# itertools.accumulate without a function yields the running sum of the values.
vals = [10, 20, 60, 40, 50, 15, 30]
accu = itertools.accumulate(vals)
print([*accu])

[10, 30, 90, 130, 180, 195, 225]


In [None]:
# With max as the combining function, accumulate yields the running maximum:
# each output is the largest value seen so far.
accu2 = itertools.accumulate(vals, max)
print([*accu2])

[10, 20, 60, 60, 60, 60, 60]


In [None]:
# chain - joins two sequences into one continuous stream of items
x = itertools.chain.from_iterable(('ABCD', '1234'))
print([*x])

['A', 'B', 'C', 'D', '1', '2', '3', '4']


In [None]:
def fct(x):
    """Return True when x is below the threshold of 40, otherwise False."""
    threshold = 40
    return x < threshold

In [None]:
print(vals)
# dropwhile skips leading values while fct returns True, then yields all the rest
remaining = itertools.dropwhile(fct, vals)
print(list(remaining))
# takewhile yields leading values while fct returns True and stops at the first False
leading = itertools.takewhile(fct, vals)
print(list(leading))

[10, 20, 60, 40, 50, 15, 30]
[60, 40, 50, 15, 30]
[10, 20]


# generator functions
- Generator functions return a lazy iterator: an iterator object that produces a sequence of values on demand.
- These are objects that you can loop over but unlike lists, lazy iterators do not store their contents in memory. 
- Using __yield__ will result in a generator object.
- Using __return__ instead ends the function after handing back a single value.
- Calling a generator function creates a generator object. However, it does not start running the function.
- The function only executes on next()
- The difference between yield and return is that yield returns a value and pauses the execution while maintaining the internal states, <br>
whereas the return statement returns a value and terminates the execution of the function. 
- The generator is called just like a normal function. However, __its execution is paused on encountering the yield keyword.__ <br>
This sends the first value of the iterator stream to the calling environment. However, __local variables and their states are saved internally.__ <br>
This includes any variable bindings local to the generator, the instruction pointer, the internal stack, and any exception handling.<br>
- This allows you to resume function execution whenever you call one of the generator’s methods.  <br>
- That way, when next() is called on a generator object (either explicitly or implicitly within a for loop), <br>
execution resumes right after the last yield and runs until the next yield hands back the following value. 
- Unless your generator is infinite, __you can iterate through it one time only.__
- Once all values have been evaluated, the generator is deemed exhausted. The iteration will stop and the for loop will exit. 
- If you used next(), then instead you’ll get an explicit StopIteration exception.

One of the __advantages__ of a generator over a fully built collection such as a list is that __elements are generated dynamically.__<br>
Since each item is generated only after the previous one has been consumed, it is __more memory efficient__ than holding all items at once. 

    1. Do you need the entire results in memory?
    2. Do you need to reuse the raw results as is?
    3. Is your result reasonably small to fit in the memory?
    4. Do you want to process the results after you have obtained all the results?

If all of the above is yes, then a list (or another in-memory collection) should suffice. Otherwise, you may want to consider using a generator to benefit from the delayed execution and yielding on the fly.


In [None]:
def get_sequence_upto(x):
    """Generate the integers 0, 1, ..., x - 1, one per request."""
    numbers = range(x)
    for number in numbers:
        yield number

In [None]:
seq = get_sequence_upto(5)
# Pull all five values out of the generator by hand.
for _ in range(5):
    print(next(seq))
# The generator is exhausted now, so this extra next() call raises StopIteration.
print(next(seq))

0
1
2
3
4


StopIteration: 

In [None]:
# In the following example, function square_of_sequence() acts as a generator.
# It yields the square of a number successively on every call of next().

def square_of_sequence(x):
    """Yield the square of each integer from 0 up to, but not including, x."""
    for number in range(x):
        yield number ** 2


gen = square_of_sequence(5)

# Drive the generator by hand: keep calling next() until it signals
# exhaustion by raising StopIteration.
exhausted = False
while not exhausted:
    try:
        value = next(gen)
    except StopIteration:
        exhausted = True
    else:
        print("Received on next(): ", value)

Received on next():  0
Received on next():  1
Received on next():  4
Received on next():  9
Received on next():  16


In [None]:
# A for loop can walk over a generator directly: it calls next() behind the
# scenes and stops cleanly when StopIteration is raised.
squares = square_of_sequence(5)
for square in squares:
    print(square)

0
1
4
9
16


## generator comprehension
- shorter way of defining simple generator functions.
- They’re useful in the same cases where list comprehensions are used, with an added benefit: </br>
you can create them without building and holding the entire object in memory before iteration. 

In [None]:
# List comprehension: all squares of the even numbers are built up front.
liste_ = [x ** 2 for x in range(10) if not x % 2]
print(liste_)

# Generator expression: (expression for i in s if condition)
# Same values as above, but each one is computed only when it is requested.
gen = (x ** 2 for x in range(10) if not x % 2)
print(gen)
for _ in range(3):
    print(next(gen))

[0, 4, 16, 36, 64]
<generator object <genexpr> at 0x7ff81ec957b0>
0
4
16


In [None]:
# A generator expression can be passed straight to a function. When it is the
# only argument, the extra pair of parentheses around it can be omitted.
sum(x * x for x in range(10))


285

## A Generator Solution

In [None]:
# Add up the last whitespace-separated column of every line in the log file.
# Two chained generator expressions do the work, so the file is streamed line
# by line instead of being loaded as a whole.
with open("../data/test.txt") as wwwlog:
    # rsplit(None, 1) splits on any whitespace and discards the trailing
    # newline, so a placeholder column really compares equal to '-'.
    # Splitting on " " left the newline attached ("-\n"), which slipped past
    # the filter below and made int() raise ValueError.
    # Blank lines are skipped so they cannot raise IndexError.
    bytecolumn = (line.rsplit(None, 1)[1] for line in wwwlog if line.strip())
    bytes_sent = (int(x) for x in bytecolumn if x != '-')
    print("Total", sum(bytes_sent))

Total 135667


## Performance - of generator objects
- The list you get from the list comprehension below is 87,616 bytes, while the generator object is only 112 bytes (exact sizes vary by Python version and platform).
- This means that the list is over 700 times larger than the generator object!

In [None]:
import sys
# Compare the size of a fully built list of squares with the size of the
# equivalent generator object. sys.getsizeof is shallow: it measures the
# container itself, not the integers it refers to.
# The list now squares its items (i ** 2) like the generator below; it
# previously doubled them (i * 2) despite the "squared" name.
nums_squared_lc = [i ** 2 for i in range(10000)]
print(sys.getsizeof(nums_squared_lc))

nums_squared_gc = (i ** 2 for i in range(10000))
print(sys.getsizeof(nums_squared_gc))

87616
112


## memory vs. speed
- If the list is smaller than the running machine’s available memory, then list comprehensions can be faster to evaluate than the equivalent generator expression.
- Here, you can see that summing across all values in the list comprehension took about half the time of summing across the generator (0.001 s vs. 0.002 s in the profiles below). 
- If speed is an issue and memory isn’t, then a list comprehension is likely a better tool for the job.

In [None]:
import cProfile
# Profile summing a list comprehension: the whole list is built first and
# then handed to sum() in a single call.
cProfile.run('sum([i * 2 for i in range(10000)])')

         5 function calls in 0.001 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    0.001    0.001 <string>:1(<listcomp>)
        1    0.000    0.000    0.001    0.001 <string>:1(<module>)
        1    0.000    0.000    0.001    0.001 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




In [None]:
# Profile the same sum fed by a generator expression: sum() pulls the items
# from the generator one at a time instead of receiving a finished list.
cProfile.run('sum((i * 2 for i in range(10000)))')

         10005 function calls in 0.002 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10001    0.001    0.000    0.001    0.000 <string>:1(<genexpr>)
        1    0.000    0.000    0.002    0.002 <string>:1(<module>)
        1    0.000    0.000    0.002    0.002 {built-in method builtins.exec}
        1    0.001    0.001    0.002    0.002 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




## send, throw, close - Advanced Generator Methods

### send()
- Resumes the execution and “sends” a value into the generator function.

In [None]:
def f(x=None):
    """Generator that doubles every value passed in through send().

    It alternates forever between two pauses: a bare ``yield`` that waits for
    a value from the caller, and a ``yield`` that hands back twice that value.
    The ``x`` argument is never read by the body.
    """
    while True:
        received = yield  # pause here until the caller send()s a value
        yield received * 2
             
g = f()  # creates the generator object; the function body has not started yet
next(g)  # run up to the first bare yield so the generator can accept a value
g.send(4)  # resume with the value 4; the result is the doubled value

8

In [None]:
next(g)  # step past the `yield x*2` pause back to the bare yield
g.send(10)  # resume with the value 10; the result is the doubled value

20

### throw
- allows you to raise an exception inside the generator at the point where it is paused.

### close
- allows you to stop a generator.