

#  Workshop 15
## _Iterators and generators._

### Iterators

#### Iterators and the "for" loop


In [1]:
for i in range(5):
    print(i)

for element in [1, 2, 3]:
    print(element)

for element in (1, 2, 3):
    print(element)

for key in {'one': 1, 'two': 2}:
    print(key)

for char in "123":
    print(char)

# A file object is itself an iterator over its lines. Opening it in a "with"
# block guarantees the file is closed once the loop finishes or raises.
with open("text.txt") as text_file:
    for line in text_file:
        print(line, end='')

0
1
2
3
4
1
2
3
1
2
3
one
two
1
2
3


FileNotFoundError: [Errno 2] No such file or directory: 'text.txt'

#### Under the hood: functions "iter" and "next" 


In [188]:
s = 'abc'
it = iter(s)
print(it)

# print(next(it))
# print(next(it))
# print(next(it))
# print(next(it))


<str_iterator object at 0x10810d860>


In [192]:
next(it)

StopIteration: 

In [196]:
import sys

In [212]:
sys.getsizeof((range(100)))

48

In [195]:
list(range(10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

### Generators


Generators are a simple and powerful tool for creating iterators.


In [218]:
def reverse(data):
    """Generate the items of ``data`` from the last one to the first.

    ``data`` must support ``len()`` and integer indexing (str, list, tuple...).
    Nothing is computed until the generator is first advanced.
    """
    for position in reversed(range(len(data))):
        yield data[position]

print(reverse)
print(*reverse('golf'))


<function reverse at 0x10809e2f0>
f l o g


In [219]:
def squares(n):
    """Generate the squares of the integers 0, 1, ..., n - 1 in order."""
    yield from (base ** 2 for base in range(n))


for x in squares(10):
    print(x)

print(list(squares(10)))

0
1
4
9
16
25
36
49
64
81
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [220]:
def fact(n):
    """Generate the running factorials 1!, 2!, ..., (n - 1)!.

    The upper bound is exclusive: ``fact(10)`` stops after 9!.
    """
    running_product = 1
    for multiplier in range(1, n):
        running_product = running_product * multiplier
        yield running_product


for x in fact(10):
    print(x)



1
2
6
24
120
720
5040
40320
362880


### Generator expressions


In [None]:
sys.getsizeof()

In [222]:
(x*x for x in range(10))

<generator object <genexpr> at 0x108117b48>

In [223]:
for i in (x*x for x in range(10)):
    print(i)


0
1
4
9
16
25
36
49
64
81


In [224]:
for i in (x for x in range(10) if x % 2 == 0):
    print(i)


0
2
4
6
8


In [225]:
math.trunc(math.sqrt(5))

2

In [226]:
import math
for i in (x for x in range(10) if math.sqrt(x) - math.trunc(math.sqrt(x)) == 0):
    print(i)


0
1
4
9


In [227]:
s = sum(i*i for i in range(10))
print(s)



285


In [233]:
# Table of sin(x) for whole-degree angles 0..90. math.sin expects radians,
# hence the x * pi / 180 conversion. `sin` must be imported here: the
# comprehension calls sin, not asin.
from math import asin, pi, sin
{x: sin(x * pi / 180) for x in range(0, 91)}



{0: 0.0,
 1: 0.01745240643728351,
 2: 0.03489949670250097,
 3: 0.05233595624294383,
 4: 0.0697564737441253,
 5: 0.08715574274765817,
 6: 0.10452846326765346,
 7: 0.12186934340514748,
 8: 0.13917310096006544,
 9: 0.15643446504023087,
 10: 0.17364817766693033,
 11: 0.1908089953765448,
 12: 0.20791169081775931,
 13: 0.224951054343865,
 14: 0.24192189559966773,
 15: 0.25881904510252074,
 16: 0.27563735581699916,
 17: 0.29237170472273677,
 18: 0.3090169943749474,
 19: 0.32556815445715664,
 20: 0.3420201433256687,
 21: 0.35836794954530027,
 22: 0.374606593415912,
 23: 0.3907311284892737,
 24: 0.40673664307580015,
 25: 0.42261826174069944,
 26: 0.4383711467890774,
 27: 0.45399049973954675,
 28: 0.4694715627858908,
 29: 0.48480962024633706,
 30: 0.49999999999999994,
 31: 0.5150380749100542,
 32: 0.5299192642332049,
 33: 0.5446390350150271,
 34: 0.5591929034707469,
 35: 0.573576436351046,
 36: 0.5877852522924731,
 37: 0.6018150231520483,
 38: 0.6156614753256582,
 39: 0.6293203910498374,
 40: 0.

In [None]:
data = 'golf'
letter_list = [data[i] for i in range(len(data)-1, -1, -1)]
print(letter_list)


In [234]:
print([x + y for x in 'abc' for y in 'lmn'])

['al', 'am', 'an', 'bl', 'bm', 'bn', 'cl', 'cm', 'cn']


### Functional tools

#### filter


In [241]:
?filter

In [242]:
print(*filter(lambda x: x % 2 != 0, range(10,0,-1)))


9 7 5 3 1


In [243]:
lst = ['abc','a','jjjjj','poiuytr','ssss']

In [245]:
print(*filter(lambda x: len(x) > 4, lst ))

jjjjj poiuytr


In [246]:
print(*(i for i in ['abc','a','jjjjj','poiuytr','ssss'] if len(i) > 4))

jjjjj poiuytr


In [247]:
import math
print(*filter(lambda x: math.sqrt(x) - int(math.sqrt(x)) == 0, range(100)))


0 1 4 9 16 25 36 49 64 81


In [None]:
math.sqrt(4)-int(math.sqrt(4))

#### map


In [248]:
lst

['abc', 'a', 'jjjjj', 'poiuytr', 'ssss']

In [252]:
reversed('abc'),'abc'[::-1]

(<reversed at 0x10811a1d0>, 'cba')

In [250]:
list(map(lambda q: q[::-1], lst))

['cba', 'a', 'jjjjj', 'rtyuiop', 'ssss']

In [253]:
print(list(map(lambda x: x * x, range(10))))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [264]:
rule = {
        'sep': '',
        'end': '<-',

       }

In [262]:
print(*map(lambda c: '_' + c.upper() + '_' , 'hello'), sep='', end='<-')

_H__E__L__L__O_<-

In [265]:
print(*map(lambda c: '_' + c.upper() + '_' , 'hello'), **rule)

_H__E__L__L__O_<-

#### reduce


In [267]:
?reduce()

In [270]:
from functools import reduce

# Arithmetic series: reduce folds left to right, ((1+2)+3)+4 = 10
print(reduce(lambda a, b: a + b, range(1,5)))

# Not a factorial: the lambda subtracts the accumulated value from the next
# item, so the order of the arguments matters (a factorial would use a * b).
print(reduce(lambda a, b: b-a, range(1, 5))) # 4-(3-(2-1)) = 2

10
2


#### zip


In [273]:
x_list = ['NAME', 'NAME_@','NAME_3']
y_list = [10,3,8]
z_list = [2/3,3/3,1/3]

names = ['Name','Rate','Att']

[i for i in zip(x_list,y_list,z_list)]



[('NAME', 10, 0.6666666666666666),
 ('NAME_@', 3, 1.0),
 ('NAME_3', 8, 0.3333333333333333)]

In [274]:
M

[[10, 20, 30], [7, 5, 3], [1, 2, 3]]

In [282]:
print(*M,sep='\n')

[10, 20, 30]
[7, 5, 3]
[1, 2, 3]


In [305]:
list(zip([1,2,3,4,5,6,7],'abcdfd'))

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'f'), (6, 'd')]

In [296]:
print(*(zip([1,2],[1,2,4])),sep='\n')

(1, 1)
(2, 2)


In [310]:
lst
for i, x in enumerate(lst):
    lst[i] = 'x' 
lst

['x', 'x', 'x', 'x', 'x']

In [311]:
for i, x in enumerate(x * x for x in range(10)):
    print(i, " * ", i, " = ", x)


0  *  0  =  0
1  *  1  =  1
2  *  2  =  4
3  *  3  =  9
4  *  4  =  16
5  *  5  =  25
6  *  6  =  36
7  *  7  =  49
8  *  8  =  64
9  *  9  =  81


__partial__

In [325]:
from functools import partial

# partial() pre-fills the keyword argument, so binStrToInt(s) == int(s, base=3).
# NOTE: despite the "bin" in the name, base=3 parses the digits as ternary:
# '10010' -> 1*81 + 1*3 = 84. Binary parsing would need base=2.
binStrToInt = partial(int, base=3)
print(binStrToInt('10010'))


84


In [328]:
rule

{'end': '<-', 'sep': ''}

In [329]:
ourF = partial(print,**rule)
ourF

functools.partial(<built-in function print>, sep='', end='<-')

In [324]:
ourF == None

True

In [331]:
ourF('1012',2,3)

101223<-

## Task 1

Implement a generator function that takes several sequences as input and returns their elements as a single sequence: first all the elements of the first argument, then those of the second one, and so on.


In [332]:
sequence1 = '123'
sequence2 = 'ABC'
# implement the function 'combine'
sequence_combined = itertools.chain(sequence1, sequence2)
for symbol in sequence_combined:
    print(symbol, end=' ')

# the output should be
# 1 2 3 A B C


1 2 3 A B C 

In [333]:
range(1.5)

TypeError: 'float' object cannot be interpreted as an integer

In [None]:
def combine(sequence1, sequence2, *more_sequences):
    """Yield the elements of every argument as one continuous sequence.

    All elements of ``sequence1`` come first, then all elements of
    ``sequence2``, then those of any further iterables, in argument order.
    Each argument is consumed lazily, only when the generator reaches it.

    Args:
        sequence1: First iterable.
        sequence2: Second iterable.
        *more_sequences: Any number of additional iterables.

    Yields:
        The elements of each iterable, left to right.
    """
    for sequence in (sequence1, sequence2, *more_sequences):
        # "yield from" delegates to the inner iterable element by element.
        yield from sequence

## Task 2

For a number N, output the list of all prime numbers below N. Use the function `math.sqrt` as opposed to `x**0.5` to compute the square root.

Use the function `filter` to get the output list.

### itertools

A standard-library module of building blocks for creating iterators that automate common iteration patterns.

https://docs.python.org/3.8/library/itertools.html



**islice()**

Makes slices of any iterable objects.

In [334]:
text_data = '''First line has data
### Second line doesn't have data
Third line has data
### Every even line does not contain data
Every odd line contains data
### No data
Data here
### No data
Data here
'''

with open('slicefile.txt', 'w') as output_file:
    output_file.write(text_data)

In [335]:
with open('slicefile.txt') as input_file:
    for line in input_file:
        print(line.strip())

First line has data
### Second line doesn't have data
Third line has data
### Every even line does not contain data
Every odd line contains data
### No data
Data here
### No data
Data here


In [340]:
import itertools

with open('slicefile.txt') as input_file:
    # itertools.islice(sequence, start, stop, step)
    input_file_even = itertools.islice(input_file, 0, None, 2)

    for line in input_file_even:
        print(line.strip())

<itertools.islice object at 0x108136d18>
First line has data
Third line has data
Every odd line contains data
Data here
Data here


In [346]:
import itertools

print(*(itertools.permutations('123456', 2)), sep='\n')

('1', '2')
('1', '3')
('1', '4')
('1', '5')
('1', '6')
('2', '1')
('2', '3')
('2', '4')
('2', '5')
('2', '6')
('3', '1')
('3', '2')
('3', '4')
('3', '5')
('3', '6')
('4', '1')
('4', '2')
('4', '3')
('4', '5')
('4', '6')
('5', '1')
('5', '2')
('5', '3')
('5', '4')
('5', '6')
('6', '1')
('6', '2')
('6', '3')
('6', '4')
('6', '5')


## Task 3

You are given a list of data pairs. The first element of a pair is a year, the second one is a data point.

Split the data into groups by year using the `itertools.groupby()` function. Then, for every year, output the data points that are greater than 1000. Use the function `filter` to do it.

Do not use any lists / tuples / sets / dictionaries as variables. Solve everything using `groupby`, `filter`, `map` and other tools for iterable objects.

You can learn how to use `groupby` here: https://docs.python.org/3.8/library/itertools.html#itertools.groupby

The key part is this:

```python
for k, g in groupby(data, keyfunc):
```

Your output should be

```
Data for the year 1988
4636 1808 1108
Data for the year 1989
3517
Data for the year 1990
2276 2407 1798
```

In [358]:
from  itertools import groupby

data = [
    (1988, 330),
    (1988, 4636),
    (1988, 1808),
    (1988, 1108),
    (1988, 766),
    (1988, 383),
    (1988, 411),
    (1988, 363),
    (1989, 76),
    (1989, 202),
    (1989, 3517),
    (1989, 451),
    (1989, 132),
    (1989, 141),
    (1989, 193),
    (1990, 111),
    (1990, 2276),
    (1990, 2407),
    (1990, 405),
    (1990, 151),
    (1990, 459),
    (1990, 1798)
]

# Pipeline: filter keeps only the pairs whose data point exceeds 1000, groupby
# then splits the survivors into runs sharing the same year (the data is
# already ordered by year), and map extracts the data point from each pair.
# Each printed line is the year followed by that year's data points.
# NOTE(review): the task statement asks for a separate "Data for the year
# <year>" header line, and filtering before grouping drops any year that has
# no value over 1000 - confirm whether this output format is acceptable.
for k, g in groupby(filter(lambda x: x[1]> 1000,data), key=lambda x: x[0]):
    print(k,*map(lambda x: x[1],g))

1988 4636 1808 1108
1989 3517
1990 2276 2407 1798




---



## Appendix

The following section contains information from a future topic. It is included only as trivia. 



### How iterators are implemented

Iterator that returns the letters of a string in reverse order:

In [None]:
class Reverse:
    """Iterator that walks a sequence from its last item back to its first.

    ``index`` holds the number of items not yet returned; it starts at
    ``len(data)`` and counts down to 0.
    """

    def __init__(self, data):
        self.data = data
        self.index = len(data)

    def __iter__(self):
        # An iterator is its own iterable, so "for" can consume it directly.
        return self

    def __next__(self):
        if self.index != 0:
            self.index -= 1
            return self.data[self.index]
        # Nothing left: tell the consumer the iteration is over.
        raise StopIteration


rev = Reverse('spam')
print(iter(rev))

for char in rev:
    print(char)


Iterator that returns factorials

In [None]:
class Fact:
    """Iterator producing the factorials 1!, 2!, ..., (limit - 1)!.

    ``n`` is the next multiplier to apply and ``data`` is the product
    accumulated so far; iteration stops once ``n`` reaches ``limit``.
    """

    def __init__(self, limit):
        self.n = 1
        self.limit = limit
        self.data = 1

    def __iter__(self):
        # An iterator is its own iterable, so "for" can consume it directly.
        return self

    def __next__(self):
        multiplier = self.n
        if multiplier >= self.limit:
            raise StopIteration
        self.n = multiplier + 1
        self.data *= multiplier
        return self.data


for x in Fact(10):
    print(x)
