In [None]:
%autosave 5

Autosaving every 5 seconds


In [None]:
nums = [2,3,4,5]
for n in nums:
    print(n)

2
3
4
5


In [None]:
for c in "34343":
    print(c)

3
4
3
4
3


In [None]:
for key in {'a' : 1, 'b' : 2, 'c' : 3}:
    print(key)

a
b
c


In [None]:
class Point:
    """A minimal 2-D point that stores ``x`` and ``y`` coordinates.

    The class defines neither ``__iter__`` nor ``__getitem__``, so its
    instances are not iterable.
    """

    def __init__(self, x, y):
        """Store the two coordinates on the instance."""
        self.x, self.y = x, y

In [None]:
p = Point(3, 5)

In [None]:
# Point defines neither __iter__ nor __getitem__, so it is not iterable
# and this loop raises TypeError.
for i in p:
    print(i)

TypeError: ignored

In [None]:
numtir = iter(nums)

In [None]:
next(numtir)

2

In [None]:
next(numtir)

3

# Generators

In [None]:
def squares(nums):
    """Lazily yield the square of each item in ``nums``, in order."""
    yield from (value * value for value in nums)

In [None]:
nums

[2, 3, 4, 5]

In [None]:
num_sqrs = squares(nums)

In [None]:
num_sqrs

<generator object squares at 0x7f38a4afbcd0>

In [None]:
next(num_sqrs)

4

In [None]:
next(num_sqrs)

9

In [None]:
next(num_sqrs)

16

In [None]:
next(num_sqrs)

25

In [None]:
# All four squares have already been consumed, so this call raises StopIteration.
next(num_sqrs)

StopIteration: ignored

In [None]:
# num_sqrs is already exhausted, so the loop body never runs and nothing is printed.
for s in num_sqrs:
    print(s)

In [None]:
for s in squares(nums):
    print(s)

4
9
16
25


In [None]:
def squares(nums):
    """Yield the square of each item in ``nums`` while tracing execution.

    A message is printed when the body first starts, before every yield,
    after control returns from every yield, and once the input is used up,
    which makes the pause/resume behaviour of a generator visible.
    """
    print("Starting computing squares")
    for n in nums:
        print(f"Yielding square of {n}")
        squared = n * n
        # Execution pauses here until the consumer asks for the next value.
        yield squared
        print(f"I am back to generator")

    print("Done..finished everything")

In [None]:
nums_sqrs = squares(nums) # the body has not started executing yet; the call only creates the generator object

In [None]:
next(nums_sqrs) # the first next() runs the body from the top up to the first yield

Starting computing squares
Yielding square of 2


4

In [None]:
next(nums_sqrs) # execution resumes right after the yield statement and continues until the next yield

I am back to generator
Yielding square of 3


9

In [None]:
next(nums_sqrs)

I am back to generator
Yielding square of 4


16

In [None]:
next(nums_sqrs)

I am back to generator
Yielding square of 5


25

In [None]:
next(nums_sqrs) # when there is no yield statement remaining, the body runs to its end and StopIteration is raised

I am back to generator
Done..finished everything


StopIteration: ignored

In [None]:
next(nums_sqrs)


StopIteration: ignored

In [None]:
def three_steps(x):
    """Yield ``(step_number, x)`` for steps 1 to 3, announcing each step.

    A banner is printed before each yield, and a final message is printed
    once the consumer asks for a value after the third step.
    """
    stages = (
        (1, "now starting step 1"),
        (2, "now starting step 2"),
        (3, "now starting step 3"),
    )
    for number, banner in stages:
        print(banner)
        yield number, x
    print("All steps done..must exit now!")

In [None]:
steps = three_steps("hello")

In [None]:
next(steps)

now starting step 1


(1, 'hello')

In [None]:
next(steps)

now starting step 2


(2, 'hello')

In [None]:
next(steps)

now starting step 3


(3, 'hello')

In [None]:
next(steps)

All steps done..must exit now!


StopIteration: ignored

In [None]:
def countdown(n):
    """Yield ``n``, ``n - 1``, ... for as long as the value stays above zero."""
    remaining = n
    while True:
        if not remaining > 0:
            break
        yield remaining
        remaining -= 1

In [None]:
for i in countdown(5):
    print(i)

NameError: ignored

In [None]:

import gc

In [None]:
gc.get_stats()

NameError: ignored

In [None]:
%%file generator.py
import time
import sys

def squares(n):
    """Return a list with the squares of 0, 1, ..., n-1.

    Both the input numbers and the results are fully materialized as lists,
    in contrast to the lazy ``gsquares`` variant in this file.
    """
    values = list(range(n))
    result = []
    for value in values:
        result.append(value * value)
    return result
    
    
def gsquares(n):
    """Lazily yield the squares of 0, 1, ..., n-1, one value at a time."""
    yield from (k * k for k in range(n))
        
if __name__ == "__main__":
    # Usage:
    #   python generator.py N      sum the squares using the list-based squares()
    #   python generator.py g N    sum the squares using the lazy gsquares()
    args = sys.argv[1:]
    use_generator = bool(args) and args[0] == "g"
    required = 2 if use_generator else 1
    if len(args) < required:
        # Without this guard a missing argument surfaces as a bare IndexError.
        sys.exit("usage: python generator.py [g] N")

    if use_generator:
        s = 0
        for i in gsquares(int(args[1])):
            s += i
        # NOTE(review): the pause presumably leaves time to inspect the
        # process (e.g. its memory use) before it exits — confirm.
        time.sleep(20)
        print(s)
    else:
        sqrs = squares(int(args[0]))
        # The list is still referenced while the process sleeps.
        time.sleep(20)
        print(sum(sqrs))

Writing generator.py


In [None]:
!python generator.py 1000

332833500


### Building data pipelines

In [None]:
import os

def find(root):
    """Lazily yield the path of every file found under ``root``.

    The directory tree is traversed with ``os.walk``; each yielded path is
    the walked directory joined with the file name.
    """
    for path, _dirnames, filenames in os.walk(root):
        # Iterate the ``filenames`` list produced by os.walk; the previous
        # code referenced an undefined name ``filename`` and raised NameError.
        for f in filenames:
            yield os.path.join(path, f)

In [None]:
def take(n, seq):
    """Return a list with up to the next ``n`` items of ``seq``.

    Items are pulled lazily, so ``seq`` may be an infinite generator; when
    ``seq`` is an iterator, a later call continues where this one stopped.
    If fewer than ``n`` items remain, the shorter list is returned instead
    of letting ``StopIteration`` escape from the underlying ``next()`` calls.
    """
    from itertools import islice  # local import keeps this cell self-contained

    # islice rejects negative counts; clamp so n <= 0 still yields [].
    return list(islice(seq, max(n, 0)))

In [None]:
def naturals():
    """Yield 1, 2, 3, ... without end; the caller decides when to stop."""
    current = 1
    while True:
        yield current
        current += 1

In [None]:
x10 = (i*i for i in range(10))

In [None]:
for i in x10:
    print(i, end = " , ") 

0 , 1 , 4 , 9 , 16 , 25 , 36 , 49 , 64 , 81 , 

In [None]:
def squares(seq):
    """Return a generator producing the square of each item of ``seq``.

    Nothing is computed until the result is iterated, so ``seq`` may be an
    infinite generator.
    """
    squared = (item * item for item in seq)
    return squared

In [None]:
nat = naturals()

In [None]:
nat

<generator object naturals at 0x7f38a4afbf50>

In [None]:
sqr_nat = squares(nat)

In [None]:
take(10, sqr_nat)

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [None]:
take(10, sqr_nat)

[121, 144, 169, 196, 225, 256, 289, 324, 361, 400]

In [None]:
import os

def find(root):
    """Lazily yield the path of every file found under ``root`` via ``os.walk``."""
    for directory, _subdirs, names in os.walk(root):
        yield from (os.path.join(directory, name) for name in names)

def grep(pattern, seq):
    """Lazily keep only the items of ``seq`` that contain ``pattern``.

    Membership is tested with ``pattern in item``, so for strings this is a
    plain substring match rather than a regular expression.
    """
    matches = (item for item in seq if pattern in item)
    return matches
    

In [None]:
def count(seq):
    """Return how many items ``seq`` produces, consuming it in the process."""
    total = 0
    for _ in seq:
        total += 1
    return total

In [None]:
import os

def find(root):
    """Walk ``root`` with ``os.walk`` and lazily yield each file's joined path."""
    for folder, _dirs, files_here in os.walk(root):
        for entry in files_here:
            full_path = os.path.join(folder, entry)
            yield full_path

def grep(pattern, seq):
    """Return a generator over the items of ``seq`` for which ``pattern in item`` holds."""
    selected = (candidate for candidate in seq if pattern in candidate)
    return selected
    
def readlines(filenames):
    """Lazily yield every line of every file named in ``filenames``.

    Files are opened one at a time, in order, in text mode with the default
    encoding; lines keep their trailing newline.
    """
    for name in filenames:
        with open(name) as handle:
            yield from handle


In [None]:
# Pipeline: paths under '.' -> paths containing ".py" (substring match)
# -> the lines of those files -> number of lines.
files = find('.')
pyfiles = grep(".py", files)
# NOTE(review): this pulls the first matching path out of the shared generator
# and discards it, so that file's lines are not part of the count below —
# confirm this is intended.
take(1, pyfiles)
lines = readlines(pyfiles)
count(lines)

0

### Practice

In [None]:
import os

def find(root):
    """Lazily yield the path of each file located anywhere below ``root``."""
    for base, _dirs, file_names in os.walk(root):
        yield from (os.path.join(base, file_name) for file_name in file_names)

def take(n, seq):
    """Return a list with up to the next ``n`` items of ``seq``.

    Items are pulled lazily, and when ``seq`` is an iterator a later call
    continues where this one stopped. If fewer than ``n`` items remain, the
    shorter list is returned instead of letting ``StopIteration`` escape
    from the underlying ``next()`` calls.
    """
    from itertools import islice  # local import keeps this cell self-contained

    # islice rejects negative counts; clamp so n <= 0 still yields [].
    return list(islice(seq, max(n, 0)))

def grep(pattern, seq):
    """Lazily filter ``seq`` down to the items that contain ``pattern``.

    The check is ``pattern in item``, which for strings is a substring test.
    """
    kept = (element for element in seq if pattern in element)
    return kept

In [None]:
# Lazily walk the current directory, keep the paths containing ".ipynb",
# and pull three of them from the pipeline.
files = find(".")
notebooks = grep(".ipynb", files)
take(3, notebooks)

StopIteration: ignored

In [None]:
def count(seq):
    """Consume ``seq`` and return the number of items it produced."""
    total = 0
    for total, _ in enumerate(seq, start=1):
        pass
    return total

In [None]:
# Count the paths under the current directory that contain ".ipynb".
# A fresh pipeline is built because generators can only be consumed once.
files = find(".")
notebooks = grep(".ipynb", files)
count(notebooks)

0

In [None]:
def readlines(files):
    """Lazily yield the lines of each file in ``files``, one file after another.

    Every path is opened in text mode with the default encoding and closed
    again before the next one is opened; lines keep their trailing newline.
    """
    for path in files:
        with open(path) as stream:
            yield from stream

In [None]:
# Rebuild the pipeline from scratch and count the ".ipynb" paths again.
files = find(".")
notebooks = grep(".ipynb", files)
count(notebooks)

0

In [None]:
# Pipeline: paths under "." -> paths containing ".ipynb" -> their lines
# -> lines containing "def" -> number of such lines.
files = find(".")
notebooks = grep(".ipynb", files)
lines = readlines(notebooks)
# grep does a substring test, so any line containing "def" is counted
# (for example one containing "default"), not only function definitions.
function_defs = grep("def", lines)
count(function_defs)

0

# Problems
- Write a function `get_paragraphs` to split a given text file into paragraphs. A paragraph ends wherever there is an empty line. The function should take a sequence of lines (i.e. the output of the `readlines` function) as its argument and return a sequence of paragraphs.
Once the function is ready, make use of it to find:
- the number of paragraphs in the book
- the biggest paragraph in the book


In [None]:
!wget https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt

--2021-10-08 06:09:00--  https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt
Resolving ia802902.us.archive.org (ia802902.us.archive.org)... 207.241.233.52
Connecting to ia802902.us.archive.org (ia802902.us.archive.org)|207.241.233.52|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://archive.org/download/prideandprejudic01342gut/pandp12.txt [following]
--2021-10-08 06:09:00--  https://archive.org/download/prideandprejudic01342gut/pandp12.txt
Resolving archive.org (archive.org)... 207.241.224.2
Connecting to archive.org (archive.org)|207.241.224.2|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://ia902806.us.archive.org/23/items/prideandprejudic01342gut/pandp12.txt [following]
--2021-10-08 06:09:01--  https://ia902806.us.archive.org/23/items/prideandprejudic01342gut/pandp12.txt
Resolving ia902806.us.archive.org (ia902806.us.archive.org)... 207.241.232.106
Connecting to ia902806.

In [None]:
def line_seq(filename):
    """Lazily yield the lines of the text file ``filename``.

    The file is opened in text mode with the default encoding; lines keep
    their trailing newline.
    """
    with open(filename) as handle:
        yield from handle

def get_paragraphs(seq):
    """Lazily group a sequence of lines into paragraphs.

    A paragraph is a run of consecutive non-blank lines, returned as one
    string made by joining those lines unchanged. Lines that are empty or
    contain only whitespace separate paragraphs and are not included.

    Runs of several blank lines, and blank lines at the very start of the
    input, do not produce empty-string paragraphs; previously each extra
    blank line yielded ``''`` and inflated any paragraph count.
    """
    paragraph = []
    for line in seq:
        if line.strip():
            paragraph.append(line)
        elif paragraph:
            # A blank line closes the paragraph collected so far, if any.
            yield "".join(paragraph)
            paragraph = []
    # Flush the last paragraph when the input does not end with a blank line.
    if paragraph:
        yield "".join(paragraph)

In [None]:
paras = get_paragraphs(line_seq('pandp12.txt'))

In [None]:
count(paras)

2401