In [1]:
def square_numbers(nums):
    """Return a list holding the square of every item in ``nums``.

    The whole list is built in memory before it is handed back to the caller.
    """
    return [value * value for value in nums]

In [2]:
my_nums = square_numbers([1,2,3,4,5])

In [3]:
print(my_nums)

[1, 4, 9, 16, 25]


The `square_numbers` function above simply builds and returns a list. How would we convert it into a generator?

In [4]:
def square_numbers(nums):
    """Yield the square of each item in ``nums``, one value at a time.

    Because the body contains ``yield``, calling this function returns a
    generator object; no square is computed until a value is requested.
    """
    for value in nums:
        yield value * value
    

The `yield` keyword is what makes this function a generator.

In [5]:
my_nums = square_numbers([1,2,3,4,5])

In [6]:
print(my_nums)

<generator object square_numbers at 0x000001C7E32D1D80>


The printed output shows that `my_nums` is now a generator object rather than a list of results.

#### Generators don't hold the entire result in memory; they yield one result at a time

We need to ask for each result explicitly by calling `next()`.

In [7]:
print(next(my_nums))

1


In [8]:
print(next(my_nums))
print(next(my_nums))
print(next(my_nums))
print(next(my_nums))

4
9
16
25


In [9]:
print(next(my_nums))

StopIteration: 

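Instead of requesting these values one at a time with `next()`, we can use a `for` loop to get them. (A generator can only be consumed once, so an exhausted generator has to be created again before it is looped over.)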

In [None]:
# The generator created earlier was fully consumed by the next() calls above,
# and a generator cannot be rewound, so build a fresh one before looping.
my_nums = square_numbers([1,2,3,4,5])
for num in my_nums:
    print(num)

We are not getting a StopIteration error because the for loop handles it internally and knows when to stop looping.

The same result written as a list comprehension:

In [None]:
my_nums = [x*x for x in [1,2,3,4,5]]

In [None]:
print(my_nums)

In [None]:
for num in my_nums:
    print(num)

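#### Replacing the square brackets with parentheses gives a generator expression (it looks like a "tuple comprehension", but Python has no such thing)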

In [None]:
my_nums = (x*x for x in [1,2,3,4,5])

In [None]:
print(my_nums)

In [None]:
for num in my_nums:
    print(num)

What if we wanted to actually print all the values from the generator at once?

We already know that a generator does not hold all of its values in memory, so to see them all at once we need to convert it into a list.

In [None]:
my_nums = (x*x for x in [1,2,3,4,5])

In [None]:
print(my_nums)

In [None]:
list(my_nums)

So a generator is better for performance because, as mentioned, it does not hold all the values in memory.

Whenever we convert a generator object to a list, and that generator produces a lot of values, we lose that advantage: the generator is memory efficient, but the resulting list holds every value in memory.

In [10]:
pip install memory_profiler

Note: you may need to restart the kernel to use updated packages.


In [14]:
import memory_profiler as mem_profile
import random
import time

# Sample values that random.choice() picks from when each person is built.
names = ['John', 'Corey', 'Adam', 'Steve', 'Rick', 'Thomas']
majors = ['Math', 'Engineering', 'CompSci', 'Arts', 'Business']

# Baseline reading taken before any people are created. memory_usage()
# returns a list of readings, which is why the printed value is bracketed.
print('Memory (Before): {}Mb'.format(mem_profile.memory_usage()))

def people_list(num_people):
    """Build and return a list of ``num_people`` randomly generated people.

    Each person is a dict with an ``'id'`` (its position, starting at 0), a
    ``'name'`` picked at random from ``names`` and a ``'major'`` picked at
    random from ``majors``. Every dict is created up front and kept in the
    returned list.
    """
    return [
        {
            'id': idx,
            'name': random.choice(names),
            'major': random.choice(majors),
        }
        for idx in range(num_people)
    ]

# def people_generator(num_people):
#     for i in range(num_people):
#         person = {
#                     'id': i,
#                     'name': random.choice(names),
#                     'major': random.choice(majors)
#                 }
#         yield person

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is a wall clock that can jump if the system time
# is adjusted.
t1 = time.perf_counter()
people = people_list(1000000)
t2 = time.perf_counter()

# Generator variant (time.clock() was removed in Python 3.8, so it also uses
# perf_counter()):
# t1 = time.perf_counter()
# people = people_generator(1000000)
# t2 = time.perf_counter()

# Second reading, taken while `people` still references the full result.
print('Memory (After) : {}Mb'.format(mem_profile.memory_usage()))
print('Took {} Seconds'.format(t2-t1))

Memory (Before): [110.48046875]Mb
Memory (After) : [333.3203125]Mb
Took 1.9170572757720947 Seconds


In [16]:
import memory_profiler as mem_profile
import random
import time

# Sample values that random.choice() picks from when each person is built.
names = ['John', 'Corey', 'Adam', 'Steve', 'Rick', 'Thomas']
majors = ['Math', 'Engineering', 'CompSci', 'Arts', 'Business']

# Reading taken before the generator is created. memory_usage() returns a
# list of readings, which is why the printed value is bracketed.
print('Memory (Before): {}Mb'.format(mem_profile.memory_usage()))

# def people_list(num_people):
#     result = []
#     for i in range(num_people):
#         person = {
#                     'id': i,
#                     'name': random.choice(names), 
#                     'major': random.choice(majors)
#                 }
#         result.append(person)
#     return result

def people_generator(num_people):
    """Lazily yield ``num_people`` randomly generated people, one at a time.

    Each person is a dict with an ``'id'`` (its position, starting at 0), a
    ``'name'`` picked at random from ``names`` and a ``'major'`` picked at
    random from ``majors``. A person is only created when the caller asks for
    the next value.
    """
    for idx in range(num_people):
        yield {
            'id': idx,
            'name': random.choice(names),
            'major': random.choice(majors),
        }

# t1 = time.time()
# people = people_list(1000000)
# t2 = time.time()

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is a wall clock that can jump if the system time
# is adjusted.
t1 = time.perf_counter()
# Only the generator object is created here; no person is built until the
# generator is iterated.
people = people_generator(1000000)
t2 = time.perf_counter()

print('Memory (After) : {}Mb'.format(mem_profile.memory_usage()))
print('Took {} Seconds'.format(t2-t1))

Memory (Before): [333.5703125]Mb
Memory (After) : [116.89453125]Mb
Took 0.09900403022766113 Seconds


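You can see that memory after creating the generator is back at roughly the baseline level. The "before" reading is high most likely because the one-million-item list from the previous cell was still referenced by `people`; rebinding `people` to the generator releases that list. The generator object itself adds almost nothing, because it hasn't done anything yet: it is not holding any of the values (e.g. the 1 million people).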

Now let's look at the memory consumption once we convert our generator object to a list.

In [17]:
import memory_profiler as mem_profile
import random
import time

# Sample values that random.choice() picks from when each person is built.
names = ['John', 'Corey', 'Adam', 'Steve', 'Rick', 'Thomas']
majors = ['Math', 'Engineering', 'CompSci', 'Arts', 'Business']

# Reading taken before the generator is converted to a list. memory_usage()
# returns a list of readings, which is why the printed value is bracketed.
print('Memory (Before): {}Mb'.format(mem_profile.memory_usage()))

# def people_list(num_people):
#     result = []
#     for i in range(num_people):
#         person = {
#                     'id': i,
#                     'name': random.choice(names), 
#                     'major': random.choice(majors)
#                 }
#         result.append(person)
#     return result

def people_generator(num_people):
    """Lazily yield ``num_people`` randomly generated people, one at a time.

    Each person is a dict with an ``'id'`` (its position, starting at 0), a
    ``'name'`` picked at random from ``names`` and a ``'major'`` picked at
    random from ``majors``. A person is only created when the caller asks for
    the next value.
    """
    for idx in range(num_people):
        yield {
            'id': idx,
            'name': random.choice(names),
            'major': random.choice(majors),
        }

# t1 = time.time()
# people = people_list(1000000)
# t2 = time.time()

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is a wall clock that can jump if the system time
# is adjusted.
t1 = time.perf_counter()
# list() drains the generator, so every person is built and kept in memory.
people = list(people_generator(1000000))
t2 = time.perf_counter()

print('Memory (After) : {}Mb'.format(mem_profile.memory_usage()))
print('Took {} Seconds'.format(t2-t1))

Memory (Before): [115.90625]Mb
Memory (After) : [333.42578125]Mb
Took 1.9570972919464111 Seconds
