In [1]:
import pandas as pd

# Iterators

Iterable: an object with an associated iter() method; calling iter() on it returns an iterator. Examples:
* lists
* strings
* dictionaries
* file connections

Iterator: An object that keeps state and produces the next value when you call next().
Calling iter() on an iterable returns an iterator, which has an associated next() method that produces the consecutive values one at a time.
* iter()
* next()

## 1. Examples of 'known' iterators

In [None]:
# A list is an iterable, so a for loop can walk through its elements in order
alist = [
    'name1',
    'name2',
    'name3',
]
for name in alist:
    print(name)

In [None]:
# A string is an iterable too: looping over it yields one character at a time
for letter in "word":
    print(letter)

In [None]:
# A range object is an iterable that hands out its numbers (here 0 through 3) one by one
for i in range(0, 4):
    print(i)

In [None]:
# Iterating over a file connection
# The with-statement closes the file again as soon as the block is left
with open('file.txt') as file:
    it = iter(file)
    print(next(it))  # --> first line

## 2. iter() and next() method

In [None]:
# Step through an iterable by hand with iter() and next()
word = 'Name'
it = iter(word)
# print() each value: a notebook cell only echoes its last bare expression
print(next(it))  # --> N
print(next(it))  # --> a
print(next(it))  # --> m
print(next(it))  # --> e
# One more next(it) would raise StopIteration: the iterator is exhausted

In [None]:
# Unpack everything the iterator still holds in one go with the * operator
word = 'Name'
it = iter(word)
# next() accepts only an iterator (plus an optional default), so the
# unpacked values are handed to print() instead
print(*it)  # --> N a m e

## 3. Iterators in action

### enumerate()
enumerate() pairs each element of the original iterable with its index within the iterable.
Index starts at 0, but you can alter this with the argument start.

In [None]:
# Build an enumerate object: an iterator of (index, element) pairs
alist = ['name1', 'name2', 'name3']
e = enumerate(alist)

# Materialise the pairs into a list of tuples: [(index, list element), ...]
e_list = [*e]

In [None]:
# Build an enumerate object with the default numbering (starting at 0)
alist = ['name1', 'name2', 'name3']
e = enumerate(alist)  # not consumed by the loop below, which makes its own

# Walk a second enumerate object in a for loop instead of turning it into a list;
# the second argument (start) makes the numbering begin at 1
for index, value in enumerate(alist, 1):
    print(index, value)

### zip()
Zipping lists together.

In [None]:
# Pair up the elements of two lists position by position
alist = ['ele1', 'ele2', 'ele3']
names = ['name1', 'name2', 'name3']
z = zip(alist, names)

# Materialise the zip object into a list of tuples: [(alist element, names element), ...]
z_list = [*z]

# Alternatively, unpack each pair directly in a for loop over a fresh zip object
for z1, z2 in zip(alist, names):
    print(z1, z2)

With the `*` (splat) operator, all elements of the zip object are unpacked and displayed at once.

In [None]:
# Zip two lists into a zip object
alist = ['ele1', 'ele2', 'ele3']
names = ['name1', 'name2', 'name3']
z = zip(alist, names)

# Unpack the zip object with * so print receives every pair as its own argument
print(*z)

In [None]:
# Create zip object
alist = ['ele1', 'ele2', 'ele3']
names = ['name1', 'name2', 'name3']
z = zip(alist, names)

# "Unzip": zip(*z) regroups the pairs into one tuple per original list
result1, result2 = zip(*z)

# zip() hands back tuples, and a tuple never compares equal to a list,
# so convert to a list before checking against the originals.
# Both checks are printed because a notebook cell only echoes its last bare expression.
print(list(result1) == alist)  # --> True
print(list(result2) == names)  # --> True

### chunk
Using iterators to process files that are too large to load into memory at once. This is useful because you don't have to hold all the data in memory; you work through it in chunks. It is like: 
1. Load the data in chunks
2. Perform the desired operation(s) on each chunk
3. Store the result for each chunk
4. Discard the chunk and then load the next chunk. 

In [None]:
# Iterating over data
# Variant 1: collect one partial sum per chunk, then add the partial sums together
result = []
for chunk in pd.read_csv('data.csv', chunksize=1000):  # Each chunk will be a dataframe
    # Compute the sum of the column of interest and append it to the list result
    # (Series.sum() skips NaN values by default)
    result.append(chunk['x'].sum())
total = sum(result)

# or

# Variant 2: keep a running total instead of storing the per-chunk sums.
# The total must start at 0 here and must not be overwritten after the loop.
total = 0
for chunk in pd.read_csv('data.csv', chunksize=1000):  # Each chunk will be a dataframe
    # Add the sum of the column of interest in this chunk to the running total
    total += chunk['x'].sum()