# Iterables Cheat Sheet

___

## 1. Using iter()

- #### Definition

In [12]:
flash = ['jay garrick', 'barry allen', 'wally west', 'bart allen']

# iter() wraps the list in an iterator that hands out one element per next() call
superhero = iter(flash)

# Advance the iterator four times, showing each name on its own line
print(
    next(superhero),
    next(superhero),
    next(superhero),
    next(superhero),
    sep='\n',
)

jay garrick
barry allen
wally west
bart allen


- #### * / Splat Operator
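-  Does this only work with strings? No — `*` unpacks any iterable. Why does `print(*superhero)` show no items? Because the four `next()` calls above already exhausted that iterator, so there is nothing left to unpack.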

In [19]:
word = 'data'
it = iter(word)

# A leading * unpacks every remaining item of the iterator into separate print() arguments
print(*it)

d a t a


___

## 2. Enumerate
- Recall that enumerate() returns an enumerate object that produces a sequence of tuples, and each of the tuples is an index-value pair.
- Use enumerate on a list

In [23]:
# Sample list used by the enumerate() examples
avengers = [
    "hawkeye",
    "iron man",
    "thor",
    "quicksilver",
]

In [25]:
# enumerate() is lazy; unpacking it into a list shows the (index, value) pairs
e = enumerate(avengers)
print([*e])

[(0, 'hawkeye'), (1, 'iron man'), (2, 'thor'), (3, 'quicksilver')]


In [26]:
# start= sets the number the index begins counting from (the default is 0)
e = enumerate(avengers, start=10)
print([*e])

[(10, 'hawkeye'), (11, 'iron man'), (12, 'thor'), (13, 'quicksilver')]


In [27]:
# Unpack each (index, value) pair directly in the loop header, counting from 1
for index, value in enumerate(avengers, start=1):
    print(index, value)

1 hawkeye
2 iron man
3 thor
4 quicksilver


___

## 3. Zip
- `zip()` pairs up the elements of several iterables position by position, producing an iterator of tuples

In [31]:
avengers = ["hawkeye", "iron man", "thor", "quicksilver"]
# NOTE(review): 'start' is probably a typo for 'stark' — confirm before changing,
# since the rendered output below shows the same spelling
names = ['barton', 'start', 'odinson', 'maximoff']

# zip() pairs the i-th hero with the i-th surname, yielding one tuple per position
z = zip(avengers, names)

# Materialise the lazy zip object so the pairs can be displayed
display(list(z))

[('hawkeye', 'barton'),
 ('iron man', 'start'),
 ('thor', 'odinson'),
 ('quicksilver', 'maximoff')]

- #### * / Splat Operator

In [36]:
mutants = ['charles xavier', 'bobby drake', 'kurt wagner', 'max eisenhardt', 'kitty pryde']
powers = ['telepathy', 'thermokinesis', 'teleportation', 'magnetokinesis', 'intangibility']

z1 = zip(mutants, powers)

# *z1 unpacks the zip object, so display() receives each pair as its own argument
display(*z1)

('charles xavier', 'telepathy')

('bobby drake', 'thermokinesis')

('kurt wagner', 'teleportation')

('max eisenhardt', 'magnetokinesis')

('kitty pryde', 'intangibility')

- Using `*` will exhaust the elements in your iterator; you will have to recreate the zip object you defined if you want to use it again

In [38]:
# z1 was already consumed by display(*z1), so nothing is left to list;
# the zip object has to be recreated before it can be printed again
print(list(z1))

[]


In [43]:
# Build a fresh zip object from the same lists: the previous z1 is exhausted,
# so a new one is needed to see the pairs again
z1 = zip(mutants, powers)
display(list(z1))

[('charles xavier', 'telepathy'),
 ('bobby drake', 'thermokinesis'),
 ('kurt wagner', 'teleportation'),
 ('max eisenhardt', 'magnetokinesis'),
 ('kitty pryde', 'intangibility')]

- #### Run two loops Simultaneously

In [46]:
# Walk both lists in lockstep; zip() yields one (mutant, power) pair per pass.
# Descriptive loop names are used so the loop does not rebind z1, which the
# earlier cells use for the zip object itself.
for mutant, power in zip(mutants, powers):
    print(mutant, ':', power)

charles xavier : telepathy
bobby drake : thermokinesis
kurt wagner : teleportation
max eisenhardt : magnetokinesis
kitty pryde : intangibility


___

## 4. Loading Large Files in Chunks
- Count how many times each value appears in column 'lang' of tweets.csv
- Read and count the data in chunks (assume the file is too large to load at once)
- SEE ONENOTE

In [51]:
import pandas as pd

In [59]:
# Running tally: value found in the 'lang' column -> number of rows seen so far
counts = {}

# With chunksize set, read_csv yields the file piece by piece (10 rows per chunk)
for chunk in pd.read_csv('Datasets/tweets.csv', chunksize=10):
    # Bump the tally for each entry in this chunk's 'lang' column;
    # .get() supplies 0 the first time a value is encountered
    for entry in chunk['lang']:
        counts[entry] = counts.get(entry, 0) + 1

In [54]:
# Show the per-value totals accumulated across all chunks
print(counts)

{'en': 97, 'et': 1, 'und': 2}


- #### a more robust version

In [68]:
def count_entries(csv_file, chk_size, col_name):
    """Count how often each value occurs in one column of a CSV file.

    The file is read in chunks of `chk_size` rows, so the whole file is
    never loaded at once.

    Args:
        csv_file: Path of the CSV file, passed straight to pd.read_csv.
        chk_size: Number of rows per chunk (the `chunksize` for pd.read_csv).
        col_name: Name of the column whose values are tallied.

    Returns:
        dict mapping each distinct value in `col_name` to its number of
        occurrences, in order of first appearance.
    """
    tally = {}
    reader = pd.read_csv(csv_file, chunksize=chk_size)

    for frame in reader:
        for value in frame[col_name]:
            # .get() supplies 0 the first time a value is encountered
            tally[value] = tally.get(value, 0) + 1

    return tally

In [69]:
# Tally the 'lang' column of the tweets file, reading 10 rows at a time
col_entry_count = count_entries(
    csv_file='Datasets/tweets.csv',
    chk_size=10,
    col_name='lang',
)

In [73]:
# Show the dictionary returned by count_entries()
print(col_entry_count)

{'en': 97, 'et': 1, 'und': 2}
