In [1]:
import pandas as pd

# Introduction to Iterators

* Iterables is an object that has an associated iter() method.
* Iterator is an object that has an associated next() method. 
* create an iterator from an iterable using the iter() function

Summary
* An iterable is an object that can return an iterator 
* An iterator is an object that keeps state and produces the next value when you call next()

In [2]:
phrase = 'Howdy my name is Servin'
variable = iter(phrase)

next(variable)
next(variable)
next(variable)
print(* variable)

d y   m y   n a m e   i s   S e r v i n


## Iterating through Dictionaries

In [3]:
info = {
    'first_name':'Jose',
    'last_name':'Servin',
    'age':25,
    'Goal':'Become a Full Data Scientist/Analyst in 2022'
}

for key, value in info.items():
    print('The attribute is: ' + key)
    print('The value is: ' + str(value))

The attribute is: first_name
The value is: Jose
The attribute is: last_name
The value is: Servin
The attribute is: age
The value is: 25
The attribute is: Goal
The value is: Become a Full Data Scientist/Analyst in 2022


# Enumerate Function

* allows us to add a counter to any iterable 

In [4]:
names = ['Baker','Bella','Camila','Claudia','Mayra','Melissa']

for index, value in enumerate(names):
    print(index, value)

0 Baker
1 Bella
2 Camila
3 Claudia
4 Mayra
5 Melissa


In [5]:
names = ['Baker','Bella','Camila','Claudia','Mayra','Melissa']

for index, value in enumerate(names, start=1):
    print(index, value)

1 Baker
2 Bella
3 Camila
4 Claudia
5 Mayra
6 Melissa


## using enumerate

In [6]:
# Create a list of strings: mutants
mutants = ['charles xavier', 
            'bobby drake', 
            'kurt wagner', 
            'max eisenhardt', 
            'kitty pryde']

# Create a list of tuples: mutant_list
mutant_list = list(enumerate(mutants))

# Print the list of tuples
print(mutant_list)

# Unpack and print the tuple pairs
for index1, value1 in enumerate(mutants):
    print(index1, value1)

# Change the start index
for index1, value1 in enumerate(mutants, start=1):
    print(index1, value1)

[(0, 'charles xavier'), (1, 'bobby drake'), (2, 'kurt wagner'), (3, 'max eisenhardt'), (4, 'kitty pryde')]
0 charles xavier
1 bobby drake
2 kurt wagner
3 max eisenhardt
4 kitty pryde
1 charles xavier
2 bobby drake
3 kurt wagner
4 max eisenhardt
5 kitty pryde


# zip function

* allows us to stitch together any number of iterables 
* accepts an arbitrary number of iterables and returns an iterator of tuples

In [7]:
breeds = ['dog','dog','cat','dog']
names = ['Baker','Lilo','Bella','Camila']

for z1, z2 in zip(names, breeds):
    print(z1, z2)

Baker dog
Lilo dog
Bella cat
Camila dog


# Using iterators to load large files into memory

## load data in chunks using an iterator

In [10]:
# basic use-case for iterator 
MLY_CLDD_BASE45 = []
for chunk in pd.read_csv('/Users/joseservin/DataCamp/Courses/Intro_Matplotlib/seattle_weather.csv', chunksize=12):
    MLY_CLDD_BASE45.append(sum(chunk['MLY-CLDD-BASE45']))

print(MLY_CLDD_BASE45)

[2614, 2813, 3257, 3504, 3055, nan, 3301, 3395, nan, 2602, 3408, 1973, 3030, 3048, 2963, 3098, 3465]


In [13]:
# basic use-case for iterator 
total = 0
for chunk in pd.read_csv('/Users/joseservin/DataCamp/Courses/Intro_Matplotlib/seattle_weather.csv', chunksize=12):
    total += sum(chunk['MLY-CLDD-BASE45'])
print(total)

nan


# Processing large amounts of Twitter Data

## Using chunksize 

In [15]:
counts_dict = {}

for chunk in pd.read_csv('/Users/joseservin/DataCamp/Courses/Python_Toolbox/tweets.csv', chunksize=10):
    for entry in chunk['lang']:
        if entry in counts_dict.keys():
            counts_dict[entry] += 1 
        else:
            counts_dict[entry] = 1
print(counts_dict)


{'en': 97, 'et': 1, 'und': 2}


## Using a function and chunksize

In [18]:
def count_entries(csv_name, chunk_size, col_name):
    """returns a dictionary with count of entries per unique observation"""
    counts_dict = {}

    for temp_df in pd.read_csv(csv_name, chunksize=chunk_size):
        for entry in temp_df[col_name]:
            if entry in counts_dict.keys():
                counts_dict[entry] += 1
            else:
                counts_dict[entry] = 1
    return counts_dict
    

In [19]:
results = count_entries('/Users/joseservin/DataCamp/Courses/Python_Toolbox/tweets.csv', 10, 'lang')
print(results)

{'en': 97, 'et': 1, 'und': 2}


# List Comprehensions 

* Collapse for-loops for building lists into a single line
* components
    * iterable 
    * iterable variable
    * output expression
    

In [22]:
nums = [1,2,3,4,5,6,7]
new_nums = [i + 1 for i in nums]
print(new_nums)

[2, 3, 4, 5, 6, 7, 8]


## Matrix building

In [23]:
matrix = [[col for col in range(0,5)] for row in range(0,5)]

In [24]:
for row in matrix:
    print(row)

[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]


## Conditionals in Comprehensions 

In [25]:
[num ** 2 for num in range(0,11) if num % 2 == 0]

[0, 4, 16, 36, 64, 100]

## Conditionals on the output expression

In [26]:
[num ** 2 if num % 2 == 0 else 0 for num in range(10)]

[0, 0, 4, 0, 16, 0, 36, 0, 64, 0]

## Dictionary Comprehension

In [29]:
{num : -num for num in range(11)}

{0: 0, 1: -1, 2: -2, 3: -3, 4: -4, 5: -5, 6: -6, 7: -7, 8: -8, 9: -9, 10: -10}

# Introduction to generator expressions