# 1.1 iterators
## 通过iter() 将iterables转化为iterator, 用 next() 产生iterator的value 
iterables 包括 lists, strings, dictionaries, file connections

In [2]:
# The four speedsters used throughout this cell: flash
flash = ['jay garrick', 'barry allen', 'wally west', 'bart allen']

# A for loop asks the list for an iterator behind the scenes
for f in flash:
    print(f)


# Ask for that iterator explicitly: superhero
superhero = iter(flash)

# Each next() call hands back the following item; the four calls are
# evaluated left to right, so the names come out in list order, one per line,
# and the iterator is exhausted afterwards
print(next(superhero), next(superhero), next(superhero), next(superhero), sep='\n')

SyntaxError: unexpected character after line continuation character (<ipython-input-2-e830bce6260b>, line 13)

### 用*全部迭代

In [3]:
# A string is iterable: iter() returns an iterator over its characters
word = 'Data'
it = iter(word)
# Star-unpacking drains the iterator and passes every character to print()
# as a separate argument, so the letters are printed separated by spaces
print(*it)


D a t a


# 1.2 playing with iterators  

In [27]:
 avengers = ['hawkeye', 'iron man', 'thor', 'quicksilver']
names = ['barton', 'stark', 'odinson', 'maximoff']
z = zip(avengers, names)
print(z)

b = [i+ j for i , j in  z]
print(b)

<zip object at 0x106cd97c8>
['hawkeyebarton', 'iron manstark', 'thorodinson', 'quicksilvermaximoff']


In [5]:
 avengers = ['hawkeye', 'iron man', 'thor', 'quicksilver']
names = ['barton', 'stark', 'odinson', 'maximoff']
z = zip(avengers, names)
print(*z)

('hawkeye', 'barton') ('iron man', 'stark') ('thor', 'odinson') ('quicksilver', 'maximoff')


#### enumerate(list, start) 三种显示index迭代方法

In [None]:
# The roster used by the three enumerate() demos below: mutants
mutants = [
    'charles xavier',
    'bobby drake',
    'kurt wagner',
    'max eisenhardt',
    'kitty pryde',
]

# 1) Materialise enumerate() into a list of (index, name) tuples: mutant_list
mutant_list = [*enumerate(mutants)]
print(mutant_list)

# 2) Unpack each (index, name) pair while looping; counting starts at 0
for index1, value1 in enumerate(mutants, 0):
    print(index1, value1)

# 3) Same loop, but number the entries from 1 instead of 0
for index2, value2 in enumerate(mutants, 1):
    print(index2, value2)


In [15]:
 avengers = ('hawkeye', 'iron man', 'thor', 'quicksilver')
names = ('barton', 'stark', 'odinson', 'maximoff')
# Create a zip object from mutants and powers: z1
z1 = zip(avengers, names)

# Print the tuples in z1 by unpacking with *
print(*z1)

# Re-create a zip object from mutants and powers: z1
z1 = zip(avengers, names)

# 'Unzip' the tuples in z1 by unpacking with * and zip(): result1, result2
# result1, result2 = *z1 迭代打印可以，unpack是不行的
result1, result2 = zip(*z1)

# Check if unpacked tuples are equivalent to original tuples
print(result1 == avengers)
print(result2 == names)


SyntaxError: can't use starred expression here (<ipython-input-15-5634dac29f71>, line 16)

# 1.3 使用iterators to load large files into memory
## 当数据太大、无法一次性全部导入内存时，可以用 chunksize 分块 (chunks) 读取数据

In [None]:
import pandas as pd
# Read data.csv 1000 rows at a time and keep one subtotal of column 'x'
# per chunk: result
result = [sum(chunk['x']) for chunk in pd.read_csv('data.csv', chunksize=1000)]

# Add the per-chunk subtotals up into the grand total
total = sum(result)
print(total)

In [None]:
import pandas as pd
# Fold the per-chunk sums of column 'x' straight into a single total,
# without keeping the intermediate subtotals around
total = sum(sum(chunk['x']) for chunk in pd.read_csv('data.csv', chunksize=1000))
print(total)

In [None]:
# Tally of how often each value occurs in the 'lang' column: counts_dict
counts_dict = {}

# Iterate over the file chunk by chunk, 10 rows at a time
for chunk in pd.read_csv('tweets.csv', chunksize=10):

    # Iterate over the 'lang' column of this chunk
    for entry in chunk['lang']:
        # dict.get() supplies 0 for a value not seen before, so one
        # statement handles both the first and every later occurrence
        counts_dict[entry] = counts_dict.get(entry, 0) + 1

# Print the populated dictionary
print(counts_dict)

#### 定义function来计算大文件的某一列的统计数量

In [None]:
# Define count_entries()
def count_entries(csv_file, c_size, colname):
    """Count how often each value occurs in one column of a CSV file.

    The file is read with ``pd.read_csv(csv_file, chunksize=c_size)``, so it
    is processed ``c_size`` rows at a time instead of being loaded whole.

    Args:
        csv_file: Path or file-like object handed to ``pd.read_csv``.
        c_size: Number of rows per chunk.
        colname: Name of the column whose values are counted.

    Returns:
        A dict mapping each distinct value found in ``colname`` to the
        number of rows in which it occurs.

    Raises:
        KeyError: If ``colname`` is not a column of the file.
    """
    counts_dict = {}

    # Iterate over the file chunk by chunk
    for chunk in pd.read_csv(csv_file, chunksize=c_size):

        # Iterate over the requested column of this chunk
        for entry in chunk[colname]:
            # dict.get() supplies 0 for a value not seen before, so one
            # statement handles both the first and every later occurrence
            counts_dict[entry] = counts_dict.get(entry, 0) + 1

    return counts_dict

# Count the values of the 'lang' column of tweets.csv, reading 10 rows
# per chunk: result_counts
result_counts = count_entries(csv_file='tweets.csv', c_size=10, colname='lang')

# Show the value -> count mapping
print(result_counts)


# 2.1 极其方便的list comprehension, 将for loops 变成单行
#### List comprehension 组成是1.iterable; 2. iterator variable(such as for i in range(10)中的 i )；3. output expression.
#### string、list、range() 这类 iterable 都可以用于 list comprehension；而 int 这类不可迭代 (non-iterable) 的对象则不能

In [None]:
# Starting values: nums
nums = [12, 8, 21, 3, 16]

# Shift every element up by one with a list comprehension: new_nums
new_nums = [1 + value for value in nums]
print(new_nums)

# Squares of 0..9, again as a list comprehension: squares
squares = [n * n for n in range(10)]


In [18]:
# Nested list comprehension: the left-most `for` is the outer loop
pairs_2 = [(outer, inner) for outer in range(2) for inner in range(6, 8)]
print(pairs_2)

# The same pairs built without a list comprehension need two nested loops
pairs_1 = []
for num1 in range(2):
    for num2 in range(6, 8):
        pair = (num1, num2)
        pairs_1.append(pair)
print(pairs_1)

[(0, 6), (0, 7), (1, 6), (1, 7)]
[(0, 6), (0, 7), (1, 6), (1, 7)]


### 用list comprehension 创建矩阵matrix

In [20]:
# Build a 5 x 5 matrix as a list of lists; every row is its own
# freshly created [0, 1, 2, 3, 4]: matrix
matrix = [list(range(5)) for _ in range(5)]

# The whole matrix on a single line ...
print(matrix)

# ... and then one row per line
for row in matrix:
    print(row)


[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]


## 条件list comprehension 
### [ *output expression* for *iterator variable* in *iterable* if *predicate expression* ]

In [None]:
# The nine walkers, minus two: fellowship
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# The trailing `if` filters: only names of at least 7 characters are kept,
# shorter ones are dropped from the result entirely: new_fellowship
new_fellowship = [name for name in fellowship if len(name) >= 7]

# Show the filtered list
print(new_fellowship)


#### if else 表达略微不一样，if else放在for loop之前

In [29]:
# The nine walkers, minus two: fellowship
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# Here the if/else is part of the output expression (before the `for`), so
# nothing is dropped: names shorter than 7 characters are replaced by '' and
# the result has the same length as the input: new_fellowship
new_fellowship = ['' if len(name) < 7 else name for name in fellowship]

# Show the mapped list
print(new_fellowship)

['', 'samwise', '', 'aragorn', 'legolas', 'boromir', '']


## dict comprehension 
#### The main difference between a list comprehension and a dict comprehension is the use of curly braces {} instead of []. Additionally, members of the dictionary are created using a colon :, as in <key> : <value>.

In [None]:
# The nine walkers, minus two: fellowship
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# Dict comprehension: each name becomes a key, its length the value
new_fellowship = {name: len(name) for name in fellowship}

# Show the name -> length mapping
print(new_fellowship)


# 2.2 Generator 
### list comprehension 会一次性生成所有元素并全部存入内存，数据量极大时会卡死，如 [i for i in range(10 ** 10000)]；generator 是惰性求值、逐个产生元素，因此不会

#### List of strings
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

#### List comprehension
fellow1 = [name for name in fellowship if len(name) > 6]

#### Generator expression
fellow2 = (name for name in fellowship if len(name) > 6)

In [22]:
# Names whose lengths are measured below: lannister
lannister = ['cersei', 'jaime', 'tywin', 'tyrion', 'joffrey']

# Generator expression: no length is computed until the loop asks for it
lengths = (len(name) for name in lannister)

# Looping pulls the values out one by one and leaves lengths exhausted
for value in lengths:
    print(value)


6
5
5
6
7


## 2.2.2 generator function

Generator functions are functions that, like generator expressions, yield a series of values, instead of returning a single value. A generator function is defined as you do a regular function, but whenever it generates a value, it uses the keyword yield instead of return.

Generators allow users to lazily evaluate data. This concept of *lazy evaluation* is useful when you have to deal with very large datasets because it lets you generate values in an efficient manner by yielding only chunks of data at a time instead of the whole thing at once.

In [21]:
# Names whose lengths are measured below: lannister
lannister = ['cersei', 'jaime', 'tywin', 'tyrion', 'joffrey']


def get_lengths(input_list):
    """Yield the length of each item of input_list, one at a time, in order."""
    # map() is lazy as well, so each length is computed only when requested
    for length in map(len, input_list):
        yield length


# Drive the generator with a for loop and print every length it yields
for value in get_lengths(lannister):
    print(value)

6
5
5
6
7


## with open('XX.csv') as file_name :
使用 with open() as 的写法代码更加简洁，并且不必每次手动调用 f.close()，离开 with 代码块时文件会自动关闭。总之，使用 Python 内置 open 函数打开文件时，提倡写成 with open() as 。
而如果直接使用 Python 内置的 open() 函数（不配合 with），每次都要自己调用 close() 方法关闭文件。文件使用完毕后必须关闭，因为文件对象会占用操作系统的资源，并且操作系统同一时间能打开的文件数量也是有限的。详细说明参考：https://blog.csdn.net/xrinosvip/article/details/82019844

## case study 世界银行组织 国家城市人口 可视化
#### 说明， 'ind_pop_data.csv' 是一个非常大的文件，本地没下载。大文件需要用到generator， 用chunksize分批读取。以下代码基本是实战代码，结合了调用函数，generator 读取，list comprehension， iterator.

In [None]:
# Define plot_pop()
def plot_pop(filename, country_code):
    """Scatter-plot total urban population against year for one country.

    The CSV is read in chunks of 1000 rows. From every chunk only the rows
    whose 'CountryCode' equals ``country_code`` are kept, and a
    'Total Urban Population' column is derived for them as
    ``int(total population * urban percentage * 0.01)``.

    Args:
        filename: CSV file handed to ``pd.read_csv``. It must provide the
            columns 'CountryCode', 'Year', 'Total Population' and
            'Urban population (% of total)'.
        country_code: Value of the 'CountryCode' column to select.

    Raises:
        ValueError: If no row of the file matches ``country_code``.
    """
    # Initialize reader object: urb_pop_reader
    urb_pop_reader = pd.read_csv(filename, chunksize=1000)

    # Matching rows of every chunk are collected here and concatenated once
    # at the end: DataFrame.append() no longer exists in pandas 2.x, and
    # growing a DataFrame inside the loop re-copies all earlier rows each time
    frames = []

    # Iterate over each DataFrame chunk
    for df_urb_pop in urb_pop_reader:
        # Rows of this chunk for the requested country. The .copy() makes
        # the selection an independent frame, so adding a column below does
        # not trigger pandas' SettingWithCopyWarning
        df_pop_ceb = df_urb_pop[df_urb_pop['CountryCode'] == country_code].copy()

        # Chunks without any matching row contribute nothing to the plot
        if df_pop_ceb.empty:
            continue

        # Pair up total population and urban percentage, row by row: pops
        pops = zip(df_pop_ceb['Total Population'],
                   df_pop_ceb['Urban population (% of total)'])

        # Percentage -> absolute head count, truncated to a whole number
        df_pop_ceb['Total Urban Population'] = [
            int(total * pct * 0.01) for total, pct in pops
        ]

        frames.append(df_pop_ceb)

    if not frames:
        raise ValueError(
            'no rows with CountryCode {!r} found in {!r}'.format(country_code, filename)
        )

    # Original row labels are kept, as with the former append() calls
    data = pd.concat(frames)

    # Plot urban population data
    data.plot(kind='scatter', x='Year', y='Total Urban Population')
    # NOTE(review): plt is not defined in the visible part of this file;
    # presumably `import matplotlib.pyplot as plt` ran in an earlier cell -- confirm
    plt.show()

# Set the filename once and reuse it for every call: fn
fn = 'ind_pop_data.csv'

# Call plot_pop for country code 'CEB'
plot_pop(fn, 'CEB')

# Call plot_pop for country code 'ARB'
plot_pop(fn, 'ARB')