# Iterators, List Comprehensions and Generators

## Iterators using for loop and iter method

In [None]:
# A list is an iterable, so a for loop can walk over its elements directly
flash = ['jay garrick', 'barry allen', 'wally west', 'bart allen']

for person in flash:
    print(person)

print('----------------------- LOOP END ------------------------')

# Create an iterator for flash
superhero = iter(flash)

# Print each item from the iterator: every next() call returns the following element
print(next(superhero))
print(next(superhero))
print(next(superhero))
print(next(superhero))
# print(next(superhero)) # calling it again would raise StopIteration, because all elements have been consumed


In [None]:
flash = ['jay', 'barry', 'wally', 'bart']
# print(*flash) # unpacking the list itself also prints all elements at once
superhero = iter(flash)
print(*superhero) # * pulls every element out of the iterator and passes them to print(); the iterator is exhausted afterwards

### Iterator using range

In [None]:
# Create an iterator for range(3)
small_value = iter(range(3))

# range(3) produces 0, 1 and 2, so exactly three next() calls succeed
print(next(small_value))
print(next(small_value))
print(next(small_value))
# print(next(small_value)) # calling it again would raise StopIteration, because the iterator is exhausted

# Equivalent for loop, which stops by itself at the end of the range:
# for num in range(3):
#     print(num)

In [None]:
# Iterators are helpful when the data is too large to hold in memory at once.
# range() is lazy: the 10**100 values are never built up front; each next() call produces just one value on demand.

small_value = iter(range(10**100))
print(next(small_value))
print(next(small_value))

## Enumerate

In [None]:
mutants = ['charles xavier', 'bobby drake', 'kurt wagner', 'max eisenhardt', 'kitty pryde']

# Create a list of (index, value) tuples; enumerate() itself is lazy, list() materialises it
mutant_list = list(enumerate(mutants))

print(mutant_list) # Print the list of tuples

# Unpack and print the tuple pairs (numbering starts at 0 by default)
for index1, value1 in enumerate(mutants):
    print(index1, value1)

print('---------------------- END ------------------------')

# Change the start index: numbering now begins at 10 instead of 0
for index2, value2 in enumerate(mutants, start=10):
    print(index2, value2)


## Zip and Unzip

In [None]:
mutants = ['charles_xavier', 'bobby_drake', 'kurt_wagner', 'max_eisenhardt', 'kitty_pryde']
aliases = ['prof_x', 'iceman', 'nightcrawler', 'magneto', 'shadowcat']

# Create a list of (mutant, alias) tuples, and a dictionary mapping each mutant to its alias
mutant_data = list(zip(mutants,aliases))
mutant_data_dictionary = dict(zip(mutants,aliases))

print("mutant_data_dictionary: ", mutant_data_dictionary)
print("mutant_data: ", mutant_data) # Print the list of tuples
print('-----------------------------------------')

mutant_zip = zip(mutants,aliases) # Create a zip object (a lazy iterator, not a list)

print(mutant_zip) # Print the zip object: shows the object's repr, not its contents
print('-----------------------------------------')

# Unpack the zip object and print the tuple values (this consumes mutant_zip)
for value1, value2 in mutant_zip:
    print(value1, value2)

# ---------------------------------------- Unzipping with * ----------------------------------------
print("###################################################################################################")
z1 = zip(mutants, aliases)

print(*z1) # Print the tuples in z1 by unpacking with *

z1 = zip(mutants, aliases) # Re-create the zip object, because the previous z1 was consumed by the unpacking above

result1, result2 = zip(*z1) # 'Unzip' the tuples: result1 holds the first items, result2 the second items

# Compare the unzipped results with the originals.
# Both lines print False: zip() produces tuples, and a tuple never compares equal to a list.
print(result1 == mutants)
print(result2 == aliases)

# Show the type mismatch, then compare again after converting the tuple to a list (prints True)
print(type(result1), type(mutants))
print(list(result1))
print(list(result1) == mutants)


## Fetching data chunk by chunk and process

`This way, the data is not loaded into memory all at once. Rather, it is loaded chunk by chunk.`

In [None]:
import pandas as pd
add = 0 # Running total of the 'Gold' column across all chunks

# Iterate over the file chunk by chunk - fetch dataframe of 3 records at a time
for chunk in pd.read_csv('../00_datasets/medals.csv', chunksize=3, index_col=0):
    print(chunk, end='\n----------------------------------------\n')
    # NOTE(review): the name assumes 'Gold' holds integers - confirm against the CSV
    gold_series_with_int_type = chunk['Gold']
    add += gold_series_with_int_type.sum() # Add this chunk's subtotal to the running total

print(add)


In [None]:
import pandas as pd
add = 0 # NOTE(review): not used anywhere in this cell

# Get dataframe chunk by chunk - fetch dataframe of 3 records at a time
df_reader = pd.read_csv('../00_datasets/medals.csv', chunksize=3, index_col=0) # with chunksize set, returns an iterator of DataFrames instead of one DataFrame
print(next(df_reader), end='\n----------------------------------------\n') # first 3 records
print(next(df_reader), end='\n----------------------------------------\n') # next 3 records


## List Comprehensions

In [None]:
# Basic list comprehension: build a new list by applying an expression to every element
nums = [1,2,3,4]
squared_nums = [num ** 2 for num in nums]
print("squared_nums: ", squared_nums)

## Alternative: iterate over a range instead of an existing list
# squares = [i ** 2 for i in range(1, 5)]
# print(squares)

## Alternative: map() with a lambda (map is lazy, so list() is needed to see the values)
# squared_nums_using_map = map(lambda num: num ** 2 , nums)
# print(list(squared_nums_using_map))

################################### Nested list ###########################################
# # Simple Way
# arr = []
# for i in range(0, 3):
#     for j in range(0, 2):
#         arr.append((i, j))
# print(arr)

## Using List Comprehension
# Two for clauses in one comprehension produce a single flat list; the first for is the outer loop
arr_new = [ (i, j) for i in range(0, 3) for j in range(0, 2)]
print("arr_new: ", arr_new)

# A comprehension nested inside another produces a list of lists: one inner list per value of j
arr_new_advance = [ [(i, j) for i in range(0, 3)] for j in range(0, 2)]
print("arr_new_advance: ", arr_new_advance)

#################################### List Comprehension with conditions ###############################
rand_nums = [0,5,3,7,6,2,10,21]
# A trailing if filters: odd numbers are dropped from the result
squared_evenNums = [(num, num ** 2) for num in rand_nums if num % 2 == 0]
print("squared_evenNums: ", squared_evenNums)

# A conditional expression before the for transforms: every element is kept, odd numbers become 0
evens_with_null_odds = [num if num % 2 == 0 else 0 for num in rand_nums]
print("evens_with_null_odds: ", evens_with_null_odds)

#################################### Create dictionary using Dict Comprehension ###############################
rand_nums1 = [0,5,3,7,6,2,10,21]
# Maps every number to its square.
# NOTE(review): despite the variable name, no `if num % 2 == 0` filter is applied, so odd numbers are included too - confirm whether a filter was intended
squared_evenNums_dict = { num: num ** 2 for num in rand_nums1}
print("squared_evenNums_dict: ", squared_evenNums_dict)


## Generator Expressions

In [None]:
rand_nums = [0,5,3,7,6,2,10,21,22,26,82,31]

# Generator expression - same syntax as a list comprehension, but with parentheses.
# Nothing is computed here; values are produced one at a time when requested.
even_nums = (num for num in rand_nums if num % 2 == 0)
print(even_nums) # Prints the generator object itself, not its values
print(next(even_nums))
print(next(even_nums))
print(next(even_nums))
print('----------- ITERATOR END ---------------')
## Print the rest of the values using a for loop: it resumes after the three values already consumed above
for value in even_nums:
    print(value)


## Generator Functions

`It will not load all data at once into memory. Instead, it will load data step by step`

In [None]:
rand_nums = [0, 5, 3, 7, 6, 2, 10, 21, 22, 26, 82, 31]


# Define generator function get_evens
def get_evens(input_list):
    """Lazily yield the even numbers of input_list, in their original order.

    Nothing is computed until the generator is iterated; each value is
    produced only when the consumer asks for the next one.
    """
    yield from (number for number in input_list if number % 2 == 0)


# Print the values generated by get_evens()
for value in get_evens(rand_nums):
    print(value)

## Real World Example for Zip and List Comprehension

`Convert arrays to Data Frame`

In [None]:
import pandas as pd

def lists2dict(feature_names, row_list):
    """Return a dict pairing each feature name with the value at the same position in row_list.

    Pairing follows zip(): it stops at the shorter of the two inputs, and a
    repeated feature name keeps the last value paired with it.
    """
    return dict(zip(feature_names, row_list))

# Column names, and one inner list of values per record (all values are strings here)
feature_names = ['name', 'age', 'fare']
row_lists = [['John', '29', '550'], ['Doe', '35', '435'], ['Cena', '40', '450']]

# Turn list of lists into list of dicts: each row becomes {feature name: value}
list_of_dicts = [lists2dict(feature_names, sublist) for sublist in row_lists]

df = pd.DataFrame(list_of_dicts) # Turn list of dicts into a DataFrame; the dict keys become the column names

print(df)


## Read large files, step by step

In [None]:
# This function is for educational purposes only: an open file object can already be iterated line by line
def read_large_file(file_object):
    """Generator that lazily yields the lines of file_object, one per request.

    Only readline() is called on file_object, so a single line is read each
    time the consumer asks for the next value. Iteration ends at end of file.
    """
    line = file_object.readline()
    # readline() returns an empty (falsy) value only at end of file;
    # a blank line still contains its newline character and is therefore yielded
    while line:
        yield line
        line = file_object.readline()
        
# Open the file; the with statement closes it automatically when the block ends
with open('../00_datasets/medals.csv') as file:

    # Create a generator object for the file: gen_file
    gen_file = read_large_file(file)

    # Print the first three lines of the file
    # (each line keeps its trailing newline, so print() shows an extra blank line after it)
    print(next(gen_file))
    print(next(gen_file))
    print(next(gen_file))

    print('------------- GENERATOR LOOOPING START HERE -----------------')
    # Iterate over the remaining lines from the generator (continues after the three lines consumed above)
    for line in gen_file:
        print(line)

print('##########################################################################################')
################################# Simple way of above function ########################################
# An open file object is itself an iterator over its lines, so no helper function is needed
with open('../00_datasets/medals.csv') as file1:
    # Read the first four lines one by one
    print(file1.readline())
    print(file1.readline())
    print(file1.readline())
    print(file1.readline())

    print('------------- GENERATOR LOOOPING START HERE -----------------')
    # Iterate over the remaining lines by looping over the file object directly
    for line in file1:
        print(line)


### Adding a column to only the 1st chunk of the dataframe

In [None]:
df_reader = pd.read_csv('../00_datasets/titanic.csv', chunksize=100) # Initialize reader object; chunksize=100 makes it yield DataFrames of up to 100 rows

df_reader_val = next(df_reader) # Get the first DataFrame chunk
print(df_reader_val.head())

print('--------------------------------------------------------------------')

df_male = df_reader_val[df_reader_val['sex'] == 'male'].copy() # filter by gender; .copy() gives an independent DataFrame to which a column can be added

age_fare = zip(df_male['age'], df_male['fare']) # Zip DataFrame columns of interest
age_fare_list = list(age_fare) # Turn zip object into list
print("age_fare_list: ", age_fare_list)
print('--------------------------------------------------------------------')

# Use list comprehension to create new DataFrame column
# Only fare is used from each pair; int() drops the fractional part
# NOTE(review): int() raises ValueError on a NaN fare - confirm the column has no missing values
df_male['int_fare'] = [int(fare) for age, fare in age_fare_list]
# Vectorised alternative that avoids the Python-level loop:
# df_male['int_fare'] = df_male['fare'].astype(int)
print(df_male[['survived','sex','age', 'fare', 'int_fare']])

### Adding a column into dataframe, chunk by chunk

In [None]:
df_reader = pd.read_csv('../00_datasets/titanic.csv', chunksize=100) # Initialize reader object; yields DataFrames of up to 100 rows

male_chunks = [] # Processed chunks are collected here and concatenated once after the loop

# loop through each chunk, one-by-one
for df_reader_val in df_reader:
    df_male = df_reader_val[df_reader_val['sex'] == 'male'].copy() # filter by gender; .copy() gives an independent DataFrame

    age_fare = zip(df_male['age'], df_male['fare']) # Zip DataFrame columns of interest
    age_fare_list = list(age_fare) # Turn zip object into list
    # print("age_fare_list: ", age_fare_list)
    # print('--------------------------------------------------------------------')

    # Use list comprehension to create new DataFrame column (only fare is used from each pair)
    df_male['int_fare'] = [int(fare) for _age, fare in age_fare_list]
    # print(df_male[['survived','sex','age', 'fare', 'int_fare']])

    male_chunks.append(df_male) # Keep the processed chunk; nothing is copied here

# Concatenate all chunks in one call: calling pd.concat inside the loop would re-copy
# every previously accumulated row on each iteration, and concatenating onto an empty
# DataFrame is deprecated in recent pandas versions.
# pd.concat raises ValueError on an empty list, so fall back to an empty DataFrame.
titanic_male = pd.concat(male_chunks) if male_chunks else pd.DataFrame()

print(titanic_male.head())

      age     sex
0    22.0    male
1    38.0  female
2    26.0  female
3    35.0  female
4    35.0    male
..    ...     ...
886  27.0    male
887  19.0  female
888   NaN  female
889  26.0    male
890  32.0    male

[891 rows x 2 columns]
