<a href="https://colab.research.google.com/github/AnupJoseph/adv-python/blob/master/Iterators.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import itertools as it
import operator
import random
import pandas as pd
import csv

from collections import namedtuple
from datetime import datetime

In [None]:
# The zip function takes any number of iterators as input arguements and returns an iterator over the tuplesof corresponding elements

In [None]:
tuple(zip([1,2,3],['a','d','f']))

((1, 'a'), (2, 'd'), (3, 'f'))

In [None]:
# An iterator means you can get its elements one at a time and this trick is used by zip to gather them together
# Under the hood, the zip() function works, in essence, by calling iter() on each of its arguments, 
# then advancing each iterator returned by iter() with next() and aggregating the results into tuples. 
# The iterator returned by zip() iterates over these tuples.

In [None]:
# The map() built-in function is another “iterator operator” that, 
# in its simplest form, applies a single-parameter function to each element of an iterable one element at a time:

In [None]:
list(map(len,['abc','dwqa','p','']))

[3, 4, 1, 0]

In [None]:
# Now of course iterators themselves are iterable so you can combine them together
# This type of combinations are called iterator algebra combinations and a simply suprising. In their sheer pwer and usage.
list(map(sum,zip([2,3,4],(1,2,5))))

[3, 5, 9]

In [None]:
# Given a list of values inputs and a positive integer n, write a function that splits inputs into groups of length n. 
# For simplicity, assume that the length of the input list is divisible by n. 
# For example, if inputs = [1, 2, 3, 4, 5, 6] and n = 2, your function should return [(1, 2), (3, 4), (5, 6)].

# A real naive approach
def naive_grouper(inputs, n):
    num_groups = len(inputs) // n
    return [tuple(inputs[i*n:(i+1)*n]) for i in range(num_groups)]
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
naive_grouper(nums, 2)

# If you use this approach for a list say of length 10 million.
# Your computer will die in a agonising crash by losing all available memory to just this process

[(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]

In [None]:
# Now we use a  cool iterator algebra technique

def iter_grouper(inputs,n):
  iters = [iter(inputs)]*n
  return zip(*iters)

nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
list(iter_grouper(nums,5))
# I'll break this function down. The expression iters = [iter(inputs)]*n gives n references to the inputs array.
# However these references are the exact same list i.e. n references are created to the same array
# Since there are n arrays the zip(*iters) function is actually doing zip(inputs, inputs, inputs.. inputs) these are all the same arrays.
# When the __next__ is called on each list they are being all called on the same damn list. 
# Thus making one element fter the other come out of the array in a pop fashion. This of course leads to n tupes

[(1, 2, 3, 4, 5), (6, 7, 8, 9, 10)]

In [None]:
# Now this implementation of the grouper cool as it is doesn't look at a very important caveat 
# the fact that if length is notperfectly divisible this gives the wrong answer
# So we use itertools to handle that caveat

def coolest_grouper(inputs,n,fillvalue=None):
  iters = [iter(inputs)]*n
  return it.zip_longest(*iters,fillvalue = fillvalue)

nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print(list(coolest_grouper(nums,7)))

[(1, 2, 3, 4, 5, 6, 7), (8, 9, 10, None, None, None, None)]


In [None]:
# To enumerate a dynamic list
list(zip(it.count(),['a','b','c']))

[(0, 'a'), (1, 'b'), (2, 'c')]

In [None]:
def fib():
  a,b = 0,1
  while True:
    yield a
    a,b = b,a+b

In [None]:
# Generating a alternating tuple
alternating = it.cycle([1,-1])

In [None]:
# The goal of this section, though, is to produce a single function that can generate any first order recurrence relation
# just pass it P, Q, and an initial value.
# For this we need another tool
# The accumulate() function takes two arguments—an iterable inputs and a binary function func (that is, a function with exactly two inputs)
# —and returns an iterator over accumulated results of applying func to elements of inputs.

In [None]:
# To generate the sum of series for each term
adder = it.accumulate([1,2,3,4,5,6],operator.add)

list(adder)

[1, 3, 6, 10, 15, 21]

In [None]:
# To keep track of running minimum
minima = it.accumulate([9, 21, 17, 5, 11, 12, 2, 6],min)
list(minima)

[9, 9, 9, 5, 5, 5, 2, 2]

In [None]:
# The order of the arguments in the binary function passed to accumulate() is important. 
# The first argument is always the previously accumulated result and the second argument is always the next element of the input iterable.

In [None]:
# To model a recurrence relation
# S(i) = P(S(i-1)) + Q

def first_order(p,q,initial_val):
  return it.accumulate(it.repeat(initial_val),lambda s,_ : p*s + q)

In [None]:
evens = first_order(p=1, q=2, initial_val=0)
list(next(evens) for _ in range(5))

[0, 2, 4, 6, 8]

In [None]:
odds = first_order(p=1, q=2, initial_val=1)
list(next(odds) for _ in range(5))

[1, 3, 5, 7, 9]

In [None]:
count_by_threes = first_order(p=1, q=3, initial_val=0)
list(next(count_by_threes) for _ in range(5))

[0, 3, 6, 9, 12]

In [None]:
count_by_fours = first_order(p=1, q=4, initial_val=0)
list(next(count_by_fours) for _ in range(5))

[0, 4, 8, 12, 16]

In [None]:
all_ones = first_order(p=1, q=0, initial_val=1)
list(next(all_ones) for _ in range(5))

[1, 1, 1, 1, 1]

In [None]:
all_twos = first_order(p=1, q=0, initial_val=2)
list(next(all_twos) for _ in range(5))

[2, 2, 2, 2, 2]

In [None]:
alternating_ones = first_order(p=-1, q=0, initial_val=1)
list(next(alternating_ones) for _ in range(5))

[1, -1, 1, -1, 1]

In [None]:
# A second order recurrence relation is defined as follows s(n) = p * s(n-1) + q * s(n-2) + r.

def second_order(p, q, r,initial_values):
  intermediate = it.accumulate(
      it.repeat(initial_values),
      lambda s,_ : (s[1], p*s[1] + p*s[0] + r)
  )
  return map(lambda x:x[0], intermediate)

In [None]:
fibs = second_order(p=1, q=1, r=0, initial_values=(0, 1))
list(next(fibs) for _ in range(8))

[0, 1, 1, 2, 3, 5, 8, 13]

In [None]:
# Pell numbers
pell = second_order(p=2, q=1, r=0, initial_values=(0, 1))
list(next(pell) for _ in range(6))

[0, 1, 2, 6, 16, 44]

In [None]:
lucas = second_order(p=1, q=1, r=0, initial_values=(2, 1))
list(next(lucas) for _ in range(6))

[2, 1, 3, 4, 7, 11]

In [None]:
# Dealing a deck of cards

In [None]:
ranks = ['A', 'K', 'Q', 'J', '10', '9', '8', '7', '6', '5', '4', '3', '2']
suits = ['H', 'D', 'C', 'S']

In [None]:
cards = ((rank,suit) for rank in ranks for suit in suits)
# While this is obviously usable we could do it in a way is a lot more compactusing this

In [None]:
cards = it.product(ranks,suits)

In [None]:
# Now lets shuffle the deck because we gotta do that

def shuffler(cards):
  deck = list(cards)
  random.shuffle(deck)
  return iter(tuple(deck))

cards = shuffler(cards)
cards

<tuple_iterator at 0x7fbad91619b0>

In [None]:
# Ofcourse we need a cut function else how would the dealer cheat?

def cut(cards, n):
  if n<0:
    return ValueError("Do you have any idea of what you are doing?")

  deck = list(cards)
  return iter(deck[n:] + deck[:n])

cards = cut(cards, 26) 

In [None]:
# The tee() function can be used to create any number of independent iterators from a single iterable.
iter1,iter2 = it.tee([1,2,3,4,5],2)

In [None]:
list(iter1)

[1, 2, 3, 4, 5]

In [None]:
list(iter1)

[]

In [None]:
list(iter2)

[1, 2, 3, 4, 5]

In [3]:
# Core data analysis techniques
dataset = pd.read_csv('https://raw.githubusercontent.com/realpython/materials/master/itertools-in-python3/SP500.csv')
dataset.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1950-01-03,16.66,16.66,16.66,16.66,16.66,1260000
1,1950-01-04,16.85,16.85,16.85,16.85,16.85,1890000
2,1950-01-05,16.93,16.93,16.93,16.93,16.93,2550000
3,1950-01-06,16.98,16.98,16.98,16.98,16.98,2010000
4,1950-01-09,17.08,17.08,17.08,17.08,17.08,2520000


In [7]:
!wget https://raw.githubusercontent.com/realpython/materials/master/itertools-in-python3/SP500.csv

--2020-08-27 06:35:08--  https://raw.githubusercontent.com/realpython/materials/master/itertools-in-python3/SP500.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1289165 (1.2M) [text/plain]
Saving to: ‘SP500.csv’


2020-08-27 06:35:09 (7.97 MB/s) - ‘SP500.csv’ saved [1289165/1289165]



In [11]:
class Datapoint(namedtuple('Datapoint',['date', 'value'])):
  __slots__ = ()

  def __le__(self,other):
    return self.data <= other.data

  def __lt__(self,other):
    return self.data < other.data

  def __ge__(self,other):
    return self.data > other.data

In [12]:
def read_data(csv_file,_strptime = datetime.strptime):
  with open(csv_file) as infile:
    reader = csv.DictReader(infile)
    for row in reader:
      yield Datapoint(date = _strptime(row['Date'],'%Y-%m-%d').date(),value=float(row['Adj Close']))

price = tuple(read_data('SP500.csv'))