# Iteration and the `__getitem__` method

In [1]:
import re
import reprlib

In [2]:
# Regular expression matching a run of one or more word characters.
# For ASCII text, \w is equivalent to the set [a-zA-Z0-9_]; on str patterns it
# also matches Unicode letters and digits.
# The raw string keeps the backslash in \w from being treated as a string escape.
RE_WORD = re.compile(r'\w+')

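For a class to be iterable, it must implement either \__iter__() or \__getitem__(). When \__iter__() is absent, a \__getitem__() that accepts integer indexes starting at 0 is enough.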

In [3]:
class collectionOfWords:
    """Hold a text together with the list of words found in it.

    Implementing __getitem__ is enough to make instances iterable. When
    iterating over an object, the interpreter calls iter(object), which first
    checks for __iter__. If __iter__ is missing but __getitem__ exists, Python
    builds an iterator that fetches items in order, starting from index 0.
    Every built-in sequence implements __getitem__ (and also __iter__), which
    is why they are all iterable.
    """

    def __init__(self, text):
        # Keep the original text for __repr__ and the extracted words for indexing.
        self.text = text
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        # reprlib.repr abbreviates long values (strings are limited to 30
        # characters by default), which keeps the output readable for very
        # large texts.
        shortened = reprlib.repr(self.text)
        return 'collectionOfWords(' + shortened + ')'

    def __getitem__(self, index):
        # Delegate to the word list, so integers and slices behave as they do
        # on a list.
        word_list = self.words
        return word_list[index]

In [4]:
words = collectionOfWords("This is a collection of words")

In [5]:
words

collectionOfWords('This is a co...tion of words')

In [6]:
words[1]

'is'

In [7]:
list(words)

['This', 'is', 'a', 'collection', 'of', 'words']

In [8]:
#raises a TypeError because collectionOfWords does not define a __len__ method
len(words)

TypeError: object of type 'collectionOfWords' has no len()

In [9]:
class practiceSequence:
    """Word sequence that supports indexing, iteration and len().

    The original string is kept in ``sequence``; the words extracted from it
    with RE_WORD are kept in ``words``.
    """

    def __init__(self, sequence):
        self.sequence = sequence
        self.words = RE_WORD.findall(sequence)

    def __len__(self):
        # Defining __len__ is what allows len() to be called on instances.
        word_list = self.words
        return len(word_list)

    def __getitem__(self, index):
        # Indexing is delegated to the underlying list of words.
        word_list = self.words
        return word_list[index]

In [10]:
practice = practiceSequence("one two three")

In [11]:
practice.sequence

'one two three'

In [12]:
practice.words

['one', 'two', 'three']

In [13]:
len(practice)

3

In [14]:
class representation:
    """Minimal example of customising what repr() shows for an instance."""

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        # The representation is just the stored value formatted as a string,
        # with no class name or quotes around it.
        return format(self.text)

In [15]:
representation("Hello world!")

Hello world!