In [None]:
### Delayed Evaluations, Partial Functions, Weak References/Proxies, Collections

In [1]:
from functools import partial

def add(x, y):
    """ Return the result of x + y """
    total = x + y
    return total


In [2]:
# Ordinary call: both arguments are supplied at call time
add(7, 8)

15

In [2]:
# partial() 'freezes' one or more of the leading arguments of
# a function taking multiple arguments and returns a new callable.
# Here x is fixed to 5, so add5(y) calls add(5, y).
add5 = partial(add, 5)
print add5

<functools.partial object at 0x7f9154c5b838>


In [3]:
# Only the remaining argument (y) has to be passed now
print add5(10)
print add5(17)

15
22


In [4]:
### Freezing more than one argument
import random

### Calling random.randrange over a given range and using it with itertools
from itertools import imap, repeat, islice

# Endless lazy stream: every item is the result of random.randrange(1, 100)
irandom = imap(random.randrange, repeat(1), repeat(100))


In [5]:
# islice caps the endless stream at its first 10 values
for item in islice(irandom, 10):
    print item

57
95
1
59
93
70
5
16
12
41


In [11]:
### How about using a partial
# Freeze the first argument (start=1); the stop value is given per call
prandom = partial(random.randrange, 1)
print prandom

print prandom(100)

<functools.partial object at 0x7f9154406050>
11


In [15]:
# Same stream as before, but imap now only has to feed the stop value
irandom = imap(prandom, repeat(100))

for i in islice(irandom, 10):
    print i

27
81
99
27
97
44
89
5
53
72


In [30]:
### Another way - freeze both arguments so the partial takes none
prandom = partial(random.randrange, 1, 100)
print prandom()


61


In [38]:

# define a generator
def myrandom():
    """ Endless generator; every step yields the result of a fresh prandom() call """
    for _ in repeat(None):
        yield prandom()
        
# Take just 10 values from the endless generator
for i in islice(myrandom(), 10):
    print i

6
39
2
96
1
97
41
96
66
13


### Delayed Evaluations

1. Evaluation of a function is delayed until the moment its result is needed.
2. Also called lazy evaluation.
3. Partials are very good for this purpose.

In [21]:
import string

# Pool of lowercase letters (the alphabet repeated 100 times)
letters = list(string.ascii_lowercase*100)
# Jumble it up
random.shuffle(letters)

def make_random_file(filename):
    """ Create a file with some random text inside it.

    Writes a single line of 100 letters sampled from the module-level
    `letters` pool, followed by a newline. An existing file is overwritten.
    """

    text = ''.join(random.sample(letters, 100))
    # The with-block closes the handle (and flushes the data) even on error
    with open(filename, 'w') as fobj:
        fobj.write(text + '\n')


In [50]:
import string

# Pool of lowercase letters (the alphabet repeated 100 times)
letters = list(string.ascii_lowercase*100)
# Jumble it up
random.shuffle(letters)

def make_random_file(filename):
    """ Create a file with some random text inside it.

    Writes 50 lines; each line holds between 50 and 99 letters sampled
    from the module-level `letters` pool. An existing file is overwritten.
    This multi-line version replaces the single-line definition given earlier.
    """

    lines = []
    for i in range(50):
        size = random.randrange(50, 100)
        text = ''.join(random.sample(letters, size))
        lines.append(text + '\n')
    # The with-block closes the handle (and flushes the data) even on error
    with open(filename, 'w') as fobj:
        fobj.writelines(lines)

In [51]:
# Writes (or overwrites) anand.txt in the current working directory
make_random_file('anand.txt')

In [22]:
import os

# open(..., 'w') does not create missing directories, so make sure
# the target folder exists before writing into it
if not os.path.isdir('test'):
    os.makedirs('test')

# Creates test/text1 .. test/text99
for i in range(1, 100):
    fname = 'test/text' + str(i)
    make_random_file(fname)

In [42]:
import hashlib

def signature(filename):
    """ Return the SHA-1 hex digest of the contents of `filename`.

    The whole file is read into memory before hashing.
    """
    # Binary mode hashes the raw bytes (no newline translation), and the
    # with-block closes the handle as soon as the data has been read.
    with open(filename, 'rb') as fobj:
        return hashlib.sha1(fobj.read()).hexdigest()

In [24]:
# Hash of one of the generated files
print signature('test/text1')

79ebfb325afd1c44064ef70b0aeb7b70713f9029


### Imagine you need to create signatures for a million files
### You want to delay this until the last moment - i.e., delayed evaluation.

In [40]:
### Using generators
import os

def signature_generator(folder):
    """ Lazily yield signature(path) for every regular file directly
    inside `folder`; sub-directories are skipped, not descended into.
    """
    candidates = (os.path.join(folder, entry) for entry in os.listdir(folder))
    for candidate in candidates:
        if not os.path.isfile(candidate):
            continue
        yield signature(candidate)
            

In [43]:
gen = signature_generator('test')
print gen

# Get signatures of files in range of 30..40
# help(islice)
def compute(start_idx, stop_idx):
    return list(islice(gen, start_idx, stop_idx, 1))
    
print compute(30, 40)


<generator object signature_generator at 0x7f915b66ae60>
['ce77baa0654f81524ef4d2bb585095e866e14ef4', 'a7dc4ded0a3b30f264910bbead67b90207b55c43', '90a5b2a6dd133bd5b1233d0132c668db1b9b72bd', 'e7b31b8bc5cd23f61746a811bad0e9379441fc15', '820f2b6bc65e7202c7adf6e491bbb35a63676d00', '1fee8032d6955ee2b3293f64677c27d43fe09f2c', 'c91869a5f5f0b8297ea3075115e7b8e977ed4958', 'f0c337888983ef33376ce02443f4950d5eba2a75', '13692db0672088da69b2eaaba5934674d226f8d9', 'c79daa8b62ab1efec634fc77762c202f452ba8de']


In [48]:
### Using partials

def signature_generator_p(folder):
    """ Return a list with one partial per regular file directly inside
    `folder`. No hashing happens here: calling a partial is what runs
    signature() on the file path frozen into it.
    """
    paths = (os.path.join(folder, entry) for entry in os.listdir(folder))
    return [partial(signature, path) for path in paths if os.path.isfile(path)]

In [45]:
# One partial per file; nothing has been hashed at this point
partials = signature_generator_p('test')
print len(partials)

102


In [47]:
# A partial exposes the positional arguments frozen into it as .args
prandom.args

(1, 100)

In [46]:
### Generating data from partials
# Generate signatures for files in range 10..20
print map(lambda x: x(), partials[10:20])

# Want to use itertools ?
print list(imap(lambda x: x(), partials[10:20]))
# Same answer ?
# itertools allows finer control

# Only calculate signatures for those files whose first character is 'a'
from itertools import ifilter

print list(imap(lambda x: x(),
     ifilter(lambda x: open(x.args[0]).read()[0] == 'a', partials)))

['e91ceff438e2a46a06426e4afc02c66570c1a08d', '5788d195da3513017fd213d1d6e1d27af04bc2eb', 'a680778af61607751599a505e532eaa014f2c43d', '2528b9ab9db3e7e5fb8ef1a89b2b6e7b420d9f60', 'c0b882cbbca7463a50135fa6aafecd9858d07574', '2a939ce87559d3184b012a0cd4a476f61d56eb4c', 'f86a9f854bfad8e63178f032b7a57515e7cc1964', 'dd0ec6268384db9bef44d556bda92c35db4d5d99', '135c2b17b24e50bf5db6379ca8d2a85669e916b3', '978730f8e3678199c0a1b8d6d790e31e666079f0']
['e91ceff438e2a46a06426e4afc02c66570c1a08d', '5788d195da3513017fd213d1d6e1d27af04bc2eb', 'a680778af61607751599a505e532eaa014f2c43d', '2528b9ab9db3e7e5fb8ef1a89b2b6e7b420d9f60', 'c0b882cbbca7463a50135fa6aafecd9858d07574', '2a939ce87559d3184b012a0cd4a476f61d56eb4c', 'f86a9f854bfad8e63178f032b7a57515e7cc1964', 'dd0ec6268384db9bef44d556bda92c35db4d5d99', '135c2b17b24e50bf5db6379ca8d2a85669e916b3', '978730f8e3678199c0a1b8d6d790e31e666079f0']
['541dfc1fafeebc50b15ff211510f6e1f916bdb74', '11397943959b8e82fa9926043f739a1dc0cf6e06']


### Weak References

In [80]:
### To avoid strong coupling between objects
import weakref

# Small holder class: keeps a list of items and can be given extra attributes
class C(object):
    def __init__(self):
        """ Start out with an empty items list """
        self.items = list()

    def add(self, item):
        """ Append item to the end of self.items """
        self.items.append(item)
        
class D(object):
    """ Empty class; its instances only serve as targets of references """
    pass

In [81]:
# One instance of each class to experiment with
c=C()
d=D()

In [82]:
import gc
# print dir(gc)
# gc.get_referrers(obj) lists the objects that refer directly to obj
print 'First=>',len(gc.get_referrers(d))

# Add a reference
# (a plain attribute assignment creates a strong reference to d)
c.d = d

print 'Second=>',len(gc.get_referrers(d))

First=> 1
Second=> 2


In [92]:
# Create a list of C objects
clist = map(lambda x: C(), range(100))
# print len(clist)

for obj in clist:
    obj.d = d

In [85]:
# Number of objects currently referring to d
print len(gc.get_referrers(d))

102


In [86]:
# Decreasing ref counts: removing c also removes its c.d reference to d
del c

In [87]:
# One referrer fewer now that c is gone
print len(gc.get_referrers(d))

101


In [88]:
# Dropping the list releases the C objects in it, and with them their references to d
del clist

In [90]:
# Referrers left after the list has been deleted
print len(gc.get_referrers(d))

2


In [90]:
print len(gc.get_referrers(d))
# List addition increases references
clist[0].add(d)
print len(gc.get_referrers(d))

101
102


In [91]:
## Weak references to the rescue!
# A fresh D instance to compare against the strongly referenced d
d2 = D()
print len(gc.get_referrers(d2))

1


In [94]:
# Add a weak reference this time
# (each object stores a weakref.ref wrapper, not d2 itself)
for obj in clist:
    obj.d2 = weakref.ref(d2)

# Still at 1 only!
print 'References after=>',len(gc.get_referrers(d2))

References after=> 1


In [95]:
# using weak references
# The printed repr names the type and address of the object pointed to
dref = weakref.ref(d)
print dref

<weakref at 0x7fe24f4e69f0; to 'D' at 0x7fe26e148b10>


In [96]:
# Dereference to object
dobj = dref()
print dobj
print dobj == d

<__main__.D object at 0x7fe26e148b10>
True


In [97]:
### Proxies
cobj = clist[0]
print cobj
cproxy = weakref.proxy(cobj)
# The proxy can be used in place of the object: attribute access and
# method calls are forwarded, and printing it shows the object's own repr
print cproxy

<__main__.C object at 0x7fe24f525710>
<__main__.C object at 0x7fe24f525710>


In [98]:
# A change made through the proxy is visible on the real object
cproxy.add('test')
print cproxy.items
print cobj.items

['test']
['test']


In [99]:
# Referrers of the real object, then referrers of the proxy object itself
print len(gc.get_referrers(cobj))
print len(gc.get_referrers(cproxy))

2
1


In [100]:
# Increase reference to cobj
clist[1].coj = cobj
print len(gc.get_referrers(cobj))

3


In [101]:
# References to proxy remains the same!
# (the new strong reference went to cobj, not to the proxy object)
print len(gc.get_referrers(cproxy))

1


In [102]:
# Add reference to proxy
# (the proxy is an object in its own right and can be referenced like any other)
clist[2].cproxy = cproxy
# Now it increases
print len(gc.get_referrers(cproxy))

2


In [103]:
# The class namespace; note the __weakref__ entry among the attributes
C.__dict__

dict_proxy({'__dict__': <attribute '__dict__' of 'C' objects>,
            '__doc__': None,
            '__init__': <function __main__.__init__>,
            '__module__': '__main__',
            '__weakref__': <attribute '__weakref__' of 'C' objects>,
            'add': <function __main__.add>})

## Collections Module

In [102]:
import collections

# List every name defined in the collections module
dir(collections)

['Callable',
 'Container',
 'Counter',
 'Hashable',
 'ItemsView',
 'Iterable',
 'Iterator',
 'KeysView',
 'Mapping',
 'MappingView',
 'MutableMapping',
 'MutableSequence',
 'MutableSet',
 'OrderedDict',
 'Sequence',
 'Set',
 'Sized',
 'ValuesView',
 '__all__',
 '__builtins__',
 '__doc__',
 '__file__',
 '__name__',
 '__package__',
 '_abcoll',
 '_chain',
 '_class_template',
 '_eq',
 '_field_template',
 '_get_ident',
 '_heapq',
 '_imap',
 '_iskeyword',
 '_itemgetter',
 '_repeat',
 '_repr_template',
 '_starmap',
 '_sys',
 'defaultdict',
 'deque',
 'namedtuple']

### defaultdict

In [2]:
# A string literal is an instance of str
type('x')

str

In [5]:
l = ['a', 'b', 'a', 'b', 'c', 'd', 'c', 'e']
# NOTE: this rebinds d, which referred to a D instance in the weak reference cells
d = {}

# Counting with a plain dict: every key must be initialised before += works
for i in l:
    if i not in d:
        d[i] = 0
    d[i] += 1
    
print d


from collections import defaultdict

# int() returns 0, so a missing key starts at 0 automatically
dd = defaultdict(int)

for i in l:
    dd[i] += 1

# The factory only supplies defaults for missing keys; values assigned
# explicitly can be of any type
dd['x'] = 'y'
print dd

{'a': 2, 'c': 2, 'b': 2, 'e': 1, 'd': 1}
defaultdict(<type 'int'>, {'a': 2, 'c': 2, 'b': 2, 'e': 1, 'd': 1, 'x': 'y'})


In [7]:
# list() returns [], so each missing key starts out as an empty list
dl = defaultdict(list)
cities = ['Colombo','Delhi','Bahrain','Seoul', 'Bangalore',
          'Chennai','Beunos Aires', 'Philadelphia', 'Mississippi']

# Group the names by their length
for c in cities:
    dl[len(c)].append(c)
    
print dl


defaultdict(<type 'list'>, {9: ['Bangalore'], 11: ['Mississippi'], 12: ['Beunos Aires', 'Philadelphia'], 5: ['Delhi', 'Seoul'], 7: ['Colombo', 'Bahrain', 'Chennai']})


In [10]:
### defaultdict
import itertools
import collections

# Provide default values for dictionaries by using type factories
freq = collections.defaultdict(int)

names = ['Anand', 'Appu', 'Amitabh', 'Ajith', 'Anjana']
# Frequency of letters

# chain(*names) walks the characters of all names one after another
for c in itertools.chain(*names):
    freq[c] += 1

print freq

defaultdict(<type 'int'>, {'A': 5, 'a': 4, 'b': 1, 'd': 1, 'i': 2, 'h': 2, 'j': 2, 'm': 1, 'n': 4, 'p': 2, 'u': 1, 't': 2})


In [11]:
### With lists
# Grouping with first character
names = ['Bipin', 'Vignesh', 'Dilip', 'Sony', 'Tara',
         'Seema', 'Raj', 'Rajiv', 'Gopal', 'Fathima',
         'Ira', 'Indira', 'Boris', 'Girish', 'Rohan',
         'Sateesh', 'Suraj', 'Tina', 'Anand', 'Abhinav']

# Key is the first letter; value is the list of names starting with it
groups = collections.defaultdict(list)
for name in names:
    groups[name[0]].append(name)

print groups

defaultdict(<type 'list'>, {'A': ['Anand', 'Abhinav'], 'B': ['Bipin', 'Boris'], 'D': ['Dilip'], 'G': ['Gopal', 'Girish'], 'F': ['Fathima'], 'I': ['Ira', 'Indira'], 'S': ['Sony', 'Seema', 'Sateesh', 'Suraj'], 'R': ['Raj', 'Rajiv', 'Rohan'], 'T': ['Tara', 'Tina'], 'V': ['Vignesh']})


### OrderedDict

1. Preserves order of entries in the dictionary.
2. Works like a combination of a dict and a list.

In [106]:
from collections import OrderedDict

cities = ['Jakarta','Delhi','Newyork','Bonn','Kolkata',
          'Bangalore','Seoul']
# Dictionary
# (fromkeys maps every key to None)
cities_dict = dict.fromkeys(cities)
# Order changes
print(cities_dict)


# Ordered dictionary
ocities_dict = OrderedDict.fromkeys(cities)
# Order is preserved
print(ocities_dict)

{'Seoul': None, 'Newyork': None, 'Jakarta': None, 'Delhi': None, 'Bonn': None, 'Kolkata': None, 'Bangalore': None}
OrderedDict([('Jakarta', None), ('Delhi', None), ('Newyork', None), ('Bonn', None), ('Kolkata', None), ('Bangalore', None), ('Seoul', None)])


In [13]:
### Uses - drop duplicates while preserving order
cities = ['Jakarta','Delhi','Newyork','Bonn','Kolkata','Bangalore',
          'Bonn','Seoul','Delhi','Jakarta','Mumbai']
# A repeated key keeps the position of its first occurrence
print collections.OrderedDict.fromkeys(cities).keys()

# print(cities_odict.keys())
# print(cities_odict.popitem())

['Jakarta', 'Delhi', 'Newyork', 'Bonn', 'Kolkata', 'Bangalore', 'Seoul', 'Mumbai']


In [22]:
## As a LRU - Least Recently Used cache
from collections import OrderedDict

class LRU(OrderedDict):
    """ Least recently used cache (LRU) dictionary
    using OrderedDict

    Entries are kept in usage order: the least recently added key is
    at the front, the most recently added key at the end. At most
    `size` entries are kept.
    """

    def __init__(self, size=10):
        """ Create an empty cache holding at most `size` entries """
        self.size = size
        # OrderedDict.__init__(self)
        super(LRU, self).__init__()

    def add(self, key, value=1):
        """ Insert `key` (or refresh it if already present) as the most
        recently used entry, then evict from the least recently used
        end until the cache is back within its capacity.

        `value` defaults to 1, so add(key) keeps working as a plain
        'mark as used' call.
        """
        # If key is there delete and reinsert so
        # it moves to end.
        if key in self:
            del self[key]

        self[key] = value
        # Loop rather than a single pop: entries assigned directly with
        # cache[key] = value may have pushed the cache over capacity.
        # max() keeps a negative size from popping an empty dictionary.
        while len(self) > max(self.size, 0):
            # Pop from left
            self.popitem(last=False)

In [23]:
# Fill the cache exactly to its capacity of 5 (rebinds d once more)
d = LRU(size=5)
d.add('a')
d.add('b')
d.add('c')
d.add('d')
d.add('e')
print d

LRU([('a', 1), ('b', 1), ('c', 1), ('d', 1), ('e', 1)])


In [117]:
# Re-adding an existing key moves it to the most recently used end
d.add('a')
print d

LRU([('b', 1), ('c', 1), ('d', 1), ('e', 1), ('a', 1)])


In [118]:
# Same again for 'b': it moves to the end, nothing is evicted
d.add('b')
print d

LRU([('c', 1), ('d', 1), ('e', 1), ('a', 1), ('b', 1)])


In [119]:
# Add something else
# The cache is full, so the entry at the front ('c') is evicted
d.add('f')
print d

LRU([('d', 1), ('e', 1), ('a', 1), ('b', 1), ('f', 1)])


### Counter

In [27]:
from collections import Counter

# Frequencies of items
names = ['Anand', 'Appu', 'Amitabh', 'Ajith', 'Anjana']
# Frequency of letters

# Counter does the counting loop internally, given any iterable
freq = Counter(itertools.chain(*names))
print freq
print dict(freq)

# The 5 most frequent letters with their counts, highest first
print freq.most_common(5)

Counter({'A': 5, 'a': 4, 'n': 4, 'i': 2, 'h': 2, 'j': 2, 'p': 2, 't': 2, 'b': 1, 'd': 1, 'm': 1, 'u': 1})
{'A': 5, 'a': 4, 'b': 1, 'd': 1, 'i': 2, 'h': 2, 'j': 2, 'm': 1, 'n': 4, 'p': 2, 'u': 1, 't': 2}
[('A', 5), ('a', 4), ('n', 4), ('i', 2), ('h', 2)]


In [41]:
import requests

text=requests.get('https://www.gutenberg.org/files/2852/2852-0.txt').text
freq = Counter(text.split())

# Frequency of words
print freq.most_common(5)

# Frequency of letters

freq_c = Counter(filter(None, map(lambda x: x.strip(), text)))
print freq_c.most_common(5)

[(u'the', 3221), (u'of', 1690), (u'and', 1560), (u'to', 1450), (u'a', 1280)]
[(u'e', 31614), (u't', 23385), (u'o', 20483), (u'a', 20288), (u'n', 17187)]


### namedtuple

In [42]:
from collections import namedtuple

# namedtuple() returns a new class; "Employee" is the class name and the
# second string lists its fields. The class is bound to the name `employee` here.
employee = namedtuple("Employee", "name age gender")
print employee

<class '__main__.Employee'>


In [45]:
# The generated class derives directly from tuple
employee.__bases__

(tuple,)

In [48]:
# File the collections module was loaded from
collections.__file__

'/usr/lib/python2.7/collections.pyc'

In [68]:
# NOTE(review): `varargs` is neither defined in this notebook nor a standard
# library module; presumably a local varargs.py is expected on the import
# path - confirm, or drop this cell, since nothing after it uses multiply.
from varargs import multiply

# func_code is the code object of a Python 2 function
multiply.func_code

import os

# os.__file__
# import math
# math.__file__

In [69]:
# Fields may be passed by keyword; the repr shows field names and values
jessica = employee(name='Jessica', age=31, gender='F')
print jessica
arjun = employee(name='Arjun', age=25, gender='M')
print arjun

Employee(name='Jessica', age=31, gender='F')
Employee(name='Arjun', age=25, gender='M')


In [70]:
# A namedtuple is iterable (it is a tuple): looping over it yields
# the field values in field order
for field in arjun:
    print field

Arjun
25
M


In [138]:
# Field names, in the order they were declared
arjun._fields

('name', 'age', 'gender')

In [74]:
# Even supports the 'count' and 'index' methods, like lists!
print arjun.count('Arjun')
print arjun.index('M')

1
2


In [75]:
arjun.salary = 400000

AttributeError: 'Employee' object has no attribute 'salary'

In [78]:
# _make is a factory on namedtuple: it builds one instance from an
# iterable holding the field values in order
employees = (('Anand', 41, 'M'), 
             ('Bibhas', 32, 'M'),
             ('Preethi', 25, 'F'),
             ('Gunjan', 21, 'F'))

print map(employee._make, employees)

[Employee(name='Anand', age=41, gender='M'), Employee(name='Bibhas', age=32, gender='M'), Employee(name='Preethi', age=25, gender='F'), Employee(name='Gunjan', age=21, gender='F')]


In [77]:
# A namedtuple class can be sub-classed to attach extra behaviour
class MyEmployee(employee):
    """ Employee record with a convenience accessor """

    def get_gender(self):
        """ Return the value of the gender field """
        gender = self.gender
        return gender

In [79]:
# _make is inherited, so it builds MyEmployee instances here
emps = map(MyEmployee._make, employees)
print emps[0]
print emps[0].get_gender()

Employee(name='Anand', age=41, gender='M')
M


In [28]:
try:
    # Python 2: str and unicode share this base class
    _FLATTEN_ATOMS = (basestring,)
except NameError:
    # Python 3: text and byte strings
    _FLATTEN_ATOMS = (str, bytes)


def flatten(iterables):
    """ Return a generator flattening an iterable. In
    other words, if the iterable contains other iterables,
    yield items from them till no more iterables are found.

    Strings found inside the input are yielded whole rather than
    split into characters; a string passed as the top-level
    argument is walked character by character. Dictionaries
    contribute their keys only.

    >>> list(flatten(range(5)))
    [0, 1, 2, 3, 4]
    >>> list(flatten(['python']))
    ['python']
    >>> list(flatten('python'))
    ['p', 'y', 't', 'h', 'o', 'n']
    >>> list(flatten([1,[2,[3,[4,[5]]]]]))
    [1, 2, 3, 4, 5]
    >>> list(flatten([1,[2,3],[4,5]]))
    [1, 2, 3, 4, 5]
    >>> list(flatten(dict(enumerate(range(5)))))
    [0, 1, 2, 3, 4]
    >>> list(flatten([1,2,'python',{3:4, 4:5}, ['perl']]))
    [1, 2, 'python', 3, 4, 'perl']
    """

    for item in iterables:
        # Strings are tested for explicitly: where str defines __iter__,
        # a one-character string iterates to itself and the recursion
        # below would never terminate.
        if isinstance(item, _FLATTEN_ATOMS) or not hasattr(item, '__iter__'):
            yield item
        else:
            for sub_item in flatten(item):
                yield sub_item

In [30]:
# Lists nested up to two levels deep
l = [1, 2, [3,4], [5,6, [7,8]]]

In [31]:
# flatten() returns a generator; list() drains it
list(flatten(l))

[1, 2, 3, 4, 5, 6, 7, 8]

In [34]:
# Mixed input: the string stays whole, the dict contributes its keys,
# and the triple-nested list is unwrapped completely
list(flatten([1,2,'python',{3:4, 4:5}, [[['perl']]]]))

[1, 2, 'python', 3, 4, 'perl']

In [40]:
# Plain list concatenation does not flatten: the inner lists stay nested
l = [1, 2, [4,5], [8,9]]
[] + l

[1, 2, [4, 5], [8, 9]]

In [106]:
nested = {1: {5: {9: [1,2,3]}, 10: []}, 8: {2:3}}
# Iterating a dictionary yields only its keys, so flatten() sees the
# top-level keys and never descends into the nested values
list(flatten(nested))


[8, 1]