# Collections Module

## Counter

In [1]:
# Basically is a dictionary subclass which helps count hashable objects

In [3]:
from collections import Counter

In [4]:
l = [1,1,1,1,3,3,4,4,5,6,7,89,67,5,5,5,67,89]

In [7]:
Counter(l)

Counter({1: 4, 3: 2, 4: 2, 5: 4, 6: 1, 7: 1, 67: 2, 89: 2})

In [8]:
s = 'aaaccbjbjjsbjdsowerepqwoerrtwlbncvbdfg'
Counter(s)

Counter({'a': 3,
         'b': 5,
         'c': 3,
         'd': 2,
         'e': 3,
         'f': 1,
         'g': 1,
         'j': 4,
         'l': 1,
         'n': 1,
         'o': 2,
         'p': 1,
         'q': 1,
         'r': 3,
         's': 2,
         't': 1,
         'v': 1,
         'w': 3})

In [10]:
s = "How many time a word show up in this sentence word show up times many how a"

In [11]:
words = s.split()

In [12]:
Counter(words)

Counter({'How': 1,
         'a': 2,
         'how': 1,
         'in': 1,
         'many': 2,
         'sentence': 1,
         'show': 2,
         'this': 1,
         'time': 1,
         'times': 1,
         'up': 2,
         'word': 2})

In [13]:
c = Counter(words)

In [14]:
c.most_common(3)

[('many', 2), ('a', 2), ('word', 2)]

In [15]:
sum(c.values())

17

## defaultdict

In [16]:
from collections import defaultdict

In [18]:
d = {'k1':'v1'}
d['k1']

'v1'

In [19]:
d['k2']

KeyError: 'k2'

In [20]:
d = defaultdict(object)

In [21]:
d['one']

<object at 0x551c2e0>

In [23]:
for item in d:
    print (item)

one


In [24]:
d = defaultdict(lambda: 0)

In [25]:
d['one']

0

In [26]:
d['two'] = 2

In [27]:
d

defaultdict(<function __main__.<lambda>>, {'one': 0, 'two': 2})

## Ordered Dictionary

In [30]:
d = {}
d['a'] = 1
d['b'] = 2
d['c'] = 3
d['d'] = 4
d['e'] = 5

In [31]:
d

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

In [34]:
for k,v in d.items():
    print (k,v)

a 1
b 2
c 3
d 4
e 5


In [35]:
from collections import OrderedDict

In [36]:
d = OrderedDict()

In [37]:
d['a'] = 1
d['b'] = 2
d['c'] = 3
d['d'] = 4
d['e'] = 5

In [38]:
for k,v in d.items():
    print (k,v)

a 1
b 2
c 3
d 4
e 5


In [39]:
d1 = {}
d1['a'] = 1
d1['b'] = 2

d2 = {}
d2['b'] = 2
d2['a'] = 1

In [40]:
print(d1==d2)

True


In [41]:
d1 = OrderedDict()
d1['a'] = 1
d1['b'] = 2

d2 = OrderedDict()
d2['b'] = 2
d2['a'] = 1

print(d1==d2)

False


## namedtuple

In [42]:
t = (1,2,3)
t[0]

1

In [44]:
from collections import namedtuple

In [45]:
Dog = namedtuple('Dog','age breed name')
sam = Dog(age=2, breed='Lab', name = 'Sam')

In [46]:
sam

Dog(age=2, breed='Lab', name='Sam')

In [47]:
sam.age

2

In [48]:
sam.breed

'Lab'

In [49]:
Cat = namedtuple('Cat','fur claws name')
kitty = Cat(fur = 'fuzzy', claws = False, name = 'kitty')

In [50]:
kitty

Cat(fur='fuzzy', claws=False, name='kitty')

In [51]:
kitty.fur

'fuzzy'

## Others

### Datetime

In [52]:
import datetime

In [55]:
#datetime.time(h,m,s)
t = datetime.time(5,25,1)

In [56]:
print (t)

05:25:01


In [58]:
print (datetime.time.min)

00:00:00


In [59]:
print (datetime.time.max)

23:59:59.999999


In [61]:
print (datetime.time.resolution)

0:00:00.000001


In [62]:
today = datetime.date.today()

In [63]:
print(today)

2017-11-17


In [64]:
today.timetuple()

time.struct_time(tm_year=2017, tm_mon=11, tm_mday=17, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=4, tm_yday=321, tm_isdst=-1)

In [65]:
today.year

2017

In [66]:
today.month

11

In [67]:
today.day

17

In [68]:
print (datetime.date.min)

0001-01-01


In [69]:
print (datetime.date.max)

9999-12-31


In [70]:
print (datetime.date.resolution)

1 day, 0:00:00


In [73]:
d1 = datetime.date(2017,11,17)
print (d1)

2017-11-17


In [74]:
d2 = d1.replace(year=1990)

In [75]:
print (d2)

1990-11-17


In [76]:
print (d1 - d2)

9862 days, 0:00:00


### Python Debugger - pdb

In [77]:
import pdb

In [81]:
x = [1,3,4]
y = 2
z = 3

res1 = y+z
print (res1)

pdb.set_trace()

res2 = y+x
print (res2)

5
--Return--
> <ipython-input-81-f4dc5c59a9b3>(8)<module>()->None
-> pdb.set_trace()
(Pdb) x
[1, 3, 4]
(Pdb) y
2
(Pdb) z
3
(Pdb) y+z
5
(Pdb) x**2
*** TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'
(Pdb) y**2
4
(Pdb) q


BdbQuit: 

### Timing your code - timeit

In [82]:
# Shows time taken to execute the code
import timeit

In [83]:
'0-1-2-3-...-99'

'0-1-2-3-...-99'

In [84]:
"-".join(str(n) for n in range(100))

'0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-69-70-71-72-73-74-75-76-77-78-79-80-81-82-83-84-85-86-87-88-89-90-91-92-93-94-95-96-97-98-99'

In [85]:
timeit.timeit("-".join(str(n) for n in range(100)), number = 10000)

0.028865743411536658

In [86]:
timeit.timeit("-".join([str(n) for n in range(100)]), number = 10000)

0.04040202680323546

In [87]:
timeit.timeit('"-".join(map(str,range(100)))',number = 10000)

0.19512008526182

In [88]:
#best way - using %
%timeit "-".join(map(str,range(100)))

16.6 µs ± 127 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [91]:
%timeit "-".join(str(n) for n in range(100))

24.5 µs ± 258 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [93]:
%timeit "-".join([str(n) for n in range(100)])

20.3 µs ± 138 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Regular Expressions - re

In [94]:
import re

In [101]:
patterns = ['term1', 'term2']

In [102]:
text = "This is a string with term1, but not the other term"

In [103]:
re.search('hello', 'hello world')

<_sre.SRE_Match object; span=(0, 5), match='hello'>

In [104]:
for pattern in patterns:
    print ('Searching for {} in: \n{}'.format(pattern, text))
    
    #Check for match
    if re.search(pattern,  text):
        print ('\n')
        print ('Match was found. \n')
    else:
        print ('\n')
        print ('No Match was found.\n')

Searching for term1 in: 
This is a string with term1, but not the other term


Match was found. 

Searching for term2 in: 
This is a string with term1, but not the other term


No Match was found.



In [105]:
print (re.search('h','w'))

None


In [106]:
match = re.search(patterns[0],text)

In [107]:
type(match)

_sre.SRE_Match

In [109]:
match.start()

22

In [110]:
match.end()

27

In [111]:
match.span()

(22, 27)

In [112]:
split_term = '@'
phrase = "what is your email? hello@gmail.com"
re.split(split_term,phrase)

['what is your email? hello', 'gmail.com']

In [113]:
'hello world'.split()

['hello', 'world']

In [119]:
re.findall('match','here is one match, here is another match')

['match', 'match']

### Repetition Syntax

In [117]:
def multi_re_find(patterns,phrase):
    '''
    Takes in a list of regex patterns
    Prints a list of all matches
    '''
    for pattern in patterns:
        print ('Searching the phrase using the re check: {}'.format(pattern))
        print (re.findall(pattern,phrase))
        print ('\n')

In [118]:
test_phrase = 'sdsd..sssddd...sdddsddd...dsds...dsssss...sdddd'

test_patterns = [ 'sd*',     # s followed by zero or more d's
                'sd+',          # s followed by one or more d's
                'sd?',          # s followed by zero or one d's
                'sd{3}',        # s followed by three d's
                'sd{2,3}',      # s followed by two to three d's
                ]

multi_re_find(test_patterns,test_phrase)

Searching the phrase using the re check: sd*
['sd', 'sd', 's', 's', 'sddd', 'sddd', 'sddd', 'sd', 's', 's', 's', 's', 's', 's', 'sdddd']


Searching the phrase using the re check: sd+
['sd', 'sd', 'sddd', 'sddd', 'sddd', 'sd', 'sdddd']


Searching the phrase using the re check: sd?
['sd', 'sd', 's', 's', 'sd', 'sd', 'sd', 'sd', 's', 's', 's', 's', 's', 's', 'sd']


Searching the phrase using the re check: sd{3}
['sddd', 'sddd', 'sddd', 'sddd']


Searching the phrase using the re check: sd{2,3}
['sddd', 'sddd', 'sddd', 'sddd']




## Character set

In [120]:
test_phrase = 'sdsd..sssddd...sdddsddd...dsds...dsssss...sdddd'

test_patterns = [ '[sd]',    # either s or d
            's[sd]+']   # s followed by one or more s or d
            

multi_re_find(test_patterns,test_phrase)

Searching the phrase using the re check: [sd]
['s', 'd', 's', 'd', 's', 's', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 'd', 's', 'd', 's', 'd', 's', 's', 's', 's', 's', 's', 'd', 'd', 'd', 'd']


Searching the phrase using the re check: s[sd]+
['sdsd', 'sssddd', 'sdddsddd', 'sds', 'sssss', 'sdddd']




### Exclusion

In [121]:
test_phrase = 'This is a string! But it has punctuation. How can we remove it?'

In [122]:
re.findall('[^!.? ]+',test_phrase)

['This',
 'is',
 'a',
 'string',
 'But',
 'it',
 'has',
 'punctuation',
 'How',
 'can',
 'we',
 'remove',
 'it']

### Character Ranges

In [123]:
test_phrase = 'This is an example sentence. Lets see if we can find some letters.'

test_patterns=[ '[a-z]+',      # sequences of lower case letters
                '[A-Z]+',      # sequences of upper case letters
                '[a-zA-Z]+',   # sequences of lower or upper case letters
                '[A-Z][a-z]+'] # one upper case letter followed by lower case letters
                
multi_re_find(test_patterns,test_phrase)

Searching the phrase using the re check: [a-z]+
['his', 'is', 'an', 'example', 'sentence', 'ets', 'see', 'if', 'we', 'can', 'find', 'some', 'letters']


Searching the phrase using the re check: [A-Z]+
['T', 'L']


Searching the phrase using the re check: [a-zA-Z]+
['This', 'is', 'an', 'example', 'sentence', 'Lets', 'see', 'if', 'we', 'can', 'find', 'some', 'letters']


Searching the phrase using the re check: [A-Z][a-z]+
['This', 'Lets']




### Escape Code

In [124]:
test_phrase = 'This is a string with some numbers 1233 and a symbol #hashtag'

test_patterns=[ r'\d+', # sequence of digits
                r'\D+', # sequence of non-digits
                r'\s+', # sequence of whitespace
                r'\S+', # sequence of non-whitespace
                r'\w+', # alphanumeric characters
                r'\W+', # non-alphanumeric
                ]

multi_re_find(test_patterns,test_phrase)

Searching the phrase using the re check: \d+
['1233']


Searching the phrase using the re check: \D+
['This is a string with some numbers ', ' and a symbol #hashtag']


Searching the phrase using the re check: \s+
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']


Searching the phrase using the re check: \S+
['This', 'is', 'a', 'string', 'with', 'some', 'numbers', '1233', 'and', 'a', 'symbol', '#hashtag']


Searching the phrase using the re check: \w+
['This', 'is', 'a', 'string', 'with', 'some', 'numbers', '1233', 'and', 'a', 'symbol', 'hashtag']


Searching the phrase using the re check: \W+
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' #']




### String IO

In [129]:
#Python 2 - StringIO
#Python 3 - io.StringIO or io.BytesIO
import io

In [130]:
msg = "This is a normal string"

In [131]:
f = io.StringIO(msg)

In [132]:
f.read()

'This is a normal string'