#### **Write factorial function with tail recursion**


In [5]:
def fac(n, acc=1):
    if n == 0:
        return acc
    else:
        return fac(n - 1, n * acc)


In [6]:
fac(5)

120

### **Exercise 1**
You need to split a string into fields, but the delimiters (and spacing around them) aren’t consistent throughout the string,  
e.g.: line = 'asdf fjdk; afed, fjek,asdf, foo'

In [7]:
import re

line = 'asdf fjdk; afed, fjek,asdf, foo'
re.split(r'[;,\s]\s*', line)

['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

In [8]:
import re

line = 'asdf fjdk; afed, fjek,asdf, foo'

fields = re.split(r'[;,\s]\s*', line)

print(fields)

values = fields[::2]
delimiters = fields[1::2] + ['']

print(values)
print(delimiters)

['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']
['asdf', 'afed', 'asdf']
['fjdk', 'fjek', 'foo', '']


### **Exercise 2**
You need to check the start or end of a string for specific text patterns, such as filename extensions, URL schemes, and so on,  
e.g. if 'file.txt' ends with '.txt'

In [9]:
filename = 'file.txt'
filename.endswith('.txt')

True

In [10]:
url = 'http://www.python.org'
url.startswith('http:')

True

### **Exercise 3**
Extract dates in US format mm/dd/yyyy from a string  

text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'

In [11]:
import re

text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'

re.findall(r'\d+/\d+/\d+', text)

['11/27/2012', '3/13/2013']

In [12]:
import re

text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'

dates = re.findall(r'(\d+)/(\d+)/(\d+)', text)

print(dates)

for month, day, year in dates:
    print('{}-{}-{}'.format(year, month, day))

[('11', '27', '2012'), ('3', '13', '2013')]
2012-11-27
2013-3-13


### **Exercise 4**
You want to search for and replace a text pattern in a string, e.g., in  

text = 'yeah, but no, but yeah, but no, but yeah'  

you want to replace 'yeah' with 'yep'.  

In [13]:
text = 'yeah, but no, but yeah, but no, but yeah'

text.replace('yeah', 'yep')

'yep, but no, but yep, but no, but yep'

### **Exercise 5**
reverse the date 3/13/2013 to 2013-3-13

In [14]:
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'

re.sub(r'(\d+)/(\d+)/(\d+)', r'\3-\1-\2', text)

'Today is 2012-11-27. PyCon starts 2013-3-13.'

### **Exercise 6**

Change month number to month abbreviation

In [17]:
import re

text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'

from calendar import month_abbr

def change_date(m):
    mon_name = month_abbr[int(m.group(1))]
    return '{} {} {}'.format(m.group(2), mon_name, m.group(3))

re.sub(r'(\d+)/(\d+)/(\d+)', change_date, text)

'Today is 27 Nov 2012. PyCon starts 13 Mar 2013.'

### **Exercise 7**
You need to search for and possibly replace text in a case-insensitive manner., e.g.,  

"UPPER PYTHON, Mixed Python, lower python" to "UPPER SNAKE, Mixed Snake, lower snake"


In [36]:
import re

def match_case(word):
    def replace(m):
        text = m.group()
        if text.isupper():
            return word.upper()
        elif text.islower():
            return word.lower()
        elif text[0].isupper():
            return word.capitalize()
        else:
            return word
    return replace

text = 'UPPER PYTHON, Mixed Python, lower python'

re.findall('python', text, flags=re.IGNORECASE)

re.sub('python', match_case('snake'), text, flags=re.IGNORECASE)


'UPPER SNAKE, Mixed Snake, lower snake'

### **Exercise 9**
strip string  

s = ' hello world \n' to s = 'hello world'  
t = '-----hello=====' to t = hello  
s = "hello world\n" to s = "hello world"  
s = 'python\fis\tawesome\r\n' to s = 'python is awesome'  

In [54]:
s = ' hello world \n'
s.strip()

'hello world'

In [53]:
t = '-----hello====='
t.lstrip('-').rstrip('=')

'hello'

In [52]:
s = 'python\fis\tawesome\r\n'
s.replace('\f', ' ').replace('\t', ' ').replace('\r', ' ').strip()

'python is awesome'

### **Exercise 10***

Write a tokenizer which splits expression into tokens with names:  
text = 'foo = 23 + 42 * 10' tokens = [('NAME', 'foo'), ('EQ', '='), ('NUM', '23'), ('PLUS', '+'), ('NUM', '42'), ('TIMES', '*'), ('NUM', '10')]  

hints:
 - use re module and scanner
 - Token name and value get by _lastgroup_ and _group()_ methods
 - store elements in collection.namedtuple

In [59]:


import re
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
TIMES = r'(?P<TIMES>\*)'
EQ = r'(?P<EQ>=)'
WS = r'(?P<WS>\s+)'
master_pat = re.compile('|'.join([NAME, NUM, PLUS, TIMES, EQ, WS]))

# Scanner
scanner = master_pat.scanner('foo = 42')
scanner.match()

<re.Match object; span=(0, 3), match='foo'>

In [60]:
_.lastgroup, _.group()

('NAME', 'foo')

In [63]:
import collections
Token = collections.namedtuple('Token', ['type', 'value'])

def generate_tokens(pat, text):
    scanner = pat.scanner(text)
    for m in iter(scanner.match, None):
        yield Token(m.lastgroup, m.group())

# Example use
for tok in generate_tokens(master_pat, 'foo = 42'):
    print(tok)


Token(type='NAME', value='foo')
Token(type='WS', value=' ')
Token(type='EQ', value='=')
Token(type='WS', value=' ')
Token(type='NUM', value='42')
