# 2주차 발표 소단원 리스트

(2, 18)
(3, 2)
(3, 6)
(3, 10)
(3, 14)
(4, 2)
(4, 6)
(4, 10)

# 전체 요약자료

(2, 18) [문자열 token화] : scanner 함수 사용

(3, 2) [정확한 계산] : from decimal import Decimal

(3, 6) [복소수 계산] : cmath,np

(3, 10) [행렬 계산] : np.linalg

(3, 14) [특정 달의 날짜 범위 계산하기]

(4, 2) [iter 새로 정의하기] : "def \__iter\__"

(4, 6) [iter 새로 정의해서 활용도 높이기] : 예시

(4, 10) [enumerate 사용하자]

### 2-18) Tokenizing Text
- 문자열을 token화 시키기 (분해)
- (sol) re compile된 객체에 scanner 함수 적용!!

In [None]:
text = 'foo = 23 + 42 * 10'

In [100]:
import re
# ?P<변수명> -> 나중에 매칭되는 것의 이름이 됨!!
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
TIMES = r'(?P<TIMES>\*)'
EQ = r'(?P<EQ>=)'
WS = r'(?P<WS>\s+)'

In [109]:
master_pat = re.compile("|".join([NAME,NUM,PLUS,TIMES,EQ,WS]))
scanner = master_pat.scanner('foo = 42')

In [110]:
scanner.match()

<_sre.SRE_Match object; span=(0, 3), match='foo'>

In [111]:
_.lastgroup,_.group()

('NAME', 'foo')

In [112]:
scanner.match()

<_sre.SRE_Match object; span=(3, 4), match=' '>

In [113]:
_.lastgroup,_.group()

('WS', ' ')

In [114]:
scanner.match()

<_sre.SRE_Match object; span=(4, 5), match='='>

In [115]:
_.lastgroup,_.group()

('EQ', '=')

### 위의 내용 iteration 형태

In [131]:
from collections import namedtuple
Token = namedtuple('Token',['type','value'])
def generate_token(pat,text):
    scanner = pat.scanner(text)
    # called until None is returnd
    for m in iter(scanner.match,None): 
        yield Token(m.lastgroup,m.group())

In [133]:
for tok in generate_token(master_pat,'foo = 42'):
    print(tok)

Token(type='NAME', value='foo')
Token(type='WS', value=' ')
Token(type='EQ', value='=')
Token(type='WS', value=' ')
Token(type='NUM', value='42')


In [139]:
text = 'foo = 23 + 42 * 10'
tokens = (tok for tok in generate_token(master_pat,text) if tok.type != 'WS')
list(tokens)

[Token(type='NAME', value='foo'),
 Token(type='EQ', value='='),
 Token(type='NUM', value='23'),
 Token(type='PLUS', value='+'),
 Token(type='NUM', value='42'),
 Token(type='TIMES', value='*'),
 Token(type='NUM', value='10')]

### token 사용시 주의사항
1. 모든 패턴을 다 고려해줘야함!!
2. re에서 사용된 패턴이 앞에부터 사용되므로, 긴 것을 앞에 배치해야 함 
    - '<'과 '<='이 공존하면 '<='를 먼저 사용해야 인식됨!
3. 패턴 간 관계 주의 (2와 유사)

In [None]:
## 2
LT = r'(?P<LT><)'
LE = r'(?P<LE><=)'
EQ = r'(?<EQ>=)'
master_pat = "|".join([LE,LT,EQ]) # correct
# master_pat = "|".join([LT,LE,EQ]) # Incorrect

In [141]:
## 3
PRINT = r'(?P<PRINT>print)'
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-z_0-9]*)'
master_pat = re.compile("|".join([PRINT,NAME]))
for tok in generate_token(master_pat,'printer'):
    print(tok)

Token(type='PRINT', value='print')
Token(type='NAME', value='er')


- 추가적으로 PyParsing 과 PLY 참고하기 : PLY는 2-19번)에서도 다룸!

### 3-2) Performing Accurate Decimal Calculations
- 소숫점 아래 정확한 계산이 필요한 경우! -> 주로 financial 영역 계산, 데이터!!
- (sol) from decimal import Decimal :: 속도가 조금 느려질 가능성!!
    - from decimal import localcontext 로 환경 구축!!
    - sum 대신 math.fsum 사용!!

In [13]:
a = 2.1
b = 4.2
a+b == 6.3


False

In [16]:
from decimal import Decimal
a = Decimal('4.2') # 반드시 문자열로 넣어주기!!
b = Decimal('2.1')
a+b

Decimal('6.3')

In [17]:
print(a+b)

6.3


In [18]:
(a+b) == Decimal('6.3')

True

In [20]:
### decimal로 rounding 환경 설정 가능
from decimal import localcontext
a = Decimal('1.3')
b = Decimal('1.7')
print(a/b)

0.7647058823529411764705882353


In [21]:
with localcontext() as ctx:
    ctx.prec = 3
    print(a/b)

0.765


In [22]:
with localcontext() as ctx:
    ctx.prec = 50
    print(a/b)

0.76470588235294117647058823529411764705882352941176


### decimal에 비해 기존 float 시스템의 장점
    1. 발생가능한 계산에러에 비해 속도가 빠르다

In [23]:
nums = [1.23e+18,1,-1.23e+18]
sum(nums) # 1이 계산에서 무시되는..

0.0

In [24]:
import math
math.fsum(nums) # fsum 사용!!

1.0

### 3-6) performing complex-valued math
- (sol)
    1. math 대신 cmath 사용
    2. 대부분 np(numpy)로 해결하기

In [55]:
# complex(real,imags)
a = complex(2,4)
b = 3 - 5j
a

(2+4j)

In [56]:
b

(3-5j)

In [57]:
a.real

2.0

In [58]:
a.imag

4.0

In [63]:
a.conjugate()

(2-4j)

In [59]:
a+b

(5-1j)

In [64]:
a*b

(26+2j)

In [60]:
a/b

(-0.4117647058823529+0.6470588235294118j)

In [61]:
abs(a) # == sqrt(a*a.conjugate())

4.47213595499958

In [68]:
## sin,cos,exp 변환
import cmath
cmath.sin(a)

(24.83130584894638-11.356612711218174j)

In [69]:
cmath.cos(a)

(-11.36423470640106-24.814651485634187j)

In [70]:
cmath.exp(a)

(-4.829809383269385-5.5920560936409816j)

### numpy 이용하는게 가장 일반적

In [71]:
import numpy as np
a = np.array([2+3j,4+5j,6-7j,8+9j])
a

array([2.+3.j, 4.+5.j, 6.-7.j, 8.+9.j])

In [72]:
np.sin(a)

array([   9.15449915  -4.16890696j,  -56.16227422 -48.50245524j,
       -153.20827755-526.47684926j, 4008.42651446-589.49948373j])

In [73]:
np.sin(complex(2,4))

(24.83130584894638-11.356612711218173j)

In [74]:
import math
math.sqrt(-1)

ValueError: math domain error

In [75]:
import cmath
cmath.sqrt(-1)

1j

### 3-10) Performing matrix and linear algebra calculations
- (sol) numpy 이용! : np.linalg

In [121]:
import numpy as np
m = np.matrix([[1,-2,3],[0,4,5],[7,8,-9]])
m

matrix([[ 1, -2,  3],
        [ 0,  4,  5],
        [ 7,  8, -9]])

In [122]:
m.T

matrix([[ 1,  0,  7],
        [-2,  4,  8],
        [ 3,  5, -9]])

In [123]:
m.I # inverse

matrix([[ 0.33043478, -0.02608696,  0.09565217],
        [-0.15217391,  0.13043478,  0.02173913],
        [ 0.12173913,  0.09565217, -0.0173913 ]])

In [133]:
v = np.matrix([[2.],[3.],[4.]]) # vector

In [126]:
m * v # 그냥 * 쓰면 됨

matrix([[ 8],
        [32],
        [ 2]])

In [127]:
np.linalg.det(m)

-229.99999999999983

In [128]:
np.linalg.eigvals(m)

array([-13.11474312,   2.75956154,   6.35518158])

In [134]:
x = np.linalg.solve(m,v) # mx = v 인 x 찾기

In [137]:
m*x 

matrix([[2.],
        [3.],
        [4.]])

### 3-14) Finding the date range for the current month
- (sol) calender 사용

In [222]:
from datetime import datetime,date,timedelta
import calendar

In [226]:
def get_month_range(start_date=None):
    if start_date is None:
        start_date = datetime.today().replace(day=1) # 시작일을 1로 함
    _,days_in_month = calendar.monthrange(start_date.year,start_date.month)
    end_date = start_date + timedelta(days = days_in_month) # end_date 계산해냄
    return (start_date,end_date) 

In [227]:
a_day = timedelta(days=1)
a_day

datetime.timedelta(1)

In [228]:
first_day, last_day = get_month_range()
while first_day < last_day:
    print(first_day)
    first_day += a_day

2019-03-01 20:48:25.851657
2019-03-02 20:48:25.851657
2019-03-03 20:48:25.851657
2019-03-04 20:48:25.851657
2019-03-05 20:48:25.851657
2019-03-06 20:48:25.851657
2019-03-07 20:48:25.851657
2019-03-08 20:48:25.851657
2019-03-09 20:48:25.851657
2019-03-10 20:48:25.851657
2019-03-11 20:48:25.851657
2019-03-12 20:48:25.851657
2019-03-13 20:48:25.851657
2019-03-14 20:48:25.851657
2019-03-15 20:48:25.851657
2019-03-16 20:48:25.851657
2019-03-17 20:48:25.851657
2019-03-18 20:48:25.851657
2019-03-19 20:48:25.851657
2019-03-20 20:48:25.851657
2019-03-21 20:48:25.851657
2019-03-22 20:48:25.851657
2019-03-23 20:48:25.851657
2019-03-24 20:48:25.851657
2019-03-25 20:48:25.851657
2019-03-26 20:48:25.851657
2019-03-27 20:48:25.851657
2019-03-28 20:48:25.851657
2019-03-29 20:48:25.851657
2019-03-30 20:48:25.851657
2019-03-31 20:48:25.851657


In [264]:
calendar.monthrange(2019,1) # 시작 날의 요일(index)과 마지막 날

(1, 31)

In [266]:
def date_range(start,stop,step):
    while start < stop:
        yield start
        start += step
        
for d in date_range(datetime(2012,9,1),datetime(2012,10,1),timedelta(hours=6)):
    print(d)

2012-09-01 00:00:00
2012-09-01 06:00:00
2012-09-01 12:00:00
2012-09-01 18:00:00
2012-09-02 00:00:00
2012-09-02 06:00:00
2012-09-02 12:00:00
2012-09-02 18:00:00
2012-09-03 00:00:00
2012-09-03 06:00:00
2012-09-03 12:00:00
2012-09-03 18:00:00
2012-09-04 00:00:00
2012-09-04 06:00:00
2012-09-04 12:00:00
2012-09-04 18:00:00
2012-09-05 00:00:00
2012-09-05 06:00:00
2012-09-05 12:00:00
2012-09-05 18:00:00
2012-09-06 00:00:00
2012-09-06 06:00:00
2012-09-06 12:00:00
2012-09-06 18:00:00
2012-09-07 00:00:00
2012-09-07 06:00:00
2012-09-07 12:00:00
2012-09-07 18:00:00
2012-09-08 00:00:00
2012-09-08 06:00:00
2012-09-08 12:00:00
2012-09-08 18:00:00
2012-09-09 00:00:00
2012-09-09 06:00:00
2012-09-09 12:00:00
2012-09-09 18:00:00
2012-09-10 00:00:00
2012-09-10 06:00:00
2012-09-10 12:00:00
2012-09-10 18:00:00
2012-09-11 00:00:00
2012-09-11 06:00:00
2012-09-11 12:00:00
2012-09-11 18:00:00
2012-09-12 00:00:00
2012-09-12 06:00:00
2012-09-12 12:00:00
2012-09-12 18:00:00
2012-09-13 00:00:00
2012-09-13 06:00:00


### 4-2) Delegating Iteration
- (goal) build a custom container object that internally holds 
- (sol) __iter__ 

In [8]:
class Node:
    def __init__(self,value):
        self._value = value;
        self._children = []
        self.idx = 0
        
    def __repr__(self):
        return 'Node({!r})'.format(self._value)
    
    def add_child(self,node):
        self._children.append(node)
    
    ## iterable
    def __iter__(self):
        return iter(self._children)
    
    ## next method
    def __next__(self):
        ret = self._children[self.idx]
        self.idx+=1
        return ret
    
    
if __name__ == '__main__':
    root = Node(0)
    child1 = Node(1)
    child2 = Node(2)
    root.add_child(child1)
    root.add_child(child2)
    print(next(root))
    print(next(root))
#    print(next(root,None))
    
    #for ch in root: # __iter__ method is called
    #    print(ch)

Node(1)
Node(2)


### 4-6) Defining Generator Functions with extra state
- history를 남기는 예제
- (sol) class를 새로 정의

In [None]:
## 4_6_example.txt
'''
first que
second python que
third que
forth python que
fifth que
'''

In [1]:
from collections import deque

class linehistory:
    def __init__(self,lines,histlen = 3):
        self.lines = lines
        self.history = deque(maxlen = histlen)
        
    def __iter__(self):
        ## 1부터 시작
        for lineno,line in enumerate(self.lines,1):
            self.history.append((lineno,line))
            yield line ## history 에는 3개만 저장하고 출력은 다 하기!!
            
    def clear(self):
        self.history.clear()

In [4]:
with open('4_6_example.txt') as f:
    lines = linehistory(f)
    for line in lines:
        if 'python' in line:
            print("*"*20)
            for lineno,hline in lines.history:
                print("{}:{}".format(lineno,hline),end='')


********************
1:first que
2:second python que
********************
2:second python que
3:third que
4:forth python que


### iterator 장점을 더 잘 활용하려면 class를 정의하고 __iter__나 __next__를 새롭게 정의하기!!

- 추가적으로 주의할 사항으로는 클래스를 새로 정의한 경우, iter(객체)를 먼저해주고, next를 사용해야함!!

In [13]:
f = open('4_6_example.txt')
lines = linehistory(f)
next(lines)

TypeError: 'linehistory' object is not an iterator

In [14]:
it = iter(lines)
next(it)

'first que\n'

In [15]:
next(it) # next는 기존의 정의된 형태로 사용됨!

'second python que\n'

### 4-10) Iterating over the index-value pairs of a sequence
- (sol) enumerate를 사용 (useful)

In [33]:
my_list = ['a','b','c']
for idx,val in enumerate(my_list):
    print(idx,val)

0 a
1 b
2 c


In [34]:
## 시작을 1부터!!
for idx,val in enumerate(my_list,1):
    print(idx,val)

1 a
2 b
3 c


In [None]:
## 문제가 있는 문장 찾아서 위치랑 같이 출력할 때!!
def parse_data(filename):
    with open(filename,'rt') as f:
        for lineno,line in enumerate(f,1):
            fields = line.split()
            try:
                count = int(fields[1])
            except ValueError as e:
                print('Line {}: Parse error: {}'.format(lineno,e))

In [None]:
### 문장 내 단어의 출현 문장 위치를 저장하기
from collections import defaultdict
word_summary = defaultdict(list)

with open("myfile.txt",'r') as f:
    lines = f.readlines()
    
    for idx,line in enumerate(lines):
        words = [w.strip().lower() for w in line.split()]
        for word in words:
            word_summary[word].append(idx)

In [35]:
## 주의사항 : tuple의 원소 각각 뽑을 때는 tuple 형식 유지해주기 ()
data=[(1,2),(3,4),(5,6),(7,8)]
for n,(x,y) in enumerate(data):
    print(n,x,y)

0 1 2
1 3 4
2 5 6
3 7 8


In [36]:
## error
for n,x,y in enumerate(data):
    print(n,x,y)

ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
### 4-10) Iterating over the index-value pairs of a sequence
- (sol) enumerate를 사용

my_list = ['a','b','c']
for idx,val in enumerate(my_list):
    print(idx,val)

## 시작을 1부터!!
for idx,val in enumerate(my_list,1):
    print(idx,val)

## 문제가 있는 문장 찾아서 위치랑 같이 출력할 때!!
def parse_data(filename):
    with open(filename,'rt') as f:
        for lineno,line in enumerate(f,1):
            fields = line.split()
            try:
                count = int(fields[1])
            except ValueError as e:
                print('Line {}: Parse error: {}'.format(lineno,e))

### 문장 내 단어의 출현 문장 위치를 저장하기
from collections import defaultdict
word_summary = defaultdict(list)

with open("myfile.txt",'r') as f:
    lines = f.readlines()
    
    for idx,line in enumerate(lines):
        words = [w.strip().lower() for w in line.split()]
        for word in words:
            word_summary[word].append(idx)

## 주의사항 : tuple의 원소 각각 뽑을 때는 tuple 형식 유지해주기 ()
data=[(1,2),(3,4),(5,6),(7,8)]
for n,(x,y) in enumerate(data):
    print(n,x,y)

## error
for n,x,y in enumerate(data):
    print(n,x,y)