# 부동소수점

In [1]:
b = 0.35

In [2]:
b + 0.1

0.44999999999999996

In [3]:
c = 0.5

In [4]:
c.as_integer_ratio()

(1, 2)

In [5]:
b.as_integer_ratio()

(3152519739159347, 9007199254740992)

# decimal 모듈로 소수점 정밀도 조절

In [6]:
import decimal
from decimal import Decimal

In [7]:
decimal.getcontext()

Context(prec=28, rounding=ROUND_HALF_EVEN, Emin=-999999, Emax=999999, capitals=1, clamp=0, flags=[], traps=[InvalidOperation, DivisionByZero, Overflow])

In [8]:
d = Decimal(1)/Decimal(11)
d

Decimal('0.09090909090909090909090909091')

In [9]:
1/11

0.09090909090909091

In [10]:
decimal.getcontext().prec = 4
e = Decimal(1)/Decimal(11)
e

Decimal('0.09091')

In [11]:
decimal.getcontext().prec = 50
f = Decimal(1)/Decimal(11)
f

Decimal('0.090909090909090909090909090909090909090909090909091')

# 문자 정규표현식

In [12]:
import re

In [13]:
series = """
'01/18/2014 13:00:00', 100, '1st';
'01/18/2014 13:30:00', 110, '2st';
'01/18/2014 14:00:00', 120, '3st' """

In [14]:
# Matches one single-quoted field made only of digits, '/', ':' and whitespace,
# i.e. the quoted timestamp column of each record.
# Raw string so that `\s` reaches the regex engine as written instead of being
# treated as an (invalid) string-literal escape sequence.
dt = re.compile(r"'[0-9/:\s]+'")

In [15]:
result = dt.findall(series)
result

["'01/18/2014 13:00:00'", "'01/18/2014 13:30:00'", "'01/18/2014 14:00:00'"]

In [16]:
result

["'01/18/2014 13:00:00'", "'01/18/2014 13:30:00'", "'01/18/2014 14:00:00'"]

In [17]:
from datetime import datetime
pydt = datetime.strptime(result[0].replace("'", ""),'%m/%d/%Y %H:%M:%S')
pydt

datetime.datetime(2014, 1, 18, 13, 0)

In [18]:
print(pydt)

2014-01-18 13:00:00


In [19]:
print(type(pydt))

<class 'datetime.datetime'>


# 튜플

In [20]:
t = (1,2.5,'data') # 원소 수정 X

In [21]:
# count 와 index 만 제공

In [22]:
t.count('data')

1

In [23]:
t.index(2.5)

1

# 리스트 (튜플과 다르게 원소 변경가능)

In [24]:
l = [2,3.5,'data_']

In [25]:
l = list(t)

In [26]:
l

[1, 2.5, 'data']

In [27]:
l.append([4,3]) # 끝에 원소로 추가
l

[1, 2.5, 'data', [4, 3]]

In [28]:
l.extend([1.0,1.5,2.0]) # 각각 나누어서 추가
l

[1, 2.5, 'data', [4, 3], 1.0, 1.5, 2.0]

In [29]:
l.insert(1,'insert') # 특정 인덱스 위치에 추가
l

[1, 'insert', 2.5, 'data', [4, 3], 1.0, 1.5, 2.0]

In [30]:
l.remove('data') # 처음으로 나타나는 원소 삭제
l

[1, 'insert', 2.5, [4, 3], 1.0, 1.5, 2.0]

In [31]:
p = l.pop(3) # 특정 인덱스 원소 삭제
l

[1, 'insert', 2.5, 1.0, 1.5, 2.0]

# 반복문

In [32]:
m = [i ** 2 for i in range(5)]

In [33]:
m

[0, 1, 4, 9, 16]

In [34]:
r = list(range(0,8,1))
r

[0, 1, 2, 3, 4, 5, 6, 7]

# Function

In [35]:
def even(x):
    """Return whether x leaves no remainder when divided by 2."""
    remainder = x % 2
    return remainder == 0
even(3)

False

In [36]:
list(map(even,range(10)))

[True, False, True, False, True, False, True, False, True, False]

In [37]:
list(map(lambda x: x ** 2, range(10))) # 함수 정의 안하고 lambda 쓰면 바로 가능

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [38]:
list(filter(even,range(15))) # filter - True 가 되는 원소들 모으기

[0, 2, 4, 6, 8, 10, 12, 14]

In [39]:
from functools import reduce

In [40]:
reduce(lambda x,y : x-y,range(1,6)) # reduce is not a builtin here, so it is imported from functools first
                                     # it accumulates left to right: (((1-2)-3)-4)-5

-13

# dictionary

In [41]:
import six

In [42]:
# The key 'name' appears twice in this literal. A dict keeps one entry per key,
# so the later value ('angela') replaces the earlier one ('angela markel').
# NOTE(review): confirm the duplicate key is an intentional demonstration.
d = {'name' : 'angela markel',
    'country' : 'germany',
    'age' : 60,
    'name' : 'angela'}

# A second dict, built from keyword arguments instead of a literal.
g = dict(a = 1,b = 2,c = 3,d = 2)

In [43]:
# Print every (key, value) pair of g. The dict's own items() view is used
# directly, matching the plain d.values() loop elsewhere in this notebook;
# the six compatibility shim is not needed on Python 3.
for item in g.items():
    print(item)

('a', 1)
('b', 2)
('c', 3)
('d', 2)


In [44]:
# Print every value of g. The dict's own values() view is used directly,
# matching the plain d.values() loop elsewhere in this notebook; the six
# compatibility shim is not needed on Python 3.
for item in g.values():
    print(item)

1
2
3
2


In [45]:
for value in d.values():
    print(value)

angela
germany
60


# set

In [46]:
# Set literal: the repeated 'd' and 'du' members collapse into one entry each.
s = {'u', 'd', 'ud', 'du', 'd', 'du'}
s

{'d', 'du', 'u', 'ud'}

In [47]:
# Second set, written as a literal, used for the set-algebra examples.
t = {'d', 'dd', 'uu', 'u'}
t

{'d', 'dd', 'u', 'uu'}

In [48]:
s.union(t) # 합집합

{'d', 'dd', 'du', 'u', 'ud', 'uu'}

In [49]:
s.intersection(t) # 교집합

{'d', 'u'}

In [50]:
s.difference(t) # s 중 t와 겹치지 않는것

{'du', 'ud'}

In [51]:
s.symmetric_difference(t) # 둘중에 하나에만 속하는 것들 모두

{'dd', 'du', 'ud', 'uu'}

In [52]:
# 리스트 중 중복된 원소를 제거할때 이용하기도함

from random import randint
l = [randint(0,10) for i in range(100)] # 0~10의 임의의 정수 100개 추출

In [53]:
l[:20]

[7, 7, 1, 5, 10, 0, 4, 2, 0, 8, 1, 6, 8, 6, 9, 10, 1, 6, 4, 8]

In [54]:
set(l)

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}

# Numpy 자료구조

In [55]:
# list 배열

v = [0.5,0.6,1.0]
m = [v,v,v]
m

[[0.5, 0.6, 1.0], [0.5, 0.6, 1.0], [0.5, 0.6, 1.0]]

In [56]:
v[0] = 'python'
m

[['python', 0.6, 1.0], ['python', 0.6, 1.0], ['python', 0.6, 1.0]]

In [57]:
# To keep m from changing when v is modified, store copies of v instead of v itself.
# Each row gets its OWN copy: 3 * [deepcopy(v)] would copy v only once and
# repeat that single list object three times, leaving the rows aliased to
# one another (changing m[0] would also change m[1] and m[2]).

from copy import deepcopy
v = [0.5,0.6,1.0]
m = [deepcopy(v) for _ in range(3)]
m

[[0.5, 0.6, 1.0], [0.5, 0.6, 1.0], [0.5, 0.6, 1.0]]

In [58]:
v[0] = 'python'
m

[[0.5, 0.6, 1.0], [0.5, 0.6, 1.0], [0.5, 0.6, 1.0]]

# 정규 Numpy 배열

In [59]:
import numpy as np
a = np.array([0,0.5,1.0,1.5,2.0])

In [60]:
a.sum()

5.0

In [61]:
a.std() # 표준편차

0.7071067811865476

In [62]:
a.cumsum() # 누적합

array([0. , 0.5, 1.5, 3. , 5. ])

In [63]:
a*2

array([0., 1., 2., 3., 4.])

In [64]:
a**2

array([0.  , 0.25, 1.  , 2.25, 4.  ])

In [65]:
b = np.array([a,a*2])
b

array([[0. , 0.5, 1. , 1.5, 2. ],
       [0. , 1. , 2. , 3. , 4. ]])

In [66]:
b.sum(axis = 0) # 열의 합

array([0. , 1.5, 3. , 4.5, 6. ])

In [67]:
b.sum(axis = 1) # 행의 합

array([ 5., 10.])

In [68]:
c = np.zeros((2,3,4),dtype = 'i', order = 'C')  # C = 행기반, F = 열기반
c

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=int32)

In [69]:
d = np.ones_like(c, dtype = 'f8', order = 'C')
d

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

In [70]:
# Example
import random 
I = 5000
%time mat = [[random.gauss(0,1) for j in range(I)] for i in range(I)]


Wall time: 15.1 s


In [71]:
print(mat[0])

[0.601216719760944, 0.6201363058252553, 0.5517728965035688, 0.33175163292939264, -1.4853275003715967, 1.0174926904550403, -0.9997672520922811, 0.7637551924389848, -0.27545796736642536, 0.3730989498947956, -0.43868270700870887, 0.3841297972396417, 2.5260732316294456, -1.7972912528719083, 0.9127517365860703, -0.9423632140715461, -0.5500139295090801, 0.9241629863673558, 1.2260585169950942, -1.4091189847648313, 0.31950333710307616, -0.8372405409852968, -0.5168052573138177, 0.4028098983764544, 0.49438764731260165, 0.6500712391743788, 0.2320373013248024, 1.7284127818716208, 0.8720086917906901, -1.7050976813496297, 1.044931493228132, -0.41257193472034753, 0.4010715240360909, -0.6207530793614787, 0.5348240567081607, -1.006021762552133, -0.40501572848839645, -0.07577043705478474, 1.0062052410330522, -1.6843079908595149, 1.1676925925798398, 1.2162540043362267, -0.2328661710562838, 1.113536105499214, 0.7876453692195271, -2.390044736488467, 0.4267832315355539, 0.12926477124920432, -2.2368226890181

In [72]:
%time reduce(lambda x, y : x + y, [reduce(lambda x,y : x + y,row) for row in mat])

Wall time: 1.72 s


3262.4602764267324

In [73]:
%time mat = np.random.standard_normal((I,I))

Wall time: 904 ms


In [74]:
%time mat.sum()

Wall time: 39.9 ms


-4307.0604006304975

# 구조화 배열

In [75]:
dt = np.dtype([('Name','S10'),('Age','i4'),
              ('Height','f'),('Children/Pets','i4',2)])

In [76]:
s = np.array([('smith',35,1.83,(0,1)),('jones',53,1.72,(2,2))],dtype = dt)

In [77]:
s

array([(b'smith', 35, 1.83, [0, 1]), (b'jones', 53, 1.72, [2, 2])],
      dtype=[('Name', 'S10'), ('Age', '<i4'), ('Height', '<f4'), ('Children/Pets', '<i4', (2,))])

In [78]:
s['Name']

array([b'smith', b'jones'], dtype='|S10')

In [79]:
s['Height'].mean()

1.7750001

# 코드 벡터화

In [80]:
r = np.random.standard_normal((4,3))
s = np.random.standard_normal((4,3))

In [81]:
r+s

array([[-0.70422806, -1.5380622 , -0.141136  ],
       [-2.11917114,  0.00831507,  1.02034256],
       [ 1.25741333,  1.01595865, -0.59612369],
       [-0.29492861, -3.31471979, -0.97189351]])

In [82]:
2*r+3

array([[3.0645438 , 2.12749601, 3.05053419],
       [2.79137315, 1.02019074, 3.64640367],
       [4.25323476, 2.75817914, 2.20971978],
       [0.80713423, 2.01118259, 0.18926311]])

In [83]:
s = np.random.standard_normal(3) # 1 x 3 짜리 

In [84]:
r+s # 각 행에 s 를 더해 -> 브로드캐스팅 동작

array([[-0.91097352,  0.56225726,  0.94762971],
       [-1.04755884,  0.00860463,  1.24556446],
       [-0.31662804,  0.87759883,  0.52722251],
       [-2.0396783 ,  0.50410055, -0.48300583]])

In [85]:
s = np.random.standard_normal(4)
r+s              # 브로드캐스팅 불가능

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [86]:
r.transpose() + s  # transposing r from 4 x 3 to 3 x 4 makes its last axis length 4, so the length-4 s can be broadcast

array([[-0.33741324, -0.84048496, -1.52731124, -0.06420421],
       [-0.80593714, -1.72607617, -2.27483905,  0.53781997],
       [-0.34441805, -0.4129697 , -2.54906873, -0.37313978]])

In [87]:
# 함수에서도 가능

def f(x):
    """Return 3 * x + 5, computed with ordinary arithmetic operators on x."""
    scaled = 3 * x
    return scaled + 5

print(f(0.5))
print(f(r))

6.5
[[5.0968157  3.69124402 5.07580128]
 [4.68705973 2.0302861  5.96960551]
 [6.87985214 4.63726871 3.81457967]
 [1.71070135 3.51677389 0.78389467]]


In [88]:
# 하지만 math 함수 불가능 / np 함수는 가능

import math

print(np.sin(r))

print(math.sin(r))


[[ 0.0322663  -0.42254547  0.02526441]
 [-0.10412435 -0.83597365  0.31760424]
 [ 0.58640814 -0.12061604 -0.38493751]
 [-0.88958366 -0.47451125 -0.98634798]]


TypeError: only size-1 arrays can be converted to Python scalars

# 메모리 배치

In [89]:
x = np.random.standard_normal((5,10000000))
y = 2*x+3
C = np.array((x,y), order = 'C')
F = np.array((x,y), order = 'F')
x = 0.0
y = 0.0

In [90]:
%timeit C.sum()
%timeit F.sum() # 전체 합계를 구할땐 차이 거의 없다

140 ms ± 3.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
140 ms ± 1.61 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [91]:
%timeit C[0].sum(axis = 0)
%timeit C[0].sum(axis = 1) # 원소 개수가 많은 소수 벡터 더하는게 더 느려

69.3 ms ± 2.28 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
73.1 ms ± 2.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [92]:
%timeit F.sum(axis=0)
%timeit F.sum(axis=1) # original note: summing a few vectors with many elements is the faster case

# original note: overall, the C layout is faster than the F layout
# NOTE(review): the C-order cell timed C[0] (one (5, 10000000) slice, half of the
# data), while this cell times the full F array of shape (2, 5, 10000000). The
# axes and the data volume differ, so the two sets of timings are not directly
# comparable; timing C.sum(axis=0) / C.sum(axis=1) would be like-for-like.
# TODO confirm which comparison was intended.

465 ms ± 6.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.33 s ± 26.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
