Sorting and set operations

In [1]:
import numpy as np

unsorted = np.random.randn(10)
print(unsorted)

[-0.71221795  0.2064967   1.05344318 -0.39263391  0.73551064 -0.20263208
 -1.39128698 -0.4312167   2.7378222   1.12982495]


In [2]:
sorted = np.array(unsorted)
sorted.sort()
print(sorted)

[-1.39128698 -0.71221795 -0.4312167  -0.39263391 -0.20263208  0.2064967
  0.73551064  1.05344318  1.12982495  2.7378222 ]


In [3]:
# 중복된 것 없애줌
array = np.array([1,2,3,1,2,3])
np.unique(array)

array([1, 2, 3])

In [4]:
# set operation
s1 = np.array(["desk", "chair", "bulb"])
s2 = np.array(["lamp", "bulb", "chair"])
print(np.intersect1d(s1,s2)) # 겹치는 것(교집합) 출력
print(np.union1d(s1,s2)) # 중복 제거한 전체 출력
print(np.setdiff1d(s1,s2)) # s1에서 s2를 뺀 차집합 출력
print(np.in1d(s1,s2)) # s1이면서 s2인 element는 true

['bulb' 'chair']
['bulb' 'chair' 'desk' 'lamp']
['desk']
[False  True  True]


Broadcasting

In [5]:
start = np.zeros((4,3))
add_rows = np.array([1,0,2])
print(start)
print(add_rows)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[1 0 2]


In [6]:
# use broadcast to add to each row of 'start'
# broadcast가 자동으로 부족한 row 맞춰줌
y = start + add_rows
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [7]:
# notice [[ ]] not [ ]
add_cols = np.array([[0,1,2,3]])
add_cols = add_cols.T
# vector를 transpose하면 vector가 된다.
# 그래서 matrix로 만들고 transpose 해야 함
print(add_cols)
# broadcast가 자동으로 부족한 column 맞춰줌
y = start + add_cols
print(y)

[[0]
 [1]
 [2]
 [3]]
[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [8]:
add_scalar = np.array([1])
print(add_scalar)
# it will broadcast in both directions
print(start + add_scalar)

[1]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


Speed test ndarray vs. python list : 넘파이가 훨씬 빠르다

In [11]:
import numpy as np
from timeit import Timer

size = 1000000
numberOfOperation = 1500
timeits = 1000

nd_array = np.arange(size)
print(nd_array.shape, type(nd_array))

(1000000,) <class 'numpy.ndarray'>


In [12]:
timer_numpy = Timer("nd_array.sum()", "from __main__ import nd_array")
print("Time taken by numpy ndarray : %f seconds" %
     (timer_numpy.timeit(numberOfOperation)/timeits))

Time taken by numpy ndarray : 0.000635 seconds


In [13]:
a_list = list(range(size))
print(type(a_list), len(a_list))

<class 'list'> 1000000


In [14]:
timer_list = Timer("sum(a_list)", "from __main__ import a_list")
print("Time taken by list : %f seconds" %
     (timer_list.timeit(numberOfOperation)/timeits))

Time taken by list : 0.035373 seconds
