### Numpy

#### 1. Numpy Array and Operation
- Learn about basic methods of Numpy
- Learn about initialization methods of numpy array and operations

##### 1.1 Numpy Array Creation

In [2]:
# globally used 
import numpy as np

In [2]:
# python list
data = [1,2,3,4,5]
data

[1, 2, 3, 4, 5]

In [3]:
# python list 2-Dim
data2 = [[1,2,3],[4,5,6],[7,8,9]]
data2

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [6]:
# list to numpy array
arr1 = np.array(data)
arr1

array([1, 2, 3, 4, 5])

In [9]:
# 2-dim list -> np.array is like matrix
arr2 = np.array(data2)
arr2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [11]:
# 0-9 auto-generation
arr1 = np.array(list(range(10)))
arr2 = np.arange(10)
arr2

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [13]:
np.arange(10, 30)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29])

##### 1.2. Reshaping array

In [14]:
# numpy array shape
arr1 = np.array(data)
arr2 = np.array(data2)
print(arr1.shape, arr2.shape)

(5,) (3, 3)


In [16]:
# generate 3x3 array
x = np.array([[1,2,3],[4,5,6],[7,8,9]])
x

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [17]:
# above mehtod is so tired -> use reshape method!
np.arange(1,10).reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [27]:
# flat maxtrix
x.reshape(-1,)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

##### 1.3. Concatenation of arrays

In [29]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

In [31]:
# arr1 + arr2 = ? is different from list(arr1) + list(arr2)
arr1 + arr2

array([5, 7, 9])

In [32]:
# Concatenation
np.concatenate([arr1, arr2])

array([1, 2, 3, 4, 5, 6])

In [34]:
# stacking vertically
np.vstack([arr1, arr2])

array([[1, 2, 3],
       [4, 5, 6]])

In [35]:
# stacking horizontally
np.hstack([arr1, arr2])

array([1, 2, 3, 4, 5, 6])

##### 1.4. Array Arithmetic (like vector)

In [36]:
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])

In [37]:
# vector addition
v1 + v2

array([5, 7, 9])

In [38]:
# vector subtraction
v2 - v1

array([3, 3, 3])

In [39]:
# (not vector operation) elementwise multiplication
v1 * v2

array([ 4, 10, 18])

In [40]:
# (not vector operation) elementwise division
v1 / v2

array([0.25, 0.4 , 0.5 ])

In [41]:
# dot product
v1 @ v2

32

##### 1.5. Broadcast and Universal Function
- When calculating numpy arrays of different sizes, the calculation is automatically broadcast.

In [42]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([[-1, -1, -1], [1, 1, 1]])
arr2.shape

(2, 3)

In [43]:
# is possible arr1 + arr2 ?
arr1 + arr2

array([[0, 1, 2],
       [2, 3, 4]])

In [44]:
# multiplication
arr1 * arr2

array([[-1, -2, -3],
       [ 1,  2,  3]])

- Universal Function : extension of broadcast, like iteration effect

In [46]:
# f = lambda x : 1/x
f = lambda x : 1/x
f(arr1)

array([1.        , 0.5       , 0.33333333])

In [47]:
f = lambda x : x  + 2
f(arr1)

array([3, 4, 5])

##### 1.6. Indexing

In [49]:
print(arr1[0], arr1[-1], arr1[:3])

1 3 [1 2 3]


In [51]:
arr2 = np.arange(12).reshape(3, 4)
arr2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [55]:
# 2row, 3column = 6
arr2[1,2]

6

In [56]:
# 3rd column
arr2[:,2]

array([ 2,  6, 10])

In [57]:
# 2nd row
arr2[1,:]

array([4, 5, 6, 7])

#### 1. Numpy Methods
- various methods of numpy array

##### 2.1. Math Functions

In [4]:
# 5 x 3 matrix by random sampling from normal distribution
mat1 = np.random.randn(5, 3)
mat1


array([[-0.61587347,  0.87346097,  0.34312245],
       [ 0.1171904 , -0.19955318,  0.53082718],
       [-2.12108092, -0.94407366, -0.63280341],
       [-0.7606765 ,  0.91095467,  0.33667099],
       [-0.28797125,  1.04336076,  0.51158281]])

In [5]:
# absolute value in numpy array
np.abs(mat1)

array([[0.61587347, 0.87346097, 0.34312245],
       [0.1171904 , 0.19955318, 0.53082718],
       [2.12108092, 0.94407366, 0.63280341],
       [0.7606765 , 0.91095467, 0.33667099],
       [0.28797125, 1.04336076, 0.51158281]])

In [6]:
# square of numpy array
np.square(mat1)

array([[0.37930013, 0.76293407, 0.11773301],
       [0.01373359, 0.03982147, 0.2817775 ],
       [4.49898426, 0.89127508, 0.40044016],
       [0.57862873, 0.82983841, 0.11334735],
       [0.08292744, 1.08860168, 0.26171697]])

In [7]:
# square root in numpy array
np.sqrt(mat1) 

  np.sqrt(mat1)


array([[       nan, 0.93459134, 0.58576655],
       [0.34233083,        nan, 0.72857888],
       [       nan,        nan,        nan],
       [       nan, 0.95443945, 0.58023356],
       [       nan, 1.02145032, 0.71525017]])

- nan = not a number (imaginary number)

In [None]:
# linear algebra functions
vec = np.array([1, 2, 2])

In [10]:
# 1. norm
np.linalg.norm(vec)

3.0

In [13]:
# 2. eigenvalue
square_matrix = np.array([[1, 2], [3,4]])
np.linalg.eig(square_matrix)

EigResult(eigenvalues=array([-0.37228132,  5.37228132]), eigenvectors=array([[-0.82456484, -0.41597356],
       [ 0.56576746, -0.90937671]]))

##### 2.2. Aggregation functions

In [14]:
mat2 = np.random.rand(3,2) # 0~1
mat2

array([[0.36759715, 0.54039778],
       [0.8745329 , 0.20108934],
       [0.90845413, 0.2150255 ]])

In [16]:
# Summation
np.sum(mat2)

3.1070968027762573

In [17]:
# Summation of rows
np.sum(mat2, axis = 1)

array([0.90799494, 1.07562224, 1.12347963])

In [18]:
# Summation of cols
np.sum(mat2, axis = 0)

array([2.15058418, 0.95651262])

In [20]:
# mean
np.mean(mat2)
np.mean(mat2, axis = 1)
np.mean(mat2, axis = 0)

array([0.71686139, 0.31883754])

In [22]:
# std
np.std(mat2)

0.2872045123648403

In [23]:
# min
np.min(mat2, axis=0)

array([0.36759715, 0.20108934])

In [24]:
# max
np.max(mat2, axis=1)

array([0.54039778, 0.8745329 , 0.90845413])

In [25]:
# arg min
np.argmin(mat2, axis=0)

array([0, 1], dtype=int64)

In [28]:
# arg max
np.argmax(mat2, axis=1)

array([1, 0, 0], dtype=int64)

In [30]:
# sorting
np.sort(mat2) # default axis = 1

array([[0.36759715, 0.54039778],
       [0.20108934, 0.8745329 ],
       [0.2150255 , 0.90845413]])

In [31]:
# sorting by column
np.sort(mat2, axis=0)

array([[0.36759715, 0.20108934],
       [0.8745329 , 0.2150255 ],
       [0.90845413, 0.54039778]])

In [33]:
# numpy sorting only ascending -> how decending?
np.sort(mat2, axis=0)[::-1]

array([[0.90845413, 0.54039778],
       [0.8745329 , 0.2150255 ],
       [0.36759715, 0.20108934]])

In [35]:
# sorted index result
np.argsort(mat2, axis=0)


array([[0, 1],
       [1, 2],
       [2, 0]], dtype=int64)

#### 3. Performance Check
- Universal Function 기능을 통해 반복문을 사용한 것보다 훨씬 빠른 성능을 낸다.
- 직접 실험을 통해 그 차이를 확인해 본다.

In [36]:
def reverse_num(values):
    output = np.empty(len(values))

    for i in range(len(values)):
        output[i] = 1.0 / values[i]
    
    return output

In [39]:
big_array = np.random.randint(1, 100, 1000000)
big_array

array([21, 75, 93, ..., 23, 32, 87])

In [40]:
%timeit reverse_num(big_array)

1.87 s ± 28.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [41]:
%timeit 1.0 / big_array

2.29 ms ± 140 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


- universal function 은 python 을 사용하지 않고 numpy 내부 cpp 코드를 사용하여 그 속도 차이가 크다.

#### 4. 수식구현

In [10]:
# cosine distance

def cosine_distance(x, y):
    n1 = np.linalg.norm(x)
    n2 = np.linalg.norm(y)

    cosine_sim = x @ y / n1*n2
    print(n1, n2, cosine_sim)
    return 1 - cosine_sim

In [11]:
v1 = np.array([1,2,3,-4])
v2 = np.array([-1,2,5,1])
print(cosine_distance(v1, v2))

5.477225575051661 5.5677643628300215 14.231420636511777
-13.231420636511777
