## 넘파이 ndarray 개요

In [3]:
import numpy as np

In [11]:
# 1-D array built from a flat list: shape is (3,)
array1 = np.array([1,2,3])
print('array1 type: ', type(array1))
print('array 1 array 형태:', array1.shape)

# 2-D array with two rows and three columns: shape is (2, 3)
array2 = np.array([[1,2,3],
                  [2,3,4]])
print('array2 type: ', type(array2))
print('array2 array 형태:', array2.shape)

# The extra pair of brackets makes this 2-D with a single row: shape is (1, 3)
array3 = np.array([[1,2,3]])
print('array3 type: ', type(array3))
print('array3 array 형태:', array3.shape)

# Use positional fields {0}/{1}/{2}. The earlier '{:0}', '{:1}', '{:2}' were
# format specs (fill/width) rather than argument indices, so the last value
# was padded to width 2 and printed with a stray leading space.
ndim_summary = 'array1: {0}차원, array2: {1}차원, array3: {2}차원'.format(array1.ndim, array2.ndim, array3.ndim)
print(ndim_summary)

array1 type:  <class 'numpy.ndarray'>
array 1 array 형태: (3,)
array2 type:  <class 'numpy.ndarray'>
array2 array 형태: (2, 3)
array3 type:  <class 'numpy.ndarray'>
array3 array 형태: (1, 3)
array1: 1차원, array2: 2차원, array3:  2차원


### ndarray의 데이터 타입

All elements in an ndarray must share the same data type (e.g., `int` and `float` values cannot coexist in one array).
If the inputs have mixed types, NumPy converts every element to the most general type among them.

In [19]:
# Four input lists, each with a different mix of element types.
list1 = [1, 2, 3]                      # integers only
list2 = ['test1', 'test2', 'test3']    # strings only
list3 = [1, 2, 'test']                 # integers mixed with a string
list4 = [1, 2, 3.0]                    # integers mixed with a float

# Convert every list to an ndarray; NumPy picks one common dtype per array.
array1, array2, array3, array4 = (
    np.array(values) for values in (list1, list2, list3, list4)
)

# A Python list versus the ndarray built from it.
print(type(list1))
print(type(array1))
print('only int: ', array1, array1.dtype)

# Strings stay strings (unicode dtype).
print('only string: ', array2, array2.dtype)

# Integers are promoted to strings when mixed with a string.
print('int and string: ', array3, array3.dtype)

# Integers are promoted to floats when mixed with a float.
print('int and float: ', array4, array4.dtype)

<class 'list'>
<class 'numpy.ndarray'>
only int:  [1 2 3] int64
only string:  ['test1' 'test2' 'test3'] <U5
int and string:  ['1' '2' 'test'] <U21
int and float:  [1. 2. 3.] float64


In [22]:
# Integer -> float64: the values are preserved, only the dtype changes.
array_int = np.array([1, 2, 3])
array_float = array_int.astype('float64')
print('array_int: ', array_int, array_int.dtype)
print('array_float: ', array_float, array_float.dtype)

# float64 -> int32 (8 bytes -> 4 bytes per element): the fractional part of
# each value is dropped, e.g. 1.1 becomes 1.
array_float2 = np.array([1.1, 2.2, 3.3])
array_int2 = array_float2.astype('int32')
print('array_float2: ', array_float2, array_float2.dtype)
print('array_int2: ', array_int2, array_int2.dtype)

array_int:  [1 2 3] int64
array_float:  [1. 2. 3.] float64
array_float2:  [1.1 2.2 3.3] float64
array_int2:  [1 2 3] int32


### ndarray 편리하게 생성하기

In [23]:
# With a single argument, arange starts at 0 and stops before the given
# value; the start is spelled out here to make that explicit.
seq_array = np.arange(0, 10)
print(seq_array)
print(seq_array.dtype, seq_array.shape)

[0 1 2 3 4 5 6 7 8 9]
int64 (10,)


In [26]:
# 3x2 array of zeros with an explicitly requested 32-bit integer dtype.
zero_array = np.zeros(shape=(3, 2), dtype=np.int32)
print(zero_array)
print(zero_array.dtype, zero_array.shape)

# 3x2 array of ones; no dtype is given, so NumPy's default (float64) applies.
one_array = np.ones(shape=(3, 2))
print(one_array)
print(one_array.dtype, one_array.shape)

[[0 0]
 [0 0]
 [0 0]]
int32 (3, 2)
[[1. 1.]
 [1. 1.]
 [1. 1.]]
float64 (3, 2)


### reshape()

In [27]:
# Ten elements, 0..9, as a 1-D array.
array1 = np.arange(10)
print(array1)

# 10 elements fit exactly into 2 rows x 5 columns.
array2 = array1.reshape(2,5)
print(array2)

# ...and equally into 5 rows x 2 columns.
array3 = array1.reshape(5,2)
print(array3)

# Deliberate failure case: 10 elements cannot fill a (9, 1) shape, so reshape
# raises ValueError. The error is caught and displayed so the notebook still
# survives "Restart Kernel -> Run All" instead of aborting at this cell.
reshape_error = None
try:
    array4 = array1.reshape(9,1)
    print(array4)
except ValueError as err:
    reshape_error = str(err)
    print('ValueError:', reshape_error)

[0 1 2 3 4 5 6 7 8 9]
[[0 1 2 3 4]
 [5 6 7 8 9]]
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


ValueError: cannot reshape array of size 10 into shape (9,1)

reshape의 인자로 `-1`을 사용하면, 전체 원소 개수와 함께 지정된 다른 축의 크기에 맞춰 해당 축의 크기를 자동으로 계산해준다.

In [29]:
array1 = np.arange(10)
print('array1: ', array1)

# Five rows requested; -1 lets NumPy work out the column count (2).
array2 = array1.reshape((5, -1))
print('array2: ', array2)

# Two columns requested; -1 lets NumPy work out the row count (5).
array3 = array1.reshape((-1, 2))
print('array3: ', array3)

# A lone -1 flattens to 1-D, which for array1 gives the same shape back.
array4 = array1.reshape((-1,))
print('array4: ', array4)

array1:  [0 1 2 3 4 5 6 7 8 9]
array2:  [[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
array3:  [[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
array4:  [0 1 2 3 4 5 6 7 8 9]


In [35]:
# Three levels of brackets give a 3-D array of shape (1, 1, 3).
array1 = np.array([[[1, 2, 3]]])
# Collapse it into a single column; -1 infers the row count (3).
array2 = array1.reshape((-1, 1))
print('array1: ', array1, array1.shape)
print('array2: ', array2, array2.shape)


array1:  [[[1 2 3]]] (1, 1, 3)
array2:  [[1]
 [2]
 [3]] (3, 1)


tolist()는 ndarray를 파이썬 리스트로 변환해 준다 -> 출력 시 가독성을 높일 수 있다 (can enhance readability)

In [40]:
# Eight elements (0..7), then the same data viewed as a 2x2x2 cube.
array1d = np.arange(8)
array3d = array1d.reshape((2, 2, 2))
print('array1d: ', array1d, array1d.shape)
print('array3d: ', array3d, array3d.shape)
# tolist() yields nested Python lists, which print on a single line.
print('array3d with tolist(): ', array3d.tolist())

# Flatten the cube into a single column of 8 rows.
array2d = array3d.reshape((-1, 1))
print('array2d: ', array2d, array2d.shape)
print('array2d with tolist(): ', array2d.tolist())



array1d:  [0 1 2 3 4 5 6 7] (8,)
array3d:  [[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]] (2, 2, 2)
array3d with tolist():  [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]
array2d:  [[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]] (8, 1)
array2d with tolist():  [[0], [1], [2], [3], [4], [5], [6], [7]]


### 인덱싱

In [46]:
# Values 1..9 in a 1-D array.
array1 = np.arange(1, 10)
print('array1: ', array1)

# Indexing a single position returns a NumPy scalar, not an ndarray.
val = array1[2]
print('세 번째 요소: ', val)
print(type(val))

# Element assignment works with both positive and negative indices.
array1[1], array1[-2] = 1, 9

print('changed array1: ', array1)

array1:  [1 2 3 4 5 6 7 8 9]
세 번째 요소:  3
<class 'numpy.int64'>
changed array1:  [1 1 3 4 5 6 7 9 9]


In [52]:
# Values 1..9 arranged as a 3x3 grid.
array1d = np.arange(1, 10)
array2d = array1d.reshape((3, 3))
print(array2d.tolist())

# Look up individual cells with [row, column] indexing; one line per cell.
print('\n'.join(
    f'row {r}, column {c}:  {array2d[r, c]!s}'
    for r, c in ((0, 0), (0, 1), (0, 2), (1, 0), (1, 1))
))

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
row 0, column 0:  1
row 0, column 1:  2
row 0, column 2:  3
row 1, column 0:  4
row 1, column 1:  5


slicing

In [61]:
# 3x3 grid holding 1..9.
array2d = np.arange(1, 10).reshape((3, 3))
print('array2d: ', array2d)

# A row slice keeps two dimensions: the result has shape (1, 3).
print('slicing row 0: ', array2d[0:1])
# A full-row slice combined with an integer column index gives a 1-D array.
print('slicing column 1: ', array2d[:, 1])

array2d:  [[1 2 3]
 [4 5 6]
 [7 8 9]]
slicing row 0:  [[1 2 3]]
slicing column 1:  [2 5 8]


In [62]:
array1d = np.arange(1, 10)
array2d = array1d.reshape((3, 3))

# Row and column slices can be combined; each result is printed on its own
# line(s), in the same order as listed here.
print(
    array2d[0:3, 0:3],   # whole grid, explicit bounds
    array2d[:, :],       # whole grid, open-ended slices
    array2d[0:2, 0:2],   # top-left 2x2
    array2d[0:2, 1:3],   # top-right 2x2
    array2d[1:3, 0:2],   # bottom-left 2x2
    array2d[1:3, 1:3],   # bottom-right 2x2
    sep='\n',
)


[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2]
 [4 5]]
[[2 3]
 [5 6]]
[[4 5]
 [7 8]]
[[5 6]
 [8 9]]


fancy indexing: selects specific elements by passing a list (or ndarray) of index positions

In [71]:
array1d = np.arange(1, 10)
array2d = array1d.reshape((3, 3))

# Rows 0 and 1, column 2 -> 1-D result.
array3 = array2d[[0, 1], 2]
# Rows 0 and 1, columns 0..1 -> 2x2 result.
array4 = array2d[[0, 1], 0:2]
# Rows 0 and 1, every column -> 2x3 result.
array5 = array2d[[0, 1]]

print('array2d[[0,1], 2]: ', array3.tolist())
print('array2d[[0,1], 0:2]: ', array4.tolist())
print('array2d[[0,1]]: ', array5.tolist())

array2d[[0,1], 2]:  [3, 6]
array2d[[0,1], 0:2]:  [[1, 2], [4, 5]]
array2d[[0,1]]:  [[1, 2, 3], [4, 5, 6]]


boolean indexing: only selects elements where the condition is true

In [72]:
# Values 1..9.
array1d = np.arange(1, 10)
# The comparison yields a boolean mask; indexing with it keeps only the
# elements where the mask is True.
array1 = array1d[array1d > 5]
print('array1d>5: ', array1.tolist())

array1d>5:  [6, 7, 8, 9]


In [74]:
# Evaluates to a boolean ndarray: True where the element is greater than 5, False elsewhere.
array1d > 5

array([False, False, False, False, False,  True,  True,  True,  True])

True값에 해당하는 **index**만 저장하는 것임

저장된 index 데이터 세트로 ndarray 조회

### 행렬의 정렬 sort() and argsort()

In [79]:
org_array = np.array([3, 1, 9, 5])
print('org_array: ', org_array)

# np.sort() returns a sorted copy and leaves its input untouched.
np_sorted_array = np.sort(org_array)
print('np_sorted_array: ', np_sorted_array)

# ndarray.sort() sorts in place and returns None, so the name below is
# bound to None while org_array itself ends up sorted.
ndarray_sorted_array = org_array.sort()
print('ndarray_sorted_array: ', ndarray_sorted_array)
print('org_array after sort:', org_array)




org_array:  [3 1 9 5]
np_sorted_array:  [1 3 5 9]
ndarray_sorted_array:  None
org_array after sort: [1 3 5 9]


In [83]:
# Descending order: sort ascending first, then reverse with a [::-1] slice.
# org_array comes from the preceding sort cell.
np_sorted_desc_array = np.sort(org_array)[::-1]
print('np_sorted_desc_array: ', np_sorted_desc_array)

np_sorted_desc_array:  [9 5 3 1]


In [87]:
array2d = np.array([[8, 12],
                    [7, 1]])

# axis=0 sorts along the row direction: the values inside each column are
# ordered top to bottom (8,7 -> 7,8 and 12,1 -> 1,12).
sort_array2d_axis0 = np.sort(array2d, axis=0)
print('sort array2d by row:\n', sort_array2d_axis0)

# axis=1 sorts along the column direction: the values inside each row are
# ordered left to right (8,12 stays, 7,1 -> 1,7).
sort_array2d_axis1 = np.sort(array2d, axis=1)
print('sort array2d by column:\n', sort_array2d_axis1)

sort array2d by row:
 [[ 7  1]
 [ 8 12]]
sort array2d by column:
 [[ 8 12]
 [ 1  7]]


In [90]:
org_array = np.array([3, 1, 9, 5])
# argsort returns the positions in the original array that would put it in
# ascending order, rather than the sorted values themselves.
sort_indices = org_array.argsort()
print(type(sort_indices))
print('original array index after sorting: ', sort_indices)

<class 'numpy.ndarray'>
original array index after sorting:  [1 0 3 2]


In [91]:
# Reverse the ascending argsort result to get indices for descending order.
# org_array comes from the preceding argsort cell.
sort_indices_desc = np.argsort(org_array)[::-1]
print(
    'original array index after sorting in descending order: ',
    sort_indices_desc,
)

original array index after sorting in descending order:  [2 3 0 1]


In [93]:
# Two parallel arrays: name_array[i] scored score_array[i].
name_array = np.array(['John', 'Mike', 'Sarah', 'Kate', 'Samuel'])
score_array = np.array([78, 95, 84, 98, 84])

# Indices that order the scores ascending; indexing the names with them
# reorders the names by score.
sort_indices_asc = np.argsort(score_array)
print('indices of score array after sorting: ', sort_indices_asc)
print('name_array after sorting by score: ', name_array[sort_indices_asc])

indices of score array after sorting:  [0 2 4 1 3]
name_array after sorting by score:  ['John' 'Sarah' 'Samuel' 'Mike' 'Kate']


### 선형대수 연산 - 행렬 내적과 전치 행렬 구하기

행렬내적

In [96]:
# A is 2x3 and B is 3x2, so their product is 2x2.
A = np.array([[1, 2, 3],
              [4, 5, 6]])

B = np.array([[7, 8],
              [9, 10],
              [11, 12]])

# Matrix product via the ndarray method form of dot.
dot_product = A.dot(B)
print('dot product of A and B:\n', dot_product)

dot product of A and B:
 [[ 58  64]
 [139 154]]


전치행렬

In [98]:
A = np.array([[1, 2],
              [3, 4]])

# Swap rows and columns using the .T attribute.
transpose_mat = A.T
print('transposed A:\n', transpose_mat)

transposed A:
 [[1 3]
 [2 4]]


In [11]:
a = [1, 2]

def swap(a):
    """Exchange the first two elements of ``a`` in place.

    The argument is mutated directly; nothing is returned.
    """
    second, first = a[1], a[0]
    a[0] = second
    a[1] = first

swap(a)

# Everything except the last element.
print(a[:-1])

# Reversed copy of the list.
print(a[::-1])

# Every second element, starting from index 0.
print(a[::2])



[2]
[1, 2]
[2]
