In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
arr1 = np.array([1, 2, 3])
print(f'{arr1}, {type(arr1)}, {arr1.shape}')

[1 2 3], <class 'numpy.ndarray'>, (3,)


In [3]:
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(f'{arr2}, {type(arr2)}, {arr2.shape}')

[[1 2 3]
 [4 5 6]], <class 'numpy.ndarray'>, (2, 3)


In [4]:
arr3 = np.array([[1, 2, 3]])
print(f'{arr3}, {type(arr3)}, {arr3.shape}, {arr3.ndim}D.')

[[1 2 3]], <class 'numpy.ndarray'>, (1, 3), 2D.


In [5]:
# 차원 확인
print(f'arr1 : {arr1.ndim}D.')
print(f'arr2 : {arr2.ndim}D.')
print(f'arr3 : {arr3.ndim}D.')

arr1 : 1D.
arr2 : 2D.
arr3 : 2D.


In [6]:
# arange : array + range
print(np.arange(3))
print(np.arange(1, 20, 3))

[0 1 2]
[ 1  4  7 10 13 16 19]


In [7]:
# numpy array 초기화
np.zeros((2, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

* np.zeros_like() : () 안의 배열과 동일한 모양을 가진 배열을 0으로 채워서 생성하기

In [9]:
one_a = np.ones((3, 4))
print(one_a)
print(np.zeros_like(one_a))

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [10]:
one_b = np.array([[1, 2, 3], [4, 5, 6]])
print(one_b)
print(np.zeros_like(one_b))

[[1 2 3]
 [4 5 6]]
[[0 0 0]
 [0 0 0]]


In [11]:
# np.full()
print(np.full((4, 3), 9))

[[9 9 9]
 [9 9 9]
 [9 9 9]
 [9 9 9]]


In [15]:
# np.random.random() vs. np.random.rand()
# seed 생성
np.random.seed(1)

# random : 0 ~ 1 중 무작위 수
# 배열의 형태(파라미터)를 튜플 형태로 받는다.
print(np.random.random((3, 4)))

# rand : 0 ~ 1 중 무작위 수
# 여러 개의 파라미터를 받아 차원을 설정할 수 있다.
print(np.random.rand(3, 4))
print(np.random.rand(2, 3, 4))

# randn : 표준정규분포
print(np.random.randn(3, 4))
# 표준화 : (x - x_Bar) / std

[[4.17022005e-01 7.20324493e-01 1.14374817e-04 3.02332573e-01]
 [1.46755891e-01 9.23385948e-02 1.86260211e-01 3.45560727e-01]
 [3.96767474e-01 5.38816734e-01 4.19194514e-01 6.85219500e-01]]
[[0.20445225 0.87811744 0.02738759 0.67046751]
 [0.4173048  0.55868983 0.14038694 0.19810149]
 [0.80074457 0.96826158 0.31342418 0.69232262]]
[[[0.87638915 0.89460666 0.08504421 0.03905478]
  [0.16983042 0.8781425  0.09834683 0.42110763]
  [0.95788953 0.53316528 0.69187711 0.31551563]]

 [[0.68650093 0.83462567 0.01828828 0.75014431]
  [0.98886109 0.74816565 0.28044399 0.78927933]
  [0.10322601 0.44789353 0.9085955  0.29361415]]]
[[-0.69166075 -0.39675353 -0.6871727  -0.84520564]
 [-0.67124613 -0.0126646  -1.11731035  0.2344157 ]
 [ 1.65980218  0.74204416 -0.19183555 -0.88762896]]


In [17]:
# np.eye() : 항등 행렬
# A * I = A
print(np.eye(4))

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [18]:
# 일반 행렬과 항등 행렬과의 내적은 자기 자신이다.

A = np.array([[2, 3, 1, 5],
              [8, 1, 3, 4],
              [5, 6, 7, 8],
              [1, 2, 3, 4]
              ])
print(A)

I = np.eye(4)
print(I)

print(A@I)

[[2 3 1 5]
 [8 1 3 4]
 [5 6 7 8]
 [1 2 3 4]]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[2. 3. 1. 5.]
 [8. 1. 3. 4.]
 [5. 6. 7. 8.]
 [1. 2. 3. 4.]]


In [19]:
# numpy array 크기

arr = np.arange(12)
print(f'{arr} --> {arr.shape}')
arr2 = arr.reshape(3, -1)
print(f'{arr2} --> {arr2.shape}')

[ 0  1  2  3  4  5  6  7  8  9 10 11] --> (12,)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]] --> (3, 4)


In [20]:
# order = 'F' : 값을 수직으로 채워넣음.
arr.reshape(4, -1, order='F')

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [22]:
arr1 = np.arange(10)
print(arr1)

arr2 = arr1.reshape(-1, 5)
print(f'{arr2} --> {arr2.ndim}')

[0 1 2 3 4 5 6 7 8 9]
[[0 1 2 3 4]
 [5 6 7 8 9]] --> 2


In [23]:
arr3 = np.arange(12)
print(f'{arr3} --> {arr3.shape}')

arr4 = arr3.reshape(2, 3, 2)
print(f'{arr4} --> {arr4.ndim}. {arr4.shape}')

[ 0  1  2  3  4  5  6  7  8  9 10 11] --> (12,)
[[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]] --> 3. (2, 3, 2)


In [24]:
arr5 = arr3.reshape(2, 3, 2, order='F')
print(f'{arr5} --> {arr5.ndim}.')

[[[ 0  6]
  [ 2  8]
  [ 4 10]]

 [[ 1  7]
  [ 3  9]
  [ 5 11]]] --> 3.


In [25]:
# 1차원으로 변환
print(f'{arr2}\n{arr2.flatten()}')

[[0 1 2 3 4]
 [5 6 7 8 9]]
[0 1 2 3 4 5 6 7 8 9]


In [28]:
# numpy array 데이터 추출
# indexing
arr1 = np.arange(1, 10)
print(arr1)
print(arr1[2], arr1[-2])

[1 2 3 4 5 6 7 8 9]
3 8


In [29]:
arr2 = arr1.reshape(3, -1)
print(f'{arr2} --> {arr2.ndim}')
print(arr2[0, 0])
print(arr2[-1, -2])
print(arr2[-1, :-1])

[[1 2 3]
 [4 5 6]
 [7 8 9]] --> 2
1
8
[7 8]


In [31]:
print(arr1)

arr2 = arr1[0:3]
print(arr2)

arr3 = arr2[1:]
print(arr3)

arr4 = arr1.reshape(3, -1)
print(arr4)

[1 2 3 4 5 6 7 8 9]
[1 2 3]
[2 3]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [34]:
# 슬라이싱
print(arr4)
print(arr4[:2, :2])
print(arr4[1:3, :3])
print(arr4[:2, 1:])


[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2]
 [4 5]]
[[4 5 6]
 [7 8 9]]
[[2 3]
 [5 6]]


In [45]:
# 내적
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8], [9, 10], [1, 2]])

print(f'{arr1}\n{arr2}')
print(f'{arr1 @ arr2} --> 내적')
print(np.dot(arr1, arr2))

# 전치 행렬
print(f'\n{arr1}\n⊙\n{arr1.T}\n내적\n{arr1@arr1.T}')
print(arr1)
print(np.transpose(arr1))

[[1 2 3]
 [4 5 6]]
[[ 7  8]
 [ 9 10]
 [ 1  2]]
[[28 34]
 [79 94]] --> 내적
[[28 34]
 [79 94]]

[[1 2 3]
 [4 5 6]]
⊙
[[1 4]
 [2 5]
 [3 6]]
내적
[[14 32]
 [32 77]]
[[1 2 3]
 [4 5 6]]
[[1 4]
 [2 5]
 [3 6]]


## numpy를 이용한 기술통계

In [47]:
x = np.array([18, 26, 56, 9, 76, 34, -2])
print(len(x))
# 평균
print('MEAN :', np.mean(x))
# 분산
print('VARIATION :', np.var(x))
# 표준편차
print('STANDARD DEVIATION :', np.std(x))

7
MEAN : 31.0
VARIATION : 632.2857142857143
STANDARD DEVIATION : 25.145292089886613


In [50]:
print(x)
print('MAX :', np.max(x))
print('MIN :', np.min(x))
print('MEDIAN :', np.median(x))
x.sort()
print(x)

[18 26 56  9 76 34 -2]
MAX : 76
MIN : -2
MEDIAN : 26.0
[-2  9 18 26 34 56 76]


In [54]:
# 사분위수 --> boxplot에서 사용됨.

print('Q1 :', np.percentile(x, 25))
# Q2는 중앙값과 같다.
print('Q2 :', np.percentile(x, 50))
print('Q3 :', np.percentile(x, 75))
print('IQR :', np.percentile(x, 75) - np.percentile(x, 25))
print('OUTLIER :', np.percentile(x, 25), '보다 작고', np.percentile(x, 75), '보다 크다.')

Q1 : 13.5
Q2 : 26.0
Q3 : 45.0
IQR : 31.5
OUTLIER : 13.5 보다 작고 45.0 보다 크다.
