# NumPy
- NumPy is the fundamental package for scientific computing with Python. It contains among other things:
  - a powerful N-dimensional array object
  - sophisticated (broadcasting) functions
  - tools for integrating C/C++ and Fortran code
  - useful linear algebra, Fourier transform, and random number capabilities

Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases.

NumPy is licensed under the BSD license, enabling reuse with few restrictions.

numpy를 사용하는 궁극적인 이유는 다음과 같다.
- 속도가 빠름
  - Vectorization
  - C 기반으로 만들어진 ndarray data type
- 사용하기 편하고 쉬움

## 참고해 볼 만한 사이트
- [NumPy Reference](https://docs.scipy.org/doc/numpy/reference/?v=20190722141030)
- [Cython](http://docs.cython.org/en/latest/index.html)

In [1]:
import numpy as np
np.__version__

'1.16.4'

In [2]:
dir(np)

['ALLOW_THREADS',
 'AxisError',
 'BUFSIZE',
 'CLIP',
 'DataSource',
 'ERR_CALL',
 'ERR_DEFAULT',
 'ERR_IGNORE',
 'ERR_LOG',
 'ERR_PRINT',
 'ERR_RAISE',
 'ERR_WARN',
 'FLOATING_POINT_SUPPORT',
 'FPE_DIVIDEBYZERO',
 'FPE_INVALID',
 'FPE_OVERFLOW',
 'FPE_UNDERFLOW',
 'False_',
 'Inf',
 'Infinity',
 'MAXDIMS',
 'MAY_SHARE_BOUNDS',
 'MAY_SHARE_EXACT',
 'MachAr',
 'NAN',
 'NINF',
 'NZERO',
 'NaN',
 'PINF',
 'PZERO',
 'RAISE',
 'SHIFT_DIVIDEBYZERO',
 'SHIFT_INVALID',
 'SHIFT_OVERFLOW',
 'SHIFT_UNDERFLOW',
 'ScalarType',
 'Tester',
 'TooHardError',
 'True_',
 'UFUNC_BUFSIZE_DEFAULT',
 'UFUNC_PYVALS_NAME',
 'WRAP',
 '_NoValue',
 '_UFUNC_API',
 '__NUMPY_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__config__',
 '__doc__',
 '__file__',
 '__git_revision__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_add_newdoc_ufunc',
 '_arg',
 '_distributor_init',
 '_globals',
 '_mat',
 '_pytesttester',
 'abs',
 'absolute',
 'absolute_import',
 'add',
 'add_docstring',
 ...]

## Design Pattern - Factory Method

In [3]:
x = np.array([1, 2])

In [4]:
x

array([1, 2])

In [5]:
type(x)

numpy.ndarray

In [6]:
x = np.array([[1, 2], [3, 4]])

In [7]:
x

array([[1, 2],
       [3, 4]])

In [8]:
x = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

In [9]:
x

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [10]:
y = np.ndarray(0)

In [11]:
y

array([], dtype=float64)

ndarray는 호모지니어스한 자료 구조이기 때문에 각 요소에 다른 dtype이 존재하면 한 가지 dtype으로 통일한다.

In [12]:
y = np.array([1, '2', 3])

In [13]:
y

array(['1', '2', '3'], dtype='<U21')

`np.ndarray`는 `np.array`와 달리 첫 번째 인자로 데이터가 아닌 shape을 받는다. 따라서 위에서 `np.array`로 선언한 방식대로 똑같이 instance화 시키려고 하면 다음과 같이 Error가 발생한다.

```
shape : tuple of ints
Shape of created array.
```

In [14]:
y = np.ndarray([1, '2', 3])

TypeError: 'str' object cannot be interpreted as an integer

In [15]:
y = np.ndarray((2, 2))

In [16]:
y

array([[1.83775424e-316, 0.00000000e+000],
       [0.00000000e+000,             nan]])

In [17]:
x = np.array([[1, 2], [3, 4]])

In [18]:
x

array([[1, 2],
       [3, 4]])

### np.shape
ndarray의 크기를 확인할 수 있음

In [19]:
x.shape

(2, 2)

### np.dtype
ndarray의 dtype을 확인할 수 있음

In [20]:
x.dtype

dtype('int64')

### np.flags
ndarray의 메모리 저장 방식(C/Fortran 연속성, 데이터 소유 여부, 쓰기 가능 여부 등)을 확인할 수 있음

In [21]:
x.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [22]:
x = np.array([[1, 2], [3, 4]], order='F')

In [23]:
x

array([[1, 2],
       [3, 4]])

In [24]:
x.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

### np.itemsize
요소 하나의 데이터 크기(byte)를 확인할 수 있음

In [25]:
x.itemsize

8

### np.ndim
ndarray의 차원을 확인할 수 있음

In [26]:
x.ndim

2

### broadcasting 연산

In [27]:
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])

In [28]:
x + y

array([[ 6,  8],
       [10, 12]])

In [29]:
x * y

array([[ 5, 12],
       [21, 32]])

In [30]:
x / y

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

### Transpose

In [31]:
x.T

array([[1, 3],
       [2, 4]])

In [32]:
x.transpose()

array([[1, 3],
       [2, 4]])

### np.strides
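각 차원(axis)을 따라 다음 요소로 이동할 때 메모리 상에서 건너뛰어야 하는 크기(byte). 예를 들어 int64(8 byte) 요소를 가진 (2, 2) 배열은 행 방향으로 16 byte, 열 방향으로 8 byte를 이동한다.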

In [33]:
x.strides

(16, 8)

In [34]:
y = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

In [35]:
y.strides

(32, 16, 8)

Tensorflow에서 연습용으로 제공하는 데이터셋도 기본적으로 ndarray이다.

In [36]:
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [37]:
type(X_train), type(X_test), type(y_train), type(y_test)

(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)

### ndarray axis

In [38]:
np.mean(y, axis=2)

array([[1.5, 3.5],
       [5.5, 7.5]])

### np.arange

In [39]:
x = np.arange(10)

In [40]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [41]:
x.dtype

dtype('int64')

In [42]:
x.shape

(10,)

In [43]:
x.ndim

1

In [44]:
x.reshape((2, -1))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [45]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [46]:
y = x.reshape((2, -1))

In [47]:
y

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [48]:
y.shape

(2, 5)

In [49]:
y.sum(axis=0)

array([ 5,  7,  9, 11, 13])

In [50]:
y.sum(axis=1)

array([10, 35])

In [51]:
y.sum()

45

In [52]:
y.mean(axis=0)

array([2.5, 3.5, 4.5, 5.5, 6.5])

In [53]:
y.mean(axis=1)

array([2., 7.])

In [54]:
y.mean()

4.5

In [55]:
np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [56]:
np.ones((3, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [57]:
np.full((3, 4), 3)

array([[3, 3, 3, 3],
       [3, 3, 3, 3],
       [3, 3, 3, 3]])

In [58]:
x = np.array([[1, 2, 3], [4, 5, 6]])

In [59]:
np.zeros_like(x)

array([[0, 0, 0],
       [0, 0, 0]])

In [60]:
np.ones_like(x)

array([[1, 1, 1],
       [1, 1, 1]])

In [61]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [62]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [63]:
np.diag(x)

array([1, 5])

In [64]:
np.diagonal(x)

array([1, 5])

In [65]:
np.tri(4)

array([[1., 0., 0., 0.],
       [1., 1., 0., 0.],
       [1., 1., 1., 0.],
       [1., 1., 1., 1.]])

In [66]:
np.triu(np.ones(4))

array([[1., 1., 1., 1.],
       [0., 1., 1., 1.],
       [0., 0., 1., 1.],
       [0., 0., 0., 1.]])

In [67]:
np.linspace(0, 100)

array([  0.        ,   2.04081633,   4.08163265,   6.12244898,
         8.16326531,  10.20408163,  12.24489796,  14.28571429,
        16.32653061,  18.36734694,  20.40816327,  22.44897959,
        24.48979592,  26.53061224,  28.57142857,  30.6122449 ,
        32.65306122,  34.69387755,  36.73469388,  38.7755102 ,
        40.81632653,  42.85714286,  44.89795918,  46.93877551,
        48.97959184,  51.02040816,  53.06122449,  55.10204082,
        57.14285714,  59.18367347,  61.2244898 ,  63.26530612,
        65.30612245,  67.34693878,  69.3877551 ,  71.42857143,
        73.46938776,  75.51020408,  77.55102041,  79.59183673,
        81.63265306,  83.67346939,  85.71428571,  87.75510204,
        89.79591837,  91.83673469,  93.87755102,  95.91836735,
        97.95918367, 100.        ])

In [68]:
np.logspace(1, 100)

array([1.00000000e+001, 1.04811313e+003, 1.09854114e+005, 1.15139540e+007,
       1.20679264e+009, 1.26485522e+011, 1.32571137e+013, 1.38949549e+015,
       1.45634848e+017, 1.52641797e+019, 1.59985872e+021, 1.67683294e+023,
       1.75751062e+025, 1.84206997e+027, 1.93069773e+029, 2.02358965e+031,
       2.12095089e+033, 2.22299648e+035, 2.32995181e+037, 2.44205309e+039,
       2.55954792e+041, 2.68269580e+043, 2.81176870e+045, 2.94705170e+047,
       3.08884360e+049, 3.23745754e+051, 3.39322177e+053, 3.55648031e+055,
       3.72759372e+057, 3.90693994e+059, 4.09491506e+061, 4.29193426e+063,
       4.49843267e+065, 4.71486636e+067, 4.94171336e+069, 5.17947468e+071,
       5.42867544e+073, 5.68986603e+075, 5.96362332e+077, 6.25055193e+079,
       6.55128557e+081, 6.86648845e+083, 7.19685673e+085, 7.54312006e+087,
       7.90604321e+089, 8.28642773e+091, 8.68511374e+093, 9.10298178e+095,
       9.54095476e+097, 1.00000000e+100])

In [69]:
np.empty((4, 8))

array([[3.78037293e-316, 6.94506406e-310, 6.94506406e-310,
        6.94506406e-310, 6.94506406e-310, 6.94506406e-310,
        6.94506406e-310, 6.94506405e-310],
       [6.94506406e-310, 6.94506406e-310, 6.94506406e-310,
        6.94506406e-310, 6.94506406e-310, 6.94506406e-310,
        6.94506406e-310, 6.94506406e-310],
       [6.94506406e-310, 6.94506406e-310, 6.94506406e-310,
        6.94506406e-310, 6.94506406e-310, 6.94506406e-310,
        6.94506406e-310, 6.94506406e-310],
       [6.94506406e-310, 6.94506405e-310, 3.03426187e-086,
        9.49178612e-259, 6.99011101e-077, 6.99011096e-077,
        5.45430308e-311, 2.42092166e-322]])

### empty vs random
- empty: 쓰레기(가비지)값이 들어있음
  - 쓰레기값이란, 값이 초기화가 되기 전 해당 변수 안에 들어있던 값을 의미함
  - 즉, 새로 할당된 메모리 영역에 이전부터 남아 있던 임의의 값이 초기화 없이 그대로 보이는 것임
- random: 특정 seed를 통해 값이 생성됨
- 참고: [Magic Value](https://en.wikipedia.org/wiki/Magic_number_(programming))

## NumPy Indexing
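- `,` : 콤마로 차원(axis)별 인덱스를 구분
- `...` : Ellipsis, 명시하지 않은 나머지 차원 전체를 의미
- 조건 : 비교 연산 등으로 만든 boolean 조건식
- 마스킹 : boolean 배열(mask)로 요소를 선택
- 팬시 : 정수 리스트/배열로 여러 요소를 한 번에 선택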

In [70]:
x[1, 1]

5

### Fancy Indexing

In [71]:
x[[0, 1], [1]]

array([2, 5])

In [72]:
x[[0, 1]]

array([[1, 2, 3],
       [4, 5, 6]])

In [73]:
x[[0], [1]]

array([2])

In [74]:
x[...]

array([[1, 2, 3],
       [4, 5, 6]])

In [75]:
x[1, ...]

array([4, 5, 6])

In [76]:
Ellipsis == ...

True

## Masking 기법

In [77]:
x = np.arange(10)

In [78]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [79]:
x[[True, False, True, True, True, True, True, True, True, True]]

array([0, 2, 3, 4, 5, 6, 7, 8, 9])

In [80]:
x > 3

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [81]:
x[[1, 2]]

array([1, 2])

In [82]:
x = np.arange(10).reshape(5, 2)

In [83]:
x

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [84]:
x[:, ...]

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [85]:
x[[0], :]

array([[0, 1]])

In [86]:
x[0]

array([0, 1])

In [87]:
x[[0]]

array([[0, 1]])

In [88]:
x[[[0]]]

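FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  """Entry point for launching an IPython kernel.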


array([[0, 1]])

In [89]:
x[[[[0]]]]

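FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  """Entry point for launching an IPython kernel.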


array([[[0, 1]]])

In [90]:
x[(0, 1)]

1

In [91]:
x[(2, 0)]

4

In [92]:
i = np.nditer(x)

In [93]:
i

<numpy.nditer at 0x7fd8ecc1d8f0>

In [94]:
next(i)

array(0)

요소별(element-wise) 연산과 브로드캐스팅은 두 배열의 shape이 서로 호환되어야(각 차원의 크기가 같거나 한쪽이 1이어야) 가능함

In [95]:
np.array([1, 2]) + np.arange(10)

ValueError: operands could not be broadcast together with shapes (2,) (10,) 

In [None]:
np.array([1, 2]) + np.array([3, 4])

In [96]:
abs([1, 2, -3, -4, 5])

TypeError: bad operand type for abs(): 'list'

In [97]:
np.abs([-1, 2, -3, 4, 5])

array([1, 2, 3, 4, 5])

In [98]:
np.ceil([2.9, 3.1, 4.2, 3.14, 0.3])

array([3., 4., 5., 4., 1.])

ufunc은 설명이 나와있지 않은데, np.info로 설명을 볼 수 있음

In [99]:
from scipy.special import factorial

In [100]:
np.info()

 info(object=None, maxwidth=76,
      output=<ipykernel.iostream.OutStream object at 0x7fd93c7d5eb8>,
      toplevel='numpy')

Get help information for a function, class, or module.

Parameters
----------
object : object or str, optional
    Input object or name to get information about. If `object` is a
    numpy object, its docstring is given. If it is a string, available
    modules are searched for matching objects.  If None, information
    about `info` itself is returned.
maxwidth : int, optional
    Printing width.
output : file like object, optional
    File like object that the output is written to, default is
    ``stdout``.  The object has to be opened in 'w' or 'a' mode.
toplevel : str, optional
    Start search at this level.

See Also
--------
source, lookfor

Notes
-----
When used interactively with an object, ``np.info(obj)`` is equivalent
to ``help(obj)`` on the Python prompt or ``obj?`` on the IPython
prompt.

Examples
--------
>>> np.info(np.polyval) # doctest: +SKIP

In [101]:
np.info(factorial)

 factorial(n, exact=False)

The factorial of a number or array of numbers.

The factorial of non-negative integer `n` is the product of all
positive integers less than or equal to `n`::

    n! = n * (n - 1) * (n - 2) * ... * 1

Parameters
----------
n : int or array_like of ints
    Input values.  If ``n < 0``, the return value is 0.
exact : bool, optional
    If True, calculate the answer exactly using long integer arithmetic.
    If False, result is approximated in floating point rapidly using the
    `gamma` function.
    Default is False.

Returns
-------
nf : float or int or ndarray
    Factorial of `n`, as integer or float depending on `exact`.

Notes
-----
For arrays with ``exact=True``, the factorial is computed only once, for
the largest input, with each other result computed in the process.
The output dtype is increased to ``int64`` or ``object`` if necessary.

With ``exact=False`` the factorial is approximated using the gamma
function:

.. math:: n! = \Gamma(n+1)

Examples


In [102]:
np.iinfo(0)

iinfo(min=-9223372036854775808, max=9223372036854775807, dtype=int64)

In [103]:
np.where(x > 3)

(array([2, 2, 3, 3, 4, 4]), array([0, 1, 0, 1, 0, 1]))

In [104]:
np.where(x > 3, 1, 0)

array([[0, 0],
       [0, 0],
       [1, 1],
       [1, 1],
       [1, 1]])

In [105]:
x[np.where(x > 3)]

array([4, 5, 6, 7, 8, 9])

In [106]:
x = np.array([1, 2, 9, 5, 4, 3, 6, 8, 7])

In [107]:
np.max(x)

9

In [108]:
np.argmax(x)

2

In [109]:
np.mean(x)

5.0

In [110]:
x.argmax()

2

In [111]:
x.argmin()

0

In [112]:
x.max()

9

In [113]:
x.min()

1

In [114]:
x.mean()

5.0

In [115]:
np.pi

3.141592653589793

In [116]:
import math
math.pi

3.141592653589793

In [117]:
np.nan == float('nan')

False

In [118]:
n = float('nan')

In [119]:
np.set_printoptions(threshold=n)

ValueError: threshold must be numeric and non-NAN, try sys.maxsize for untruncated representation

In [120]:
np.set_printoptions(threshold=np.nan)

ValueError: threshold must be numeric and non-NAN, try sys.maxsize for untruncated representation

### deep copy: np.copy
`copy()`는 데이터를 새 메모리에 복사하므로, 복사본을 수정해도 원본은 바뀌지 않는다. (슬라이싱이나 `view()`는 복사가 아니라 원본 메모리를 공유한다.)

In [121]:
x = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
y = x.copy()

In [122]:
y[0][1] = 5

In [123]:
x

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [124]:
y

array([[1, 5, 3, 4],
       [5, 6, 7, 8]])

### shallow copy: np.view

In [125]:
x = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
y = x.view()

In [126]:
y[0][1] = 5

In [127]:
x

array([[1, 5, 3, 4],
       [5, 6, 7, 8]])

In [128]:
y

array([[1, 5, 3, 4],
       [5, 6, 7, 8]])