#  Introduction to Numpy

In [None]:
import numpy as np

## Numpy 데이터 객체의 특성

### Numpy 데이터 객체는 built-in 리스트와 사칙연산이 다르게 진행된다.

In [None]:
a = [1,2,3]              # Python built-in list
b = [4,5,6]
a

In [None]:
type(a)

In [None]:
a_arr = np.array(a)    # NumPy ndarray
b_arr = np.array(b)
a_arr

In [None]:
type(a_arr)

In [None]:
a + b           # On built-in lists, + concatenates the operands; it does not add element-wise.

In [None]:
a_arr + b_arr    # On NumPy arrays, + adds element-wise.

### 동일한 연산에 대해서 Numpy는 빠르게 처리한다.

In [None]:
my_list = list(range(1000000))
%time my_list2 = [x *2 for x in my_list]    # time doubling every element of a large list

In [None]:
my_arr = np.arange(1000000)
%time my_arr2 = my_arr * 2

In [None]:
import sys

In [None]:
sys.getsizeof(my_list2)    # shallow size: the list object itself, not the int objects it references

In [None]:
sys.getsizeof(my_arr2)    # NOTE(review): for an ndarray this appears to include the data buffer when the array owns it - confirm; my_arr2.nbytes reports the element bytes alone

##  Numpy 데이터 객체의 생성

### Numpy array() 함수를 이용한  ndarray 생성

In [None]:
x1 = np.array([1, 4, 2, 5, 3])    # create a Numpy ndarray from a list
x1

In [None]:
type(x1)

In [None]:
x1.shape

In [None]:
x1.dtype

In [None]:
x2 = np.array([3.14, 4, 2, 3])   # mixed float/int input is upcast to a single floating-point dtype
x2

In [None]:
x2.dtype

In [None]:
L = [[1,2,3,4], [5,6,7,8], [9,10,11,12]]
x3 = np.array(L)     # create a two-dimensional ndarray from a nested list
x3

In [None]:
type(x3)

In [None]:
x3.shape

In [None]:
x3.dtype

### 다양한 Numpy 함수들을 이용한 ndarray 생성

In [None]:
np.arange(10)

In [None]:
np.arange(0, 20, 2)     # Create an array holding a linear sequence: start at 0, stop before 20 (exclusive), step by 2

In [None]:
np.zeros(10, dtype=int)    # create a length 10 array filled with 0s

In [None]:
np.ones((3,5), dtype=float)     # create 3*5 floating point array filled with 1s

In [None]:
np.linspace(0, 1, 5)     # Create an array of five values evenly spaced between 0 and 1

### Numpy.random 모듈을 이용한 random number 생성

In [None]:
np.random.rand(5)          # draw random numbers from a uniform distribution over [0, 1)

In [None]:
np.random.rand(3, 5)

In [None]:
np.random.randn(5)        # draw random numbers from the standard normal distribution

In [None]:
np.random.randn(3,5)

In [None]:
np.random.randint(0, 10, (3,3))    # Create a 3*3 array of random integers in the interval [0, 10)

##  Numpy Arrays 기본 작업 (manipulation)

### Attributes of arrays:  size, shape, memory consumption and data types 파악

In [None]:
np.random.seed(0)   # seed for reproducibility
x1 = np.random.randint(10, size=6)         # one-dimensional array
x2 = np.random.randint(10, size=(3,4))     # two-dimensional array
x3 = np.random.randint(10, size=(3,4,5))   # three-dimensional array

In [None]:
x1

In [None]:
x2

In [None]:
x3

In [None]:
print("x3 ndim: ", x3.ndim)     # number of dimensions of x3
print("x3 shape: ", x3.shape)   # length of x3 along each dimension
print("x3 size: ", x3.size)     # total number of elements in x3
print("x3 dtype: ", x3.dtype)   # data type of the elements of x3

In [None]:
x2.shape

### array Indexing : Accessing Single elements

In [None]:
print(x1[0])        # indexing a one-dimensional ndarray
print(x1[-1])       # negative indices count from the end
print(x2[0,0])      # indexing a two-dimensional ndarray: [row, column]
print(x2[2,0])

In [None]:
x1[0] = 99      # assign a new value to an element in place
x1

### array slicing: accessing subarrays

In [None]:
x1[1:3]     # [start:stop): start is included, stop is excluded

In [None]:
x1[:3]

In [None]:
x1[0:5:2]     # slices can be written as [start:stop:step]

In [None]:
x2

In [None]:
x2[:2, :3]     # give one slice per dimension, separated by commas

In [None]:
x2[:, 0]      # first column of x2

In [None]:
x2[0, :]     # first row of x2

In [None]:
x2[0]      # equivalent to x2[0,:]

#### slicing된 부분에 새로운 값을 할당하는 경우

In [None]:
x2[1:3, :2] = 0    # assign to the sliced region (the scalar is broadcast across it)
x2

### Boolean Selection

In [None]:
data = np.arange(10)
data

In [None]:
data[data > 5]

In [None]:
data > 5

### Reshaping of Arrays

In [None]:
grid = np.arange(1,10).reshape((3,3))
grid

In [None]:
x = np.array([1,2,3])
x

In [None]:
y = x.reshape((3,1))
y

In [None]:
x.shape

In [None]:
x.ndim

In [None]:
y.shape

In [None]:
y.ndim

## Computation on Numpy Arrays

### Numpy vectorized operation vs. Python for loop operation (속도가 매우 빠름)

In [None]:
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal (``1.0 / v``) of every element of ``values``.

    Written as an explicit Python-level loop on purpose, so that it can
    serve as the slow baseline against a vectorized NumPy expression.

    Parameters
    ----------
    values : sequence or 1-D array of numbers
        The numbers to invert.

    Returns
    -------
    numpy.ndarray
        Array with ``len(values)`` entries where entry ``i`` holds
        ``1.0 / values[i]``.
    """
    reciprocals = np.empty(len(values))
    # Fill the pre-allocated result one element at a time.
    for position, item in enumerate(values):
        reciprocals[position] = 1.0 / item
    return reciprocals

values = np.random.randint(1,10, size=5)
values

In [None]:
compute_reciprocals(values)

In [None]:
1 / values              #  element-wise division using NumPy vectorized operations

In [None]:
big_array = np.random.randint(1,100, size=1000000)
%time compute_reciprocals(big_array)      # compare run time on a large array (the Python-level loop is slow)

In [None]:
%time (1.0 / big_array)      # the vectorized version finishes quickly

### Numpy array 수리 연산

In [None]:
x = np.arange(4)
x

In [None]:
x + 5    #  vectorized operation with arithmetic operators

In [None]:
np.add(x, 10)    # NumPy wrapper function for the + operator

In [None]:
# x1 = [0,1,2,3]     # adding a number to a built-in list (not a NumPy array) raises an error
# x1 + 5

In [None]:
x - 5     # np.subtract()

In [None]:
x * 5     # np.multiply()

In [None]:
x / 5     # np.divide()

In [None]:
x ** 2     # np.power()

In [None]:
x // 2     # np.floor_divide()

In [None]:
x % 2     # np.mod()

In [None]:
-(0.5*x + 1) ** 2

In [None]:
x1 = np.arange(5)
x1

In [None]:
x2 = np.arange(1,6)
x2

In [None]:
x1 + x2      # vectorized operation between two arrays

In [None]:
x1 * x2

In [None]:
x3 = np.arange(1,5)
x3

In [None]:
# x1 + x3     # left commented out: x1 has 5 elements and x3 has 4, so the shapes cannot be broadcast together

### Numpy Universal functions 및 집계(aggregation) 함수

In [None]:
arr = np.arange(10)
arr

In [None]:
np.sqrt(arr)

In [None]:
np.square(arr)

In [None]:
np.sum(arr)

In [None]:
np.mean(arr)

In [None]:
np.var(arr)

In [None]:
np.max(arr)

In [None]:
big_array = np.random.rand(1000000)

In [None]:
%time sum(big_array)     # with built-in Python sum()

In [None]:
%time np.sum(big_array)    # with NumPy's vectorized np.sum()

### Numpy 함수(np.sum 등) vs. method 사용방식

In [None]:
arr = np.arange(0,12).reshape(3,4)
arr

In [None]:
arr.sum()     # method 방식을 이용함

In [None]:
np.sum(arr)    # function form: sums every element of arr, like the arr.sum() method

In [None]:
arr.mean()

In [None]:
np.mean(arr)

In [None]:
np.sum(arr, axis=0)    # down the rows

In [None]:
np.sum(arr, axis=1)   # across the columns

In [None]:
arr.min(axis=0)       # find the minimum value within each column by specifying axis=0

In [None]:
np.min(arr, axis=0)

In [None]:
arr.min()

In [None]:
np.min(arr, axis=1)   # find the minimum value within each row by specifying axis=1

## Computation on Arrays: Broadcasting

In [None]:
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b

In [None]:
a + 5    # the scalar 5 is broadcast to every element of a

In [None]:
arr = np.ones((3,3))
arr

In [None]:
arr + a    # a (3 elements) is broadcast across each row of the (3, 3) array

In [None]:
a = np.arange(3)
b = np.arange(3).reshape((3,1))
print(a)
print(b)

In [None]:
a + b    # shapes (3,) and (3, 1) are both broadcast to (3, 3)

In [None]:
c = np.array([0,1,2,3])
c

In [None]:
# a + c    # left commented out: a has 3 elements and c has 4, so the shapes cannot be broadcast together

In [None]:
b + c    # shapes (3, 1) and (4,) broadcast to (3, 4)