<a href="https://colab.research.google.com/github/Anjasfedo/data-analysis/blob/main/NumPy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction
What is NumPy?

NumPy is a multi-dimensional array library (1D, 2D, 3D, etc.).

How are lists different from NumPy? (speed)
1. list:
  - slow (each list element stores four pieces of information: Size, Reference Count, Object Type, Object Value)
2. NumPy:
  - Fast (because of a fixed type, e.g. Int32; the default integer type is platform-dependent and is usually Int64)
  - faster to read because fewer bytes of memory are needed per value
  - no type checking when iterating through objects
  - Contiguous Memory
  - use SIMD (single instruction multiple data) Vector Processing
  - Effective Cache utilization

How are lists different from NumPy? (functionality)
1. list, supports operations such as:
  - insertion
  - deletion
  - appending
  - concatenation
  - etc
2. NumPy:
  - insertion
  - deletion
  - appending
  - concatenation
  - etc
  - and much more...

Applications of NumPy?
- Mathematics (MATLAB Replacement)
- Plotting (Matplotlib)
- Backend (Pandas, Connect 4, Digital Photography)
- Machine Learning


# Import NumPy

In [2]:
import numpy as np
import sys

# The Basics

In [3]:
# Create a 1-D array from a Python list; dtype="int16" stores every element as a 16-bit integer
a = np.array([1, 2, 3], dtype="int16")
print(a)

[1 2 3]


In [4]:
# Create a 2-D array from nested lists (one inner list per row); the float literals give a float array
b = np.array([[9.0, 8.0, 7.0], [6.0, 5.0, 4.0]])
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


In [5]:
# Get dimension (number of axes): 1 for the vector a, 2 for the matrix b
print(a.ndim)
print(b.ndim)

1
2


In [6]:
# Get shape: a tuple holding the length of each axis
print(a.shape) # 1-D array with 3 elements
print(b.shape) # (row, column)

(3,)
(2, 3)


In [7]:
# Get type (the single data type shared by every element of the array)
print(a.dtype) # set explicitly with dtype="int16"
print(b.dtype) # inferred from the float values

int16
float64


In [8]:
# Get size (on memory) of a single element, in bytes
print(a.itemsize) # int16 = 2 bytes
print(b.itemsize) # float64 = 8 bytes

2
8


In [9]:
# Total memory used by the data = number of elements * bytes per element
print(a.size * a.itemsize, b.size * b.itemsize, sep="\n")

# .nbytes reports the same totals directly
print(a.nbytes, b.nbytes, sep="\n")

6
48
6
48


# Accessing/Changing specific elements, rows, columns, etc

## 2D example

In [10]:
# 2-D array with 2 rows and 7 columns, used by the indexing examples that follow
a = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [11]:
# Get specific element [row, column] by indexing (on lists use [row][column])
# Indices are zero-based: row 1, column 5 is the 6th value of the second row
a[1, 5]

13

In [12]:
# Get specific row (":" selects every column of row 0)
a[0, :]

array([1, 2, 3, 4, 5, 6, 7])

In [13]:
# Get specific column (":" selects every row of column 2)
a[:, 2]

array([ 3, 10])

In [14]:
# Getting a little more fancy [start index:end index:step size]
# Row 0, columns 1, 3 and 5 (the end index 6 is exclusive)
a[0, 1:6:2]

array([2, 4, 6])

In [15]:
# Change specific element by assigning to its [row, column] index
print(a[1, 5]) # value before the change

a[1,5] = 20

print(a[1, 5]) # value after the change

13
20


In [16]:
# Change entire column
print(a)

# A single value would be written to every row instead, e.g. a[:, 2] = 5
# A sequence assigns one value per row
a[:, 2] = [1, 2]

print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 20 14]]
[[ 1  2  1  4  5  6  7]
 [ 8  9  2 11 12 20 14]]


## 3D example

In [17]:
# 3-D array of shape (2, 2, 2): two blocks, each with 2 rows and 2 columns
b = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [18]:
# Get specific element (work outside in): block 0, row 1, column 1
b[0, 1, 1]

4

In [19]:
# Change: replace row 1 of every block
print(b)
print(b[:, 1, :]) # the selected values form a 2x2 array

# The replacement here has the same 2x2 shape as the selection
b[:, 1, :] = [[999, 999], [999, 999]]

print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
[[3 4]
 [7 8]]
[[[  1   2]
  [999 999]]

 [[  5   6]
  [999 999]]]


# Initializing Different Types of Arrays

In [20]:
# All 0s matrix (the argument is the shape; the values are floats when no dtype is given)
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [21]:
# All 1s matrix, here 3-D with shape (4, 2, 2) and an explicit integer dtype
np.ones((4, 2, 2), dtype="int32")

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]], dtype=int32)

In [22]:
# Any other number: np.full(shape, fill value)
print(np.full((2, 2), 99))

# Reuse the shape of an existing array
print(np.full(b.shape, 99))

[[99 99]
 [99 99]]
[[[99 99]
  [99 99]]

 [[99 99]
  [99 99]]]


In [23]:
# Any other number (full_like): takes the shape from an existing array
c = np.full_like(a, 4)

print(a.shape)
print(c.shape) # same shape as a

(2, 7)
(2, 7)


In [24]:
# Random decimal numbers in [0, 1); rand takes the dimensions as separate arguments
# NOTE(review): no seed is set in the cells shown, so the values differ on every run
print(np.random.rand(4, 2))

# With shape of other array (random_sample takes the shape as one tuple)
print(np.random.random_sample(a.shape))

[[0.48943719 0.10580857]
 [0.79896478 0.94296295]
 [0.02712139 0.96950707]
 [0.81918454 0.60202361]]
[[0.77112071 0.48825383 0.37123421 0.56230731 0.06776829 0.66264551
  0.10879256]
 [0.30278379 0.94389881 0.83862506 0.76557099 0.25420123 0.25366365
  0.97615956]]


In [25]:
# Random integer values
print(np.random.randint(7, size=(3, 3))) # one bound: values start at 0, 7 is exclusive

print(np.random.randint(-2, 7, size=(3, 3))) # two bounds: low -2 is inclusive, high 7 is exclusive

[[2 2 1]
 [6 0 1]
 [1 6 5]]
[[ 3  4 -2]
 [ 2  6  1]
 [ 2  0  2]]


In [26]:
# The identity matrix (5x5: ones on the main diagonal, zeros elsewhere)
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [27]:
# Repeat an array: copy the single row of arr three times along axis 0 (down the rows)
arr = np.array([[1, 2, 3]])
r1 = arr.repeat(3, axis=0)

print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


# Problem #1

In [28]:
# Step 1: start from a 5x5 block of ones
problem1 = np.ones(shape=(5, 5))
print(problem1)

# Step 2: zero everything except the outer border (rows 1-3, columns 1-3)
problem1[1:4, 1:4] = 0
print(problem1)

# Step 3: put a 9 in the centre cell
problem1[2, 2] = 9
print(problem1)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


# Be careful when copying variables!

In [29]:
a = np.array([1, 2, 3])
b = a # plain assignment does not copy: b is another name for the same array

b[0] = 100

print(f"a:{a}")
print(f"b:{b}")

# Changing b also changes a, because both names refer to the same data

a:[100   2   3]
b:[100   2   3]


In [30]:
# Use the .copy method to get an independent array
c = a.copy()

c[0] = 500 # only c changes; a keeps its value

print(f"a:{a}")
print(f"c:{c}")

a:[100   2   3]
c:[500   2   3]


# Mathematics

In [31]:
# Sample vector used by the arithmetic examples that follow
a = np.array([1, -2, 3, 4])

print(a)

[ 1 -2  3  4]


In [32]:
# Addition (the scalar is added to every element)
print(a + 2)

[3 0 5 6]


In [33]:
# Subtraction (the scalar is subtracted from every element)
print(a - 2)

[-1 -4  1  2]


In [34]:
# Multiplication (every element is multiplied by the scalar)
print(a * 2)

[ 2 -4  6  8]


In [35]:
# Dividing (true division: the result holds floats even though a holds integers)
print(a / 2)

[ 0.5 -1.   1.5  2. ]


In [36]:
# Second vector with the same length as a, for array-with-array arithmetic
b = np.array([1, 2, 1, 2])

print(a)
print(b)

[ 1 -2  3  4]
[1 2 1 2]


In [37]:
# Arithmetic between two arrays is applied element by element
print(a + b)
print(a - b)
print(a * b)
print(a / b)

[2 0 4 6]
[ 0 -4  2  2]
[ 1 -4  3  8]
[ 1. -1.  3.  2.]


In [38]:
# Power (each element is squared)
print(a ** 2)

[ 1  4  9 16]


In [39]:
# Take the sin of every element (the values are interpreted as radians)
print(np.sin(a))

# Take the cos of every element
print(np.cos(a))

[ 0.84147098 -0.90929743  0.14112001 -0.7568025 ]
[ 0.54030231 -0.41614684 -0.9899925  -0.65364362]


# Linear Algebra

In [40]:
# Left operand: 2x3 matrix of ones
a = np.ones(shape=(2, 3))
print(a)

# Right operand: 3x2 matrix filled with the value 2
b = np.full(shape=(3, 2), fill_value=2)
print(b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


In [41]:
# Matrix multiplication via the @ operator: a 2x3 matrix times a 3x2 matrix gives a 2x2 result
a @ b

array([[6., 6.],
       [6., 6.]])

In [42]:
# 3x3 identity matrix, used for the determinant example
c = np.identity(3)
print(c)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [43]:
# Find the determinant (for the identity matrix it is 1.0)
np.linalg.det(c)

1.0

# Statistics

In [44]:
# 2x3 array used by the statistics examples that follow
stats = np.array([[1, 2, 3], [4, 5, 6]])
print(stats)

[[1 2 3]
 [4 5 6]]


In [46]:
# Min & Max over all elements of the array
print(np.min(stats))

print(np.max(stats))

1
6


In [49]:
# Min along one axis
print(np.min(stats, axis=0)) # axis=0 reduces down the rows: one minimum per column

print(np.min(stats, axis=1)) # axis=1 reduces across the columns: one minimum per row

[1 2 3]
[1 4]


In [50]:
# Sum of every element
print(stats.sum())

# axis=0: one total per column
print(stats.sum(axis=0))

# axis=1: one total per row
print(stats.sum(axis=1))

21
[5 7 9]
[ 6 15]


# Reorganizing Arrays

In [53]:
# Source array with 2 rows and 4 columns
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

# The new shape must hold the same total number of elements (2 * 4 == 4 * 2)
after = np.reshape(before, (4, 2))
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [54]:
# Vertically stacking vectors: each vector becomes one row of the result
v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])

np.vstack([v1, v2, v1, v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [55]:
# Horizontally stacking vectors
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))

np.hstack((h1, h2))

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])