<a href="https://colab.research.google.com/github/AfonsoPaula/Data-Analysis/blob/main/notebook_numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Numpy: Numeric Computing Library**

In [2]:
import sys
import numpy as np

### **Basic Numpy Arrays**

In [69]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [70]:
a = np.array([1, 2, 3, 4])

In [71]:
b = np.array([0, .5, 1, 1.5, 2])

In [72]:
a[0], a[1]

(1, 2)

In [73]:
a[0:]
# zero up to something

array([1, 2, 3, 4])

In [74]:
a[1:3]
#zero up to three

array([2, 3])

In [75]:
a[1:-1]
# element with index 1 to the last element (no include)

array([2, 3])

In [76]:
a[::2]
# new sequence that include every second element from the original sequence

array([1, 3])

In [77]:
# multi-indexing
b[0], b[2], b[-1]

b[[0, 2, -1]]

array([0., 1., 2.])

### **Array Types**

In [78]:
a

array([1, 2, 3, 4])

In [79]:
a.dtype

dtype('int64')

In [80]:
b

array([0. , 0.5, 1. , 1.5, 2. ])

In [81]:
b.dtype

dtype('float64')

In [82]:
np.array([1, 2, 3, 4], dtype=np.float)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  np.array([1, 2, 3, 4], dtype=np.float)


array([1., 2., 3., 4.])

In [83]:
np.array([1, 2, 3, 4], dtype=np.int8)

array([1, 2, 3, 4], dtype=int8)

In [84]:
c = np.array(['a', 'b', 'c'])

In [85]:
c.dtype

dtype('<U1')

* NumPy stores numbers, dates, booleands, but not a regular individual objects.

* NumPy is usually used for numeric processing.

In [86]:
d = np.array([{'a': 1}, sys])

In [87]:
d.dtype

dtype('O')

### **Dimensions and shapes**

In [88]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6]
])

In [89]:
A.shape

(2, 3)

In [90]:
A.ndim

2

In [91]:
A.size
# total numbers of elements we have

6

In [92]:
B = np.array([
    [
        [12, 11, 10],
        [9, 8, 7],
    ],
    [
        [6, 5, 4],
        [3, 2, 1]
    ]
])

In [93]:
B

array([[[12, 11, 10],
        [ 9,  8,  7]],

       [[ 6,  5,  4],
        [ 3,  2,  1]]])

In [94]:
B.shape

(2, 2, 3)

In [95]:
B.ndim

3

If the shape isn't consistent, it'll just fall back to regular Python objects

In [96]:
C = np.array([
    [
        [12, 11, 10],
        [9, 8, 7],
    ],
    [
        [6, 5, 4]
    ]
])

  C = np.array([


In [97]:
C.dtype

dtype('O')

In [98]:
C.shape

(2,)

In [99]:
C.size

2

In [100]:
type(C[0])

list

### **Indexing and Slicing of Matrices**

In [101]:
# Square Matrix
A = np.array([
#.   0. 1. 2.
    [1, 2, 3], # 0
    [4, 5, 6], # 1
    [7, 8, 9]  # 2
])

In [102]:
A[1]

array([4, 5, 6])

In [103]:
A[1][0]

A[1, 0]

4

In [104]:
A[0:2]
# Select everything from dimension on (rows)
# So in this case, we want zero up to two (rows)

array([[1, 2, 3],
       [4, 5, 6]])

In [105]:
# We can also pass other dimension
A[:, :2]
# every row
# select from column level, only want select the elements up to two

array([[1, 2],
       [4, 5],
       [7, 8]])

In [106]:
A[:2, :2]

array([[1, 2],
       [4, 5]])

In [107]:
A[:2, 2:]

array([[3],
       [6]])

In [108]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [109]:
A[1] = np.array([10, 10, 10])

In [110]:
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [ 7,  8,  9]])

In [111]:
A[2] = 991 # Expand Operation

In [112]:
A

array([[  1,   2,   3],
       [ 10,  10,  10],
       [991, 991, 991]])

### **Summary Statistics**

In [113]:
a = np.array([1, 2, 3, 4])

In [114]:
a.sum()

10

In [115]:
a.mean()

2.5

In [116]:
a.std()

1.118033988749895

In [117]:
a.var()

1.25

In [118]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

In [119]:
A.sum()

45

In [120]:
A.mean()

5.0

In [121]:
A.std()

2.581988897471611

In [122]:
A.sum(axis=0)

array([12, 15, 18])

In [123]:
A.sum(axis=1)

array([ 6, 15, 24])

In [124]:
A.mean(axis=0)

array([4., 5., 6.])

In [125]:
A.mean(axis=1)

array([2., 5., 8.])

In [126]:
A.std(axis=0)

array([2.44948974, 2.44948974, 2.44948974])

In [127]:
A.std(axis=1)

array([0.81649658, 0.81649658, 0.81649658])

### **Broadcasting and Vectorized Operations**

In [128]:
a = np.arange(4)

In [129]:
a

array([0, 1, 2, 3])

In [130]:
a + 10

array([10, 11, 12, 13])

In [131]:
a * 10

array([ 0, 10, 20, 30])

In [132]:
a

array([0, 1, 2, 3])

In [133]:
a += 100

In [134]:
a

array([100, 101, 102, 103])

In [135]:
l = [0, 1, 2, 3]

In [136]:
[i * 10 for i in l] # for lists

# TThe main difference is that this is all optimized and extremely fast

[0, 10, 20, 30]

In [137]:
a = np.arange(4)

In [138]:
a

array([0, 1, 2, 3])

In [139]:
b = np.array([10, 10, 10, 10])

In [140]:
b

array([10, 10, 10, 10])

In [141]:
a + b

array([10, 11, 12, 13])

In [142]:
a * b

array([ 0, 10, 20, 30])

### **Boolean Arrays**

In [143]:
a = np.arange(4)

In [144]:
a

array([0, 1, 2, 3])

In [145]:
a[[0, -1]] #. a[0], a[-1]

array([0, 3])

In [146]:
a[[True, False, False, True]]

array([0, 3])

In [147]:
a >= 2

array([False, False,  True,  True])

In [148]:
a[a >= 2]

array([2, 3])

In [149]:
a.mean()

1.5

In [150]:
a[a > a.mean()]

array([2, 3])

In [151]:
a[~(a > a.mean())]

array([0, 1])

In [152]:
a[(a==0) | (a==1)]

array([0, 1])

In [153]:
a[(a <= 2) & (a % 2 == 0)]

array([0, 2])

In [154]:
A = np.random.randint(100, size=(3, 3))

In [155]:
A

array([[73, 27, 47],
       [27, 39, 23],
       [81, 33, 64]])

In [156]:
A[np.array([
    [True, False, True],
    [False, True, False],
    [True, False, True]
])]

array([73, 47, 39, 81, 64])

In [157]:
A > 30

array([[ True, False,  True],
       [False,  True, False],
       [ True,  True,  True]])

In [158]:
A[ A > 30]

array([73, 47, 39, 81, 33, 64])

### **Linear Algebra**

In [159]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

In [160]:
B = np.array([
    [6, 5],
    [4, 3],
    [2, 1]
])

In [161]:
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [162]:
A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [163]:
B.T

array([[6, 4, 2],
       [5, 3, 1]])

In [164]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [165]:
B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

### **Size of Objects in Memory**

#### Int, Floats

In [166]:
# An integer in Python is > 24bytes
sys.getsizeof(1)

28

In [167]:
# Long are even larger
sys.getsizeof(10**100)

72

In [168]:
# Numpy size is much smaller
np.dtype(int).itemsize

8

In [169]:
np.dtype(float).itemsize

8

#### List are even larger

In [170]:
# A one-element list
sys.getsizeof([1])

64

In [171]:
# An array of one element in numpy
np.array([1]).nbytes

8

#### And performance is also important

In [172]:
l = list(range(1000))

In [173]:
a = np.arange(1000)

In [174]:
%time np.sum(a**2)

CPU times: user 107 µs, sys: 7 µs, total: 114 µs
Wall time: 119 µs


332833500

In [175]:
%time sum(x ** 2 for x in l)

CPU times: user 562 µs, sys: 0 ns, total: 562 µs
Wall time: 597 µs


332833500

### **Useful Numpy functions**

In [176]:
# RANDOM

print(np.random.random(size=2))
print(np.random.normal(size=2))
print(np.random.rand(2, 4))

[0.05215178 0.63820236]
[-0.78181472 -0.96782131]
[[0.60739038 0.76469341 0.79872276 0.74194482]
 [0.74051005 0.98549213 0.91595618 0.55622337]]


In [177]:
# ARANGE

print(np.arange(10))
print(np.arange(5, 10))
print(np.arange(0, 1, .1))

[0 1 2 3 4 5 6 7 8 9]
[5 6 7 8 9]
[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]


In [178]:
# RESHAPE

print(np.arange(10).reshape(2, 5))
print(np.arange(10).reshape(5, 2))

[[0 1 2 3 4]
 [5 6 7 8 9]]
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


In [179]:
# LINSPACE

print(np.linspace(0, 1, 5))
print(np.linspace(0, 1, 20))
print(np.linspace(0, 1, 20, False))

[0.   0.25 0.5  0.75 1.  ]
[0.         0.05263158 0.10526316 0.15789474 0.21052632 0.26315789
 0.31578947 0.36842105 0.42105263 0.47368421 0.52631579 0.57894737
 0.63157895 0.68421053 0.73684211 0.78947368 0.84210526 0.89473684
 0.94736842 1.        ]
[0.   0.05 0.1  0.15 0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6  0.65
 0.7  0.75 0.8  0.85 0.9  0.95]


In [180]:
# ZEROS, ONES, EMPTY

print(np.zeros(5))
print(np.zeros((3, 3)))
print(np.zeros((3, 3), dtype=np.int))
print("--------------------------")
print(np.ones(5))
print(np.ones((3, 3)))
print("--------------------------")
print(np.empty(5))
print(np.empty((2, 2)))

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[0 0 0]
 [0 0 0]
 [0 0 0]]
--------------------------
[1. 1. 1. 1. 1.]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
--------------------------
[1. 1. 1. 1. 1.]
[[0.25 0.5 ]
 [0.75 1.  ]]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  print(np.zeros((3, 3), dtype=np.int))


In [181]:
# INDENTITY AND EYE

print(np.identity(3))
print("-----------------")
print(np.eye(3, 3))
print("-----------------")
print(np.eye(8, 4))
print("-----------------")
print(np.eye(8, 4, k=1))
print("-----------------")
print(np.eye(8, 4, k=-3))
print("-----------------")
print("Hello World"[6])

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
-----------------
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
-----------------
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
-----------------
[[0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
-----------------
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]]
-----------------
W
