# NumPy

The fundamental package for scientific computing with Python.

## How to use numpy

In [1]:
import numpy as np

## Arrays

An *array* is the main data structure of *NumPy*.
An array consists of a grid of values, and it contains information about the raw data, how to locate an element,
and how to interpret an element. It has a grid of elements that can be indexed in various ways.

In [2]:
# 1-d array
a = np.array([2,3,4])

print(a)
print(a.shape)

[2 3 4]
(3,)


In [3]:
# 2-d arrays
b = np.array([[1,2,3], [4,5,6]])

print(b)
print(b.shape)

[[1 2 3]
 [4 5 6]]
(2, 3)


## Arrays vs Lists/Tuples/Sequences

Usually arrays are built from *list* and *tuples*, or other Python *iterables* and *sequences*

In [4]:
this_is_a_list_of_lists = [[1,2,3], [4,5,6]]

this_is_an_array = np.array(this_is_a_list_of_lists)

print(this_is_a_list_of_lists)
print(len(this_is_a_list_of_lists), '\n')

print(this_is_an_array)
print(this_is_an_array.shape)

[[1, 2, 3], [4, 5, 6]]
2 

[[1 2 3]
 [4 5 6]]
(2, 3)


And if I try to see an *array* as *list* or a *list* as an *array* ...
Hint: it will fail!

In [5]:
this_is_an_array.append(0)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [6]:
this_is_a_list_of_lists.shape

AttributeError: 'list' object has no attribute 'shape'

## How to create *arrays*?

In [7]:
# from lists
b = np.array([[1,2,3], [4,5,6]])

print(b)
print(b.shape)

[[1 2 3]
 [4 5 6]]
(2, 3)


In [8]:
# from tuples
tuples = ((1,2,3), (4,5,6))
b = np.array(tuples)

print(b)
print(b.shape)

[[1 2 3]
 [4 5 6]]
(2, 3)


In [9]:
# from zeros
shape = (3, 4)
b = np.zeros(shape)

print(b)
print(b.shape)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
(3, 4)


In [10]:
# from ones
shape = (3, 4)
b = np.ones(shape)

print(b)
print(b.shape)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
(3, 4)


In [11]:
# from a range of numbers
start = 0
end = 10
interval = 1
b = np.arange(start, end, interval)

print(b)
print(b.shape)

[0 1 2 3 4 5 6 7 8 9]
(10,)


In [12]:
# from a range of numbers equally distributed
start = 0
end = 1
interval = 0.33
b = np.arange(start, end, interval)

print(b)
print(b.shape, '\n')

start = 0
end = 1
number_of_intervals = 3
b = np.linspace(start, end, number_of_intervals, endpoint=False)

print(b)
print(b.shape)

[0.   0.33 0.66 0.99]
(4,) 

[0.         0.33333333 0.66666667]
(3,)


## Basic Operations with arrays

Some Python operators do not have the expected behavior with *arrays*!!!

In [13]:
# addition

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

c = a + b

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[ 2  4  6]
 [ 8 10 12]]
(2, 3)


In [14]:
# subtraction

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

c = a - b

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[0 0 0]
 [0 0 0]]
(2, 3)


In [15]:
# multiplying by a number

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = 2

c = a * b

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[ 2  4  6]
 [ 8 10 12]]
(2, 3)


In [16]:
# exponentiation

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

c = a ** 2

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[ 1  4  9]
 [16 25 36]]
(2, 3)


But what about multiplying two *arrays*??

In [17]:
# multiplication

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

c = a * b

print('c')
print(c)
print(c.shape)

# multiplicação de matrizes não é feita de forma correta

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[ 1  4  9]
 [16 25 36]]
(2, 3)


The * operator performs element-wise multiplication of the *arrays*

In [18]:
# division

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

c = a / b
# divisão de matrizes nao é feita de forma correta

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[1. 1. 1.]
 [1. 1. 1.]]
(2, 3)


The / operator performs element-wise division of the *arrays*

## Array descriptive analysis

In [19]:
# sum, min, max, mean, ...

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

print(f'sum of the rows {a.sum(axis=0)}, and sum of the columns {a.sum(axis=1)}')
print(f'max of the rows {a.max(axis=0)}, and max of the columns {a.max(axis=1)}')
print(f'max of the rows {a.min(axis=0)}, and max of the columns {a.min(axis=1)}')
print(f'sum {a.sum()}, max {a.max()} and min {a.min()}')
print(f'mean {a.mean()}')

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

sum of the rows [5 7 9], and sum of the columns [ 6 15]
max of the rows [4 5 6], and max of the columns [3 6]
max of the rows [1 2 3], and max of the columns [1 4]
sum 21, max 6 and min 1
mean 3.5


## Basic Operations with arrays using NumPy functions

Some Python operators do not have the expected behavior with *arrays*!!!
So, we have *NumPy* functions to achieve the desired behavior!

In [20]:
# multiplication

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2], [3,4], [5,6]])

print('b')
print(b)
print(b.shape, '\n')

c = np.dot(a, b) # multiplicação de matrizes, verificar se cumrre o requesito do numero de linhas e de colunas

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2]
 [3 4]
 [5 6]]
(3, 2) 

c
[[22 28]
 [49 64]]
(2, 2)


And remember, the number of rows of A must be the same as the number of columns of b!!

In [21]:
# impossible multiplication

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

c = np.dot(a, b)

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 


ValueError: shapes (2,3) and (2,3) not aligned: 3 (dim 1) != 2 (dim 0)

Remember to perform a division between two *arrays*,
we must convert one of the *arrays* into its inverse form

In [22]:
# division

a = np.array([[1,2,3], [4,5,6], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[2,-1,0], [-1,2,-1], [0,-1,2]])
b_inverse = np.linalg.inv(b) # inverso não é transposta
print('b_inverse')
print(b_inverse)
print(b_inverse.shape, '\n')

c = np.dot(a, b_inverse)

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]
 [4 5 6]]
(3, 3) 

b_inverse
[[0.75 0.5  0.25]
 [0.5  1.   0.5 ]
 [0.25 0.5  0.75]]
(3, 3) 

c
[[ 2.5  4.   3.5]
 [ 7.  10.   8. ]
 [ 7.  10.   8. ]]
(3, 3)


## Indexing, Slicing and Iterating

`np.array[indexes or slices for rows, indexes or slices for columns]`

An `int` object, for instance 0 or 1 or 2 can be indexes

A `slice` is composed as follows: `0:10:1`
In this case, 0 is start, 10 is end (non-inclusive) and 1 is the interval

Indexing creates a sub-view of the *array*, but does not perform a copy!!

In [23]:
# indexes
a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = a[1,2] # para selecionar um elemento de uma lisat, começa em 0 como o python

print('b')
print(b)
print(b.shape, '\n')

c = a[0,2]

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
6
() 

c
3
()


In [24]:
# slices
a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = a[0:2,2] # seleciona um intervalo de valores nos arrays

print('b')
print(b)
print(b.shape, '\n')

c = a[1,0:3]

print('c')
print(c)
print(c.shape, '\n')

d = a[0:2,0:3]

print('d')
print(d)
print(d.shape, '\n')


e = a[0:2:2,0:3:2]

print('e')
print(e)
print(e.shape, '\n')

f = a[0,:]

print('f')
print(f)
print(f.shape, '\n')

g = a[0,:1]

print('g')
print(g)
print(g.shape, '\n')

h = a[0,1:]

print('h')
print(h)
print(h.shape, '\n')

i = a[-1,1:]

print('i')
print(i)
print(i.shape, '\n')

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[3 6]
(2,) 

c
[4 5 6]
(3,) 

d
[[1 2 3]
 [4 5 6]]
(2, 3) 

e
[[1 3]]
(1, 2) 

f
[1 2 3]
(3,) 

g
[1]
(1,) 

h
[2 3]
(2,) 

i
[5 6]
(2,) 


In [25]:
# iterating
a = np.array([[1,2,3], [4,5,6]])

for row in a:
    print(row) #dá print a cada linha 

[1 2 3]
[4 5 6]


## Manipulation

One of the greatest advantages of using *NumPy* is the fact that one can easily manipulate
an *array*!

In [2]:
# flatten
a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = a.ravel()
g = a.flat()
print(g)
print('b')
print(b)
print(b.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 



TypeError: 'numpy.flatiter' object is not callable

In [27]:
# transpose

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = a.T # faz a transposta


print('b')
print(b)
print(b.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 4]
 [2 5]
 [3 6]]
(3, 2)


In [28]:
# reshape

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

new_shape = (3,2)
b = a.reshape(new_shape)

print('b')
print(b)
print(b.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2]
 [3 4]
 [5 6]]
(3, 2)


Notice the difference between *reshape* and *transpose*!!!

In [29]:
# vertical stacking  - Pega na primeira matriz e coloca a segunda por baixo

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

arrays_to_stack = [a, b]
c = np.vstack(arrays_to_stack)

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]
(4, 3)


In [30]:
# horizontal stacking - pega na primeira matriz a adiciona ao lado da segunda

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

arrays_to_stack = [a, b]
c = np.hstack(arrays_to_stack)

print('c')
print(c)
print(c.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]
(2, 6)


In [31]:
# concatenate

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2,3], [4,5,6]])

print('b')
print(b)
print(b.shape, '\n')

arrays_to_stack = [a, b]
c = np.concatenate(arrays_to_stack)

print('c')
print(c)
print(c.shape, '\n')

arrays_to_stack = [a, b]
d = np.concatenate(arrays_to_stack, axis=1)

print('d')
print(d)
print(d.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2 3]
 [4 5 6]]
(2, 3) 

c
[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]
(4, 3) 

d
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]
(2, 6)


*concatenate* is the same as *hstack* or *vstack*!

In [32]:
# split

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.hsplit(a, 3) # colocamos a coluna que queremos fazer o split
print('b')
for array in b:
    print(array)
print(len(b), '\n')

c = np.vsplit(a, 2)
print('c')
for array in c:
    print(array)
print(len(c))

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1]
 [4]]
[[2]
 [5]]
[[3]
 [6]]
3 

c
[[1 2 3]]
[[4 5 6]]
2


## Copies and views

`b = a` is not copying

`b = a[0,2]` is just a view

`b = a.copy()` finally we have a copy

In [33]:
a = np.array([[1,2,3], [4,5,6]])  # ter cuidado com apontadores

print('a')
print(a)
print(a.shape, '\n')

# assignement
b = a
print(b is a, '\n')

#indexing/slicing
a = np.array([[1,2,3], [4,5,6]])
print('a')
print(a)

b = a[0, :]
print('b')
print(b)

b[0] = 100
print('a')
print(a)
print('b')
print(b, '\n')

# copy
a = np.array([[1,2,3], [4,5,6]])
b = a.copy()
print('b')
print(b is a)
print(b)

b[0, 0] = 100
print('a')
print(a)
print('b')
print(b)


a
[[1 2 3]
 [4 5 6]]
(2, 3) 

True 

a
[[1 2 3]
 [4 5 6]]
b
[1 2 3]
a
[[100   2   3]
 [  4   5   6]]
b
[100   2   3] 

b
False
[[1 2 3]
 [4 5 6]]
a
[[1 2 3]
 [4 5 6]]
b
[[100   2   3]
 [  4   5   6]]


## Useful hints

In [34]:
# boolean mask indexing

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

boolean_mask_b = a > 4
print('boolean_mask_b a > 4')
print(boolean_mask_b)
print(boolean_mask_b.shape, '\n')

b = a[boolean_mask_b]

print('b')
print(b)
print(b.shape, '\n')

boolean_mask_c = a == 4
print('boolean_mask_c a == 4')
print(boolean_mask_c)
print(boolean_mask_c.shape, '\n')

c = a[boolean_mask_c]

print('c')
print(c)
print(c.shape, '\n')


boolean_mask_d = a == 4
print('boolean_mask_d a == 4 but inverse')
print(boolean_mask_d)
print(boolean_mask_d.shape, '\n')

d = a[~boolean_mask_d]

print('d')
print(d)
print(d.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

boolean_mask_b a > 4
[[False False False]
 [False  True  True]]
(2, 3) 

b
[5 6]
(2,) 

boolean_mask_c a == 4
[[False False False]
 [ True False False]]
(2, 3) 

c
[4]
(1,) 

boolean_mask_d a == 4 but inverse
[[False False False]
 [ True False False]]
(2, 3) 

d
[1 2 3 5 6]
(5,)


In [35]:
# identity matrices
print(np.eye(2), '\n')
print(np.eye(4))

[[1. 0.]
 [0. 1.]] 

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [3]:
# np.dot is the same as the operator @

a = np.array([[1,2,3], [4,5,6]])

print('a')
print(a)
print(a.shape, '\n')

b = np.array([[1,2], [3,4], [5,6]])

print('b')
print(b)
print(b.shape, '\n')

c = np.dot(a, b)

print('c')
print(c)
print(c.shape, '\n')

d = a @ b

print('d')
print(d)
print(d.shape)

a
[[1 2 3]
 [4 5 6]]
(2, 3) 

b
[[1 2]
 [3 4]
 [5 6]]
(3, 2) 

c
[[22 28]
 [49 64]]
(2, 2) 

d
[[22 28]
 [49 64]]
(2, 2)
