# Capítulo 2: Introdução ao NumPy

In [1]:
import numpy as np

In [2]:
#checando a versão do numpy
#checking numpy version
np.__version__

'1.19.2'

In [3]:
# criando um array de inteiros
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [4]:
# o numpy uniformiza os tipos presentes num array
np.array([np.pi,1,2,3,4])

array([3.14159265, 1.        , 2.        , 3.        , 4.        ])

In [5]:
A = np.array([range(i,i+3) for i in [2,4,6]])
A[0,1]

3

In [6]:
# Criando arrays do 0 alocando memória
np.zeros(10,dtype = int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [7]:
# criando matrizes 
np.ones((3,3),dtype = float)

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [8]:
# criando arrays repletos de um único valor
np.full((3,5),np.pi)

array([[3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265]])

In [9]:
# array filled with a linear sequence:
# starts at 0, steps by 2, and stops before 20 (the stop value is excluded)
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [10]:
# criando um array de 0 a 1 com passos igualmente espaçados
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [11]:
# gerando um array de números aleatórios uniformemente distribuídos
np.random.random((3,3))

array([[0.78437273, 0.46299242, 0.01372989],
       [0.09875637, 0.43287457, 0.09649377],
       [0.42938755, 0.52911848, 0.26139942]])

In [12]:
# gerando a matriz identidade
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

# Atributos do Numpy Array

In [13]:
np.random.seed(0)

In [14]:
x1 = np.random.randint(10,size = 6)
x2 = np.random.randint(10,size = (3,4))
x3 = np.random.randint(10,size = (3,4,5))

In [15]:
# obtendo as propriedades do array

print("x3 ndim: ",x3.ndim)
print("x3 shape: ",x3.shape)
print("x3 size: ",x3.size)
print("dtype",x3.dtype)

x3 ndim:  3
x3 shape:  (3, 4, 5)
x3 size:  60
dtype int64


In [16]:
print("itemsize:",x3.itemsize,"bytes")

itemsize: 8 bytes


In [17]:
print("itemsize:",x3.itemsize,"bytes")
print("nbytes:",x3.nbytes,"bytes")

itemsize: 8 bytes
nbytes: 480 bytes


# Acessando os elementos de um array

In [18]:
x1 = np.random.randint(0,10,50)

In [19]:
# indexing from the start of the array (third element)
# NOTE: this value is computed but not displayed, because a cell only
# echoes its last expression
x1[2]
# indexing from the end of the array (second-to-last element)
x1[-2]

3

In [20]:
# Acessando os elementos de um array multidimensional

x2 = np.random.randint(0,10,size = (4,4))
x2[0,-2]

0

# Recortando Arrays

In [21]:
x = np.arange(10)

In [22]:
print(x[:5]) # the first five elements
print(x[5:]) # the elements from index 5 onward (index 5 included)
print(x[4:7]) # a middle sub-array: indices 4, 5 and 6

[0 1 2 3 4]
[5 6 7 8 9]
[4 5 6]


In [23]:
# the general slice form is start:stop:step
print(x[::2]) # every other element, starting at index 0
print(x[1::2])# every other element, starting at index 1 (the 2nd element)
print(x[::-1]) # all elements, in reverse order
print(x[5::-2]) # reversed from index 5, taking every other element

[0 2 4 6 8]
[1 3 5 7 9]
[9 8 7 6 5 4 3 2 1 0]
[5 3 1]


In [24]:
# subarrays multidimensionais
print(x2[:2,:3]) #duas primeiras linhas, 3 primeiras colunas

[[0 6 0]
 [3 3 8]]


In [25]:
x2[:3,::2] # first three rows (x2 has four), every other column

array([[0, 0],
       [3, 8],
       [8, 3]])

In [26]:
x2[::-1,::-1] #invertendo o array por completo

array([[3, 8, 8, 0],
       [2, 3, 2, 8],
       [8, 8, 3, 3],
       [6, 0, 6, 0]])

In [27]:
# Acessando linhas e colunas

print(x2[0,:]) # acessando a primeira linha

print(x2[:,0]) # acessando a primeira coluna

print(x2[0]) # acessando a primeira linha

[0 6 0 6]
[0 3 8 0]
[0 6 0 6]


In [28]:
# Sub-arrays obtained by slicing are not copies
print(x2[:2,:2])
x2sub = x2[:2,:2]
print(x2sub)
x2sub[0,0]=1
print(x2sub)
print(x2)
# the original array changed too: the write through x2sub shows up in x2

[[0 6]
 [3 3]]
[[0 6]
 [3 3]]
[[1 6]
 [3 3]]
[[1 6 0 6]
 [3 3 8 8]
 [8 2 3 2]
 [0 8 8 3]]


## Criando cópias de arrays

In [29]:
# Making an independent copy of a sub-array
x2_sub_copy = x2[:2,:2].copy()
x2_sub_copy[0,0] = 57
print('array novo:\n',x2_sub_copy)
print('array antigo:\n', x2)
# the parent array is unchanged: the write only touched the copy

array novo:
 [[57  6]
 [ 3  3]]
array antigo:
 [[1 6 0 6]
 [3 3 8 8]
 [8 2 3 2]
 [0 8 8 3]]


# Remodelando arrays

In [33]:
grid = np.arange(1,10).reshape(3,3)
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [37]:
x = np.array([1,2,3])
# vetor-linha via reshape
x.reshape((1,3))

array([[1, 2, 3]])

In [38]:
# row vector via np.newaxis (an index placed inside the brackets, not a method call)
x[np.newaxis,:]

array([[1, 2, 3]])

In [39]:
# vetor coluna via reshape
x.reshape((3,1))

array([[1],
       [2],
       [3]])

In [40]:
#vetor coluna via newaxis:
x[:,np.newaxis]

array([[1],
       [2],
       [3]])

# Concatenando Arrays

In [44]:
x = np.array([1,2,3])
y = x[::-1]
z = np.array([99,99,99])
print(np.concatenate([x,y])) # concatenando apenas 2 arrays
print(np.concatenate([x,y,z])) # concatenando 3 ou mais arrays

[1 2 3 3 2 1]
[ 1  2  3  3  2  1 99 99 99]


In [46]:
# np.concatenate também tem validade para arrays bidimensionais
grid = np.array([[1,2,3],[4,5,6]])
print(np.concatenate([grid,grid],axis = 0)) # concatenação nas linhas
print(np.concatenate([grid,grid],axis = 1)) # concatenação nas colunas


[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]


In [49]:
# For arrays with different numbers of dimensions, vstack and hstack
# are the recommended tools.
x = np.array([1, 2, 3])
y = np.array([[99],
              [99]])
grid = np.array([[9, 8, 7],
                 [6, 5, 4]])
print(np.vstack([x, grid]))  # stack x on top of grid as an extra row
print(np.hstack([grid, y]))  # append y to grid as an extra column

[[1 2 3]
 [9 8 7]
 [6 5 4]]
[[ 9  8  7 99]
 [ 6  5  4 99]]


# Dividindo arrays

In [51]:
x = [1,2,3,99,99,3,2,1]
# NOTE: this rebinds x1, x2 and x3, replacing the arrays created earlier in the notebook
x1,x2,x3 = np.split(x,[3,5])
print(x1,x2,x3)
# N split points produce N+1 sub-arrays (here 2 indices -> 3 pieces)

[1 2 3] [99 99] [3 2 1]


In [58]:
# vsplit
grid = np.arange(16).reshape((4,4))
upper,lower = np.vsplit(grid,2)
print(upper,'\n',lower)

[[0 1 2 3]
 [4 5 6 7]] 
 [[ 8  9 10 11]
 [12 13 14 15]]


In [60]:
# hsplit

left,right = np.hsplit(grid,[2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


# Universal Functions

In [67]:
# exemplo sem ufunc

np.random.seed(0)
def compute_reciprocals(values):
    """Return an array holding the reciprocal 1/v of each element of ``values``.

    The result has one slot per input element and is filled one element at a
    time in a plain Python loop; this is the non-ufunc version of the
    computation.
    """
    reciprocals = np.empty(len(values))
    for position, value in enumerate(values):
        reciprocals[position] = 1 / value
    return reciprocals
values = np.random.randint(1,10,size = 5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [68]:
big_array = np.random.randint(1,100,size = 1000000)
%timeit compute_reciprocals(big_array)

360 ms ± 10.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [69]:
# com ufunc
%timeit 1/big_array

1.54 ms ± 91.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Explorando as Ufuncs

In [74]:
x = np.arange(4)
print("x+5 = ",x+5)
print("x-5 = ",x-5)
print("x*2 = ",x*2)
print("x/2 = ",x/2)
print("x//2 = ",x//2)
print("x%2 = ",x%2)

x+5 =  [5 6 7 8]
x-5 =  [-5 -4 -3 -2]
x*2 =  [0 2 4 6]
x/2 =  [0.  0.5 1.  1.5]
x//2 =  [0 0 1 1]
x%2 =  [0 1 0 1]


In [79]:
# funções trigonométricas
theta = np.linspace(0,np.pi,3)
print("theta = ",theta)
print("sin(theta) = ",np.sin(theta))
print("cos(theta) = ",np.cos(theta))
print("tan(theta) = ",np.tan(theta))
# funções trigonométricas inversas
x = [-1,0,1]
print("x = ",x)
print("arcsin(x) = ",np.arcsin(x))
print("arccos(x) = ",np.arccos(x))
print("arctan(x) = ",np.arctan(x))

theta =  [0.         1.57079633 3.14159265]
sin(theta) =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]
x =  [-1, 0, 1]
arcsin(x) =  [-1.57079633  0.          1.57079633]
arccos(x) =  [3.14159265 1.57079633 0.        ]
arctan(x) =  [-0.78539816  0.          0.78539816]


In [80]:
# funções exponenciais
x = [1,2,3]
print("x =", x)
print("e^x =", np.exp(x))
print("2^x =", np.exp2(x))
print("3^x =", np.power(3, x))

x = [1, 2, 3]
e^x = [ 2.71828183  7.3890561  20.08553692]
2^x = [2. 4. 8.]
3^x = [ 3  9 27]


In [82]:
# funções logarítmicas
x = [1, 2, 4, 10]
print("x =", x)
print("log(x) =", np.log(x))
print("log2(x) =", np.log2(x))
print("log10(x) =", np.log10(x))

x = [1, 2, 4, 10]
log(x) = [0.         0.69314718 1.38629436 2.30258509]
log2(x) = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


In [83]:
x = [0,0.001,0.01,0.1]
print("exp(x)-1",np.expm1(x))
print("log(1+x) = ",np.log1p(x))

exp(x)-1 [0.         0.0010005  0.01005017 0.10517092]
log(1+x) =  [0.         0.0009995  0.00995033 0.09531018]


In [86]:
# funções especiais
from scipy import special
x = [1,5,10]
print("gamma(x) = ",special.gamma(x))
print("ln|gamma(x)| = ", special.gammaln(x))
print("beta(x,2) = ",special.beta(x,2))

gamma(x) =  [1.0000e+00 2.4000e+01 3.6288e+05]
ln|gamma(x)| =  [ 0.          3.17805383 12.80182748]
beta(x,2) =  [0.5        0.03333333 0.00909091]


In [87]:
x = np.array([0, 0.3, 0.7, 1.0])
print("erf(x) =", special.erf(x))
print("erfc(x) =", special.erfc(x))
print("erfinv(x) =", special.erfinv(x))

erf(x) = [0.         0.32862676 0.67780119 0.84270079]
erfc(x) = [1.         0.67137324 0.32219881 0.15729921]
erfinv(x) = [0.         0.27246271 0.73286908        inf]


# ufuncs avançadas

In [92]:
# especificando a saida para ganho de desempenho
x = np.arange(5)
y = np.empty(5)
np.multiply(x,10,out = y)
print(y)

[ 0. 10. 20. 30. 40.]


In [93]:
y = np.zeros(10)
np.power(2,x,out = y[::2])
print(y)

[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]


In [94]:
# agregados
x = np.arange(1,6)
np.add.reduce(x)

15

In [96]:
np.multiply.reduce(x)

120

In [101]:
print(np.add.accumulate(x))
# idêntico a 
print(np.cumsum(x))

[ 1  3  6 10 15]
[ 1  3  6 10 15]


In [102]:
np.multiply.accumulate(x)

array([  1,   2,   6,  24, 120])

In [103]:
# produto externo
x = np.arange(1,6)
np.multiply.outer(x,x)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

# Agregados: mínimo, máximo e tudo o que há entre eles

In [105]:
L = np.random.random(100)
%timeit sum(L)

16 µs ± 683 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [107]:
%timeit np.sum(L) #5* mais rápido que o sum!!!!!!!!!!!!!!!

3.23 µs ± 68 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [108]:
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)

153 ms ± 2.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
431 µs ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [109]:
min(big_array),max(big_array)

(7.071203171893359e-07, 0.9999997207656334)

In [110]:
np.min(big_array),np.max(big_array)

(7.071203171893359e-07, 0.9999997207656334)

In [111]:
%timeit min(big_array),max(big_array)

194 ms ± 5.24 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [112]:
%timeit np.min(big_array),np.max(big_array)

1.01 ms ± 40 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [117]:
# sintaxe compacta
big_array.max(),big_array.min()

(0.9999997207656334, 7.071203171893359e-07)

# Agregados Multidimensionais

In [118]:
M = np.random.random((3,4))
print(M)

[[0.79832448 0.44923861 0.95274259 0.03193135]
 [0.18441813 0.71417358 0.76371195 0.11957117]
 [0.37578601 0.11936151 0.37497044 0.22944653]]


In [120]:
M.sum()

5.1136763453287335

In [122]:
M.min(axis = 0)

array([0.18441813, 0.11936151, 0.37497044, 0.03193135])

In [123]:
M.max(axis = 1)

array([0.95274259, 0.76371195, 0.37578601])

In [126]:
# flatten M into a 1-D array for the aggregation examples that follow;
# ravel() works for any number of dimensions, with no manual size arithmetic
Y = M.ravel()

In [132]:
# each aggregate printed next to its NaN-safe counterpart
# (the two values in every pair are equal for this Y)
print(np.sum(Y),np.nansum(Y))
print(np.prod(Y),np.nanprod(Y))
print(np.mean(Y),np.nanmean(Y))
print(np.percentile(Y,10),np.nanpercentile(Y,10))

5.1136763453287335 5.1136763453287335
5.064029584029998e-07 5.064029584029998e-07
0.4261396954440611 0.4261396954440611
0.11938247707433779 0.11938247707433779


# Pandas interlude