# Introdução a NumPy

In [1]:
import numpy as np

In [2]:
# Build a 1-D array holding the 24 consecutive integers 0..23.
A = np.arange(0, 24)
print(A)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


In [3]:
# 24 consecutive integers starting at 8, laid out row by row as a 4x6 matrix.
A = np.reshape(np.arange(8, 32), (4, 6))
print(A)

[[ 8  9 10 11 12 13]
 [14 15 16 17 18 19]
 [20 21 22 23 24 25]
 [26 27 28 29 30 31]]


### rows, cols, dimensions, shape and datatype

In [4]:
# Number of rows of A: the length along the first axis.
print("number of rows = ", len(A))

# Number of columns of A: the length along the last axis.
print("number of cols = ", A.shape[-1])

# Number of dimensions (axes) of A.
print("number of dimensions = ", np.ndim(A))

# Shape of A as a tuple (rows, cols).
print("shape = ", np.shape(A))

# Data type of the elements of A.
print("datatype = ", A.dtype)

number of rows =  4
number of cols =  6
number of dimensions =  2
shape =  (4, 6)
datatype =  int32


In [5]:
# Boolean matrix B: True wherever the corresponding element of A is less than 15.
# The comparison is applied element-wise, so no explicit loop is needed.
B = np.less(A, 15)
print("A = \n", A)
# Convert the mask to floats so it prints as 1. / 0. instead of True / False.
print("B = \n", B.astype(float))

A = 
 [[ 8  9 10 11 12 13]
 [14 15 16 17 18 19]
 [20 21 22 23 24 25]
 [26 27 28 29 30 31]]
B = 
 [[ 1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]]


### Axis reduction: sum

In [6]:
# Collapsing axis 0 adds the rows together, giving one total per column;
# collapsing axis 1 adds the columns together, giving one total per row.
print(np.sum(A, axis=0))
print(np.sum(A, axis=1))

[68 72 76 80 84 88]
[ 63  99 135 171]


In [7]:
# Q: what is the number of dimensions of the array C = A.sum(axis=0)?
# A: 1, because summing a 2-D array along one axis removes that axis.
C = np.sum(A, axis=0)
print("number of dimensions = ", np.ndim(C))

# Mean value of A: the total of all elements divided by the element count.
mean = C.sum() / A.size
print("mean = ", mean)

number of dimensions =  1
mean =  19.5


In [8]:
# Matrix C is a min-max normalisation of A over the whole matrix, so that its
# values lie between 0.0 and 1.0: subtract the global minimum, then divide by
# the global range (max - min).
C = (A - A.min()) / (A.max() - A.min())

print("C = \n", C)

C = 
 [[ 0.          0.04347826  0.08695652  0.13043478  0.17391304  0.2173913 ]
 [ 0.26086957  0.30434783  0.34782609  0.39130435  0.43478261  0.47826087]
 [ 0.52173913  0.56521739  0.60869565  0.65217391  0.69565217  0.73913043]
 [ 0.7826087   0.82608696  0.86956522  0.91304348  0.95652174  1.        ]]


In [9]:
# Matrix D is a per-column min-max normalisation of A, so that each column of D
# has values between 0. and 1. The per-column minimum and range are 1-D arrays
# with one entry per column, which broadcast across the rows of A.
D = (A - A.min(axis=0)) / (A.max(axis=0) - A.min(axis=0))
print("D = \n", D)

D = 
 [[ 0.          0.          0.          0.          0.          0.        ]
 [ 0.33333333  0.33333333  0.33333333  0.33333333  0.33333333  0.33333333]
 [ 0.66666667  0.66666667  0.66666667  0.66666667  0.66666667  0.66666667]
 [ 1.          1.          1.          1.          1.          1.        ]]


### Slicing

In [12]:
# This operation is called slicing: keep every row (`:`) and every second
# column starting at column 1 (`1::2`), i.e. the odd-indexed columns of A.
AA = A[:,1::2]
print(AA)

[[ 9 11 13]
 [15 17 19]
 [21 23 25]
 [27 29 31]]


In [13]:
# Matrix AB holds the odd-indexed rows of A (rows 1, 3, ...), selected by slicing:
# start at row 1 and step by 2; leaving out the column index keeps every column.
AB = A[1::2]
print("AB = \n", AB)

AB = 
 [[14 15 16 17 18 19]
 [26 27 28 29 30 31]]


### Rotation and matrix dot product

In [42]:
# Create a matrix AC which is the matrix A rotated 90 degrees anti-clockwise.
# Two ways of doing it are timed: np.rot90, and transposing A then reversing the row order.
%timeit AC = np.rot90(A)
%timeit AC = A.T[::-1,:]

# The assignment is repeated outside %timeit so that AC is defined for the print below
# (names assigned inside a %timeit statement are not kept in the notebook namespace).
AC = A.T[::-1,:]

print("AC = \n", AC)

The slowest run took 25.81 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 6.51 µs per loop
The slowest run took 42.50 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 461 ns per loop
AC = 
 [[13 19 25 31]
 [12 18 24 30]
 [11 17 23 29]
 [10 16 22 28]
 [ 9 15 21 27]
 [ 8 14 20 26]]


Compute $$ E = A A^T $$

In [43]:
# Time the function form and the method form of the same matrix product.
%timeit E = np.dot(A, A.T)
%timeit E = A.dot(A.T)

# Compute E = A A^T once more outside %timeit so that E is available to print.
# A is 4x6 and A.T is 6x4, so E is 4x4.
E = np.dot(A, A.T)

print(E)

The slowest run took 20.63 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.17 µs per loop
The slowest run took 8.12 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.15 µs per loop
[[ 679 1057 1435 1813]
 [1057 1651 2245 2839]
 [1435 2245 3055 3865]
 [1813 2839 3865 4891]]


### 3-D array

In [44]:
# View the 24 elements of A as a 3-D array: 2 matrices of 4 rows by 3 columns.
F = np.reshape(A, (2, 4, 3))
print(F)

[[[ 8  9 10]
  [11 12 13]
  [14 15 16]
  [17 18 19]]

 [[20 21 22]
  [23 24 25]
  [26 27 28]
  [29 30 31]]]


In [45]:
# Number of dimensions of F.
print("number of dimensions = ", np.ndim(F))

# Shape of F.
print("shape of F = ", np.shape(F))

# Mean value of each of the two matrices F[0] and F[1] in a single call:
# averaging over axes 1 and 2 together leaves one value per matrix along axis 0.
print("mean value of the two matrices F[0] and F[1] = ", np.mean(F, axis=(1, 2)))

number of dimensions =  3
shape of F =  (2, 4, 3)
mean value of the two matrices F[0] and F[1] =  [ 13.5  25.5]


In [46]:
# 1-D vector holding the integers 1..4; it is used in the broadcasting exercise.
v = np.arange(1, 5)
print(v)

[1 2 3 4]


### Broadcasting

- What is the concept of broadcasting in NumPy?
- Answer:

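Broadcasting is how NumPy applies an element-wise operation to arrays of different shapes: the smaller array is applied across the larger one without an explicit loop.
The shapes must be compatible: comparing them from the last dimension backwards, each pair of dimensions must either be equal or one of them must be 1.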


For example, in the cell below the smaller array `b` (shape `(3,)`) is broadcast across each row of the array `a` (shape `(2, 3)`).

Further information can be found [here](https://numpy.org/doc/stable/user/basics.broadcasting.html).

In [47]:
# Broadcasting example: b has shape (3,) and a has shape (2, 3), so b is
# multiplied element-wise into each row of a.
a = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
b = np.array([3, 6, 8])
np.multiply(a, b)

array([[  3.,  12.,  24.],
       [ 12.,  30.,  48.]])

In [48]:
# Using the concept of broadcasting, create matrix G by adding vector v to every column of A:

# v has one entry per row of A. Transposing A puts that axis last, where it lines up
# with v for broadcasting; transposing the sum back restores A's orientation.
G = A.T + v
G = G.T

# Alternatives that reshape v into a column instead of transposing A.
# Each line below is only timed; the G printed at the end is the one computed above.
%timeit G = np.expand_dims(v, axis=1) + A

%timeit G = v.reshape(len(v),1) + A

%timeit G = v[:,None] + A

print("G = \n", G)

The slowest run took 26.48 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 3.22 µs per loop
The slowest run took 6.69 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.88 µs per loop
The slowest run took 8.40 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.61 µs per loop
G = 
 [[ 9 10 11 12 13 14]
 [16 17 18 19 20 21]
 [23 24 25 26 27 28]
 [30 31 32 33 34 35]]


### Function - one-hot encoding

Define a function that receives a one-dimensional array as input and returns its one-hot encoding, following the example below:

In [49]:
# Example input: a 1-D array of integer class indices.
aa = np.asarray([2, 0, 3, 2, 1])
print(aa)

[2 0 3 2 1]


In [50]:
# Expected output: one row per element of the input array, with a single 1
# in the column whose index equals that element's value.
oo = np.array([[0,0,1,0],
               [1,0,0,0],
               [0,0,0,1],
               [0,0,1,0],
               [0,1,0,0]])
print(oo)

[[0 0 1 0]
 [1 0 0 0]
 [0 0 0 1]
 [0 0 1 0]
 [0 1 0 0]]


In [54]:
# Try to avoid explicit loop as much as possible
# Use only NumPy operations, no other libraries
def one_hot(i, num_classes=None):
    """Return the one-hot encoding of an array of class indices.

    Row ``k`` of the result is all zeros except for a single 1 in column
    ``i[k]``. The encoding is built with NumPy indexing only, without an
    explicit Python loop.

    Parameters
    ----------
    i : array_like of int
        One-dimensional sequence of non-negative class indices.
    num_classes : int, optional
        Number of columns of the result. When omitted it is ``max(i) + 1``
        (or 0 for empty input).

    Returns
    -------
    numpy.ndarray
        Array of dtype ``int8`` with one row per element of ``i`` and
        ``num_classes`` columns.

    Raises
    ------
    TypeError
        If ``i`` is non-empty and does not have an integer dtype.
    ValueError
        If ``i`` contains a negative index, or an index that does not fit
        in ``num_classes`` columns.
    """
    idx = np.asarray(i)

    # np.max raises on an empty array, so empty input is answered directly.
    if idx.size == 0:
        return np.zeros((0, 0 if num_classes is None else num_classes), dtype=np.int8)

    if not np.issubdtype(idx.dtype, np.integer):
        raise TypeError("one_hot expects integer class indices, got dtype %s" % idx.dtype)

    # A negative index would silently wrap around to a column counted from the end.
    if idx.min() < 0:
        raise ValueError("one_hot expects non-negative class indices")

    # Convert to a Python int before adding 1 so that a narrow input dtype
    # (e.g. int8 holding 127) cannot overflow.
    largest = int(idx.max())
    if num_classes is None:
        num_classes = largest + 1
    elif largest >= num_classes:
        raise ValueError(
            "index %d does not fit in num_classes=%d columns" % (largest, num_classes)
        )

    M = np.zeros((idx.size, num_classes), dtype=np.int8)
    # Pair row k with column idx[k] and set all of those cells in one assignment.
    M[np.arange(idx.size), idx] = 1

    return M

# Reproduce the worked example: the result should match the expected output shown earlier.
print(one_hot(aa))

# Test with another vector.
print(one_hot(np.array([1, 3, 2, 0, 5, 4]))) # The values passed in are class indices, not labels

[[0 0 1 0]
 [1 0 0 0]
 [0 0 0 1]
 [0 0 1 0]
 [0 1 0 0]]
[[0 1 0 0 0 0]
 [0 0 0 1 0 0]
 [0 0 1 0 0 0]
 [1 0 0 0 0 0]
 [0 0 0 0 0 1]
 [0 0 0 0 1 0]]
