# Python - Numpy

This notebook introduces the Python NumPy library.

In [0]:
import numpy as np

NumPy is a library whose core is implemented in C, so NumPy operations on "large" data are much faster than their equivalents in base Python. This effect compounds as you chain many NumPy functions together.

In [61]:
%%time
# Numpy max
for i in range(10000):
    np.max(np.random.random(1000))

CPU times: user 154 ms, sys: 786 µs, total: 155 ms
Wall time: 159 ms


In [62]:
%%time
# Base Max
for i in range(10000):
    max(np.random.random(1000))

CPU times: user 1.13 s, sys: 38 µs, total: 1.13 s
Wall time: 1.13 s


### Some numpy functions.

In [63]:
# minimum
np.min([1,2])

1

In [64]:
# logarithm with base e
np.log(3)

1.0986122886681098

In [65]:
# Exponentiation with base e
np.exp(2)

7.38905609893065

In [66]:
a = [1, 0, -1, 2, 5, 50]
np.exp(a)

array([2.71828183e+00, 1.00000000e+00, 3.67879441e-01, 7.38905610e+00,
       1.48413159e+02, 5.18470553e+21])

NumPy arrays are a very popular data type in Python; they behave much like **lists** but are more flexible. They are often used to represent **matrices**.

In [67]:
my_array = np.array([1,2])
my_array

array([1, 2])

In [68]:
my_array = np.array([1,3,5,7,9])
my_array = np.append(my_array,[10,11])
print(my_array)

[ 1  3  5  7  9 10 11]


In [69]:
my_array = np.array([[1,3,5,7,9],[1,2,5,7,0]])
my_array

array([[1, 3, 5, 7, 9],
       [1, 2, 5, 7, 0]])

In [70]:
# you can see the dimensions of an array through its .shape attribute
my_array.shape

(2, 5)

In [71]:
# .shape is used here as an attribute (no call); its two entries are unpacked into m and n
m,n = my_array.shape
print('m = ', m,'\nn = ',n)

m =  2 
n =  5


In [72]:
# Reverse a vector (first element becomes last) 
Z = np.arange(12,50,4)
print(Z)
Z = Z[::-1]
print(Z)

[12 16 20 24 28 32 36 40 44 48]
[48 44 40 36 32 28 24 20 16 12]


In [73]:
my_array[::-1,::-1]

array([[0, 7, 5, 2, 1],
       [9, 7, 5, 3, 1]])

In [74]:
# Create a 10x10 array with random values and find the minimum and maximum values
Z = np.random.random((10,10))
Zmin, Zmax = Z.min(), Z.max()
#print(Z)
print(Zmin, Zmax)

0.0010254035766259495 0.9965944746883831


In [75]:
# Find the mean
np.mean(Z)

0.4948514541791776

### Some easy tools to create arrays.

In [76]:
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [77]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [78]:
np.full((2,3),15)

array([[15, 15, 15],
       [15, 15, 15]])

In [79]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [80]:
np.random.random((2,3))

array([[0.59530757, 0.95565639, 0.26341442],
       [0.93019955, 0.07156751, 0.78495935]])

In [81]:
np.random.seed(42)
np.random.random((2,3))

array([[0.37454012, 0.95071431, 0.73199394],
       [0.59865848, 0.15601864, 0.15599452]])

In [82]:
# Find the indices of the non-zero elements of [1,2,0,0,4,0,5,0]
nz = np.nonzero([1,2,0,0,4,0,5,0])
print(nz)

(array([0, 1, 4, 6]),)


### Numpy array slicing

In [0]:
x = np.array([[1,2,3],[3,4,5]])

In [84]:
x

array([[1, 2, 3],
       [3, 4, 5]])

In [85]:
# get second row (indexing is zero-based, so index 1 selects the second row)
x[1]

array([3, 4, 5])

In [86]:
# get second column
x[:,1]

array([2, 4])

In [87]:
# get up to second row and up to second column
x[:2,:2]

array([[1, 2],
       [3, 4]])

In [88]:
#Create a 2d array with 1 on the border and 0 inside 
Z = np.ones((10,10))
Z[1:-1,1:-1] = 0
print(Z)

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]


In [89]:
#Add a border (filled with 0's) around an existing array
Z = np.ones((5,5))
Z = np.pad(Z, pad_width=1, mode='constant', constant_values=0)
print(Z)

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


In [90]:
Z = np.ones((5,5))
Z = np.pad(Z, pad_width=3, mode='constant', constant_values=5)
print(Z)

[[5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
 [5. 5. 5. 1. 1. 1. 1. 1. 5. 5. 5.]
 [5. 5. 5. 1. 1. 1. 1. 1. 5. 5. 5.]
 [5. 5. 5. 1. 1. 1. 1. 1. 5. 5. 5.]
 [5. 5. 5. 1. 1. 1. 1. 1. 5. 5. 5.]
 [5. 5. 5. 1. 1. 1. 1. 1. 5. 5. 5.]
 [5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]]


### Numpy arrays operations

In [0]:
# Using dtype float32 instead of float64 costs precision in floating-point operations,
# but it can cut processing time significantly.
x = np.array([[1,2],[3,4]], dtype=np.float32)
y = np.array([[5,6],[7,8]], dtype=np.float32)

In [92]:
x

array([[1., 2.],
       [3., 4.]], dtype=float32)

In [93]:
y

array([[5., 6.],
       [7., 8.]], dtype=float32)

In [94]:
# Elementwise addition
%%time
np.add(x, y)

CPU times: user 32 µs, sys: 6 µs, total: 38 µs
Wall time: 41.5 µs


array([[ 6.,  8.],
       [10., 12.]], dtype=float32)

In [95]:
%%time
x + y

CPU times: user 30 µs, sys: 5 µs, total: 35 µs
Wall time: 40.3 µs


array([[ 6.,  8.],
       [10., 12.]], dtype=float32)

In [96]:
# Elementwise multiplication
np.multiply(x, y)

array([[ 5., 12.],
       [21., 32.]], dtype=float32)

In [97]:
# Matrix multiplication
np.dot(x, y)

array([[19., 22.],
       [43., 50.]], dtype=float32)

In [98]:
#The same with shortcut
x @ y

array([[19., 22.],
       [43., 50.]], dtype=float32)

In [99]:
%%time
a = np.random.rand(1000000)
b = np.random.rand(1000000)
print(a.shape)
print(b.shape)
c = np.dot(a,b)
print(c)

(1000000,)
(1000000,)
249917.36043890243
CPU times: user 25.6 ms, sys: 7.93 ms, total: 33.5 ms
Wall time: 43.9 ms


In [100]:
%%time
d = 0
for i in range(len(a)):
  d += a[i] * b[i]
print(c)

249917.36043890243
CPU times: user 583 ms, sys: 1.98 ms, total: 585 ms
Wall time: 590 ms


In [101]:
# Transpose matrix
x.T
#x.transpose()

array([[1., 3.],
       [2., 4.]], dtype=float32)

In [102]:
# Matrix inversion
np.linalg.inv(x)

array([[-2. ,  1. ],
       [ 1.5, -0.5]], dtype=float32)

In [103]:
x

array([[1., 2.],
       [3., 4.]], dtype=float32)

In [104]:
# Multiply the matrix with its inverse
print(np.dot(x, np.linalg.inv(x)))
print(np.dot(np.linalg.inv(x), x))

[[1. 0.]
 [0. 1.]]
[[1. 0.]
 [0. 1.]]


In [105]:
# Given a 1D array, negate in place all elements that are greater than 3 and at most 8
# (the lower bound is exclusive, the upper bound is inclusive).
Z = np.arange(11)
print(Z)
Z[(3 < Z) & (Z <= 8)] *= -1
print(Z)

[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0  1  2  3 -4 -5 -6 -7 -8  9 10]


In [106]:
Z

array([ 0,  1,  2,  3, -4, -5, -6, -7, -8,  9, 10])

For additional resources on the first 4 lessons, visit http://cs231n.github.io/python-numpy-tutorial/.

In [107]:
# Create a vector of size 10 with evenly spaced values from 0 (included) to 1 (excluded)
Z = np.linspace(0,1,10,endpoint=False)
print(Z)

[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]


In [108]:
#Create a random vector of size 10 and sort it
Z = np.random.random(10)
print(Z)
Z.sort()
print(Z)

[0.65422382 0.82019251 0.90473721 0.13778516 0.15539772 0.30713981
 0.92190893 0.70724216 0.52847754 0.31838341]
[0.13778516 0.15539772 0.30713981 0.31838341 0.52847754 0.65422382
 0.70724216 0.82019251 0.90473721 0.92190893]


In [109]:
# Make an array immutable (read-only)
Z = np.zeros(10)
Z[0] = 1
print(Z)
Z.flags.writeable = False
# Uncommenting the next line would now fail, because the array is read-only:
#Z[0] = 1

[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [110]:
#Create random vector of size 10 and replace the maximum value by 0
Z = np.random.random(10)
print(Z)
Z[Z.argmax()] = 0
print(Z)

[0.54610143 0.84563959 0.76074656 0.20226323 0.03965411 0.40199364
 0.53778507 0.05260939 0.47047281 0.42236243]
[0.54610143 0.         0.76074656 0.20226323 0.03965411 0.40199364
 0.53778507 0.05260939 0.47047281 0.42236243]


In [111]:
#Find the closest value (to a given scalar) in a vector
Z = np.arange(100)
v = np.random.uniform(0,100)
print(v)
index = (np.abs(Z-v)).argmin()
print(Z[index])

5.899429114360554
6


### Warm up Exercises

NOTE: After you write your code, run the cell with **assert** statements that follows each task. If an error is raised, something in your solution is wrong. If nothing is printed, the exercise is solved correctly. Do NOT modify the cells with **assert** statements.

1) Write a function which calculates The following formula. $(XX^{T})^{-1}$

In [0]:
def transpose_inv(X):
    """Compute the inverse of X times its transpose, i.e. (X X^T)^(-1).

    X is expected to be a 2-D NumPy array; the product X X^T must be
    invertible, otherwise np.linalg.inv raises an error.
    """
    # Product of the matrix with its own transpose.
    gram = X.dot(X.T)
    return np.linalg.inv(gram)

In [0]:
# The inverse is computed in floating point, so its last digits can differ between
# platforms and linear-algebra backends; compare with a tolerance, not with ==.
assert np.isclose(transpose_inv(np.array([[1,0],[1,5]]))[1][1], 0.04)
assert np.isclose(transpose_inv(np.array([[10,0],[20,55]]))[0][0], 0.011322314049586777)
assert np.isclose(transpose_inv(np.array([[10,1],[78,10]]))[0][1], -1.6322314049583755)

2) Write a function which calculates the dot product of the two given matrices


In [0]:
def my_dot(A, B):
    """Return the matrix product of two 2-D arrays, computed with explicit loops.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Left operand.
    B : array_like, shape (n, p)
        Right operand.

    Returns
    -------
    numpy.ndarray
        Array of shape (m, p) with float64 entries, where
        result[i, j] = sum over k of A[i, k] * B[k, j].

    Raises
    ------
    ValueError
        If either input is not 2-D, or if the number of columns of A differs
        from the number of rows of B.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    if A.ndim != 2 or B.ndim != 2:
        raise ValueError(
            "my_dot expects two 2-D arrays, got shapes %s and %s" % (A.shape, B.shape)
        )

    n_rows, n_inner = A.shape
    # Without this check a mismatch would either silently drop columns of A
    # or fail with an unrelated IndexError deep inside the loops.
    if B.shape[0] != n_inner:
        raise ValueError(
            "shapes %s and %s are not aligned: %d (columns of A) != %d (rows of B)"
            % (A.shape, B.shape, n_inner, B.shape[0])
        )
    n_cols = B.shape[1]

    result = np.zeros((n_rows, n_cols))
    for i in range(n_rows):              # rows of A
        for j in range(n_cols):          # columns of B
            for k in range(n_inner):     # shared inner dimension
                result[i, j] += A[i, k] * B[k, j]

    return result

In [0]:
assert my_dot(np.array([[1, 3], [5,6]]), np.array([[5,9], [5,4]]))[0,1] == 21
assert my_dot(np.array([[1, 3], [5,6]]), np.array([[5,9], [5,4]]))[1,1] == 69
assert my_dot(np.array([[1, 3, 5], [5, 6, 2]]), np.array([[5,9], [5,4], [4,6]]))[1,0] == 63

3) Write a function that returns the derivative of the function $f(x) = x^{3} + 4x^{2} - 8x + 1$ at any point $x_{0}$.

In [0]:
def my_deriv(x0):
  """Evaluate f'(x0) for f(x) = x^3 + 4x^2 - 8x + 1, i.e. 3*x0^2 + 8*x0 - 8."""
  quadratic_term = 3 * x0**2
  linear_term = 8 * x0
  return quadratic_term + linear_term - 8

In [0]:
assert my_deriv(0) == -8
assert my_deriv(2) == 20

4) Write a function that returns the gradient of the function $f(x_{1}, x_{2}) = x_{1}^{2} x_{2}^{3} + 4 x_{1}^{3} x_{2} - 8 x_{2}^{3} + 1$ at any point $(x_{1}, x_{2})_{0}$. Please return the result as a list.

In [0]:
def my_grad(x1_0, x2_0):
  """Gradient of f(x1, x2) = x1^2 * x2^3 + 4 * x1^3 * x2 - 8 * x2^3 + 1.

  Returns the two partial derivatives, evaluated at (x1_0, x2_0), as the
  list [df/dx1, df/dx2].
  """
  # Partial derivative with respect to x1: 2*x1*x2^3 + 12*x1^2*x2
  df_dx1 = 2 * x1_0 * (x2_0 ** 3) + 12 * (x1_0 ** 2) * x2_0

  # Partial derivative with respect to x2: 3*x1^2*x2^2 + 4*x1^3 - 24*x2^2
  x2_squared = x2_0 ** 2
  df_dx2 = (x1_0 ** 2) * 3 * x2_squared + 4 * (x1_0 ** 3) - 24 * x2_squared

  return [df_dx1, df_dx2]

In [0]:
assert my_grad(0, 0) == [0,0]
assert my_grad(2, 3) == [252, -76]