# NumPy Tutorial

### Load in NumPy 

In [1]:
import numpy as np

### Basics

In [2]:
# One-dimensional array built from a plain Python list
a = np.array([1, 2, 3])
print(a)

[1 2 3]


In [3]:
# Two-dimensional array of floats: two rows, three columns
b = np.array([
    [1.0, 4.0, 2.0],
    [8.0, 3.0, 7.0],
])
print(b)

[[1. 4. 2.]
 [8. 3. 7.]]


In [4]:
# Number of dimensions (axes) of the array; `a` is one-dimensional here
a.ndim

1

In [5]:
# Shape of the 1-D array: a one-element tuple holding its length
a.shape

(3,)

In [6]:
# Shape of the 2-D array as (rows, columns)
b.shape

(2, 3)

In [7]:
# Element type NumPy chose by default for the integer list.
# NOTE: the default integer width is platform-dependent (int32 in the output
# recorded here; it can be int64 on other platforms / NumPy versions).
a.dtype

dtype('int32')

In [8]:
# Request an explicit element type instead of relying on the default
a = np.array([1, 2, 3], dtype=np.int16)
a.dtype

dtype('int16')

In [9]:
# Size of a single element in bytes
a.itemsize
# for an int16 array, a.itemsize returns 2

2

In [10]:
# Total number of elements in the array
a.size

3

In [11]:
# Total bytes used by the elements: element count * bytes per element
a.size * a.itemsize

6

In [12]:
# The same total in bytes, read directly from the nbytes attribute
a.nbytes

6

In [13]:
# Bytes per element of the float array `b` (8 bytes, i.e. 64-bit floats)
b.itemsize

8

### Accessing/changing specific elements, rows, columns, etc.

In [14]:
# 2 x 7 integer array used by the indexing examples that follow
a = np.array([
    [1, 2, 3, 4, 5, 6, 7],
    [8, 9, 10, 11, 12, 13, 14],
])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [15]:
# Shape of the array as (rows, columns)
a.shape

(2, 7)

In [16]:
# Single element at [row, column]; indices are zero-based, so this is row 1, column 5
a[1, 5]

13

In [17]:
# Entire first row: row index 0, every column
a[0, :]

array([1, 2, 3, 4, 5, 6, 7])

In [18]:
# Entire third column: every row, column index 2
a[:, 2]

array([ 3, 10])

In [19]:
# Slice part of the first row with [start index:stop index:step size].
# Start at index 1, stop before the last element, take every second value -> [2, 4, 6]
a[0,1:-1:2]

array([2, 4, 6])

In [20]:
# Overwrite a single element in place, then show the result
a[1,5] = 20
print(a) 

# Overwrite a whole column at once: one new value per row
a[:,2] = [100,200]
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 20 14]]
[[  1   2 100   4   5   6   7]
 [  8   9 200  11  12  20  14]]


### 3D array example

In [21]:
# Three-dimensional array: two stacked 2 x 2 blocks
b = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [22]:
# Shape of b: one length per axis
b.shape

(2, 2, 2)

In [23]:
# Pick out a single element by indexing from the outermost axis inwards.
# Example: locate the value 4 by narrowing down one axis at a time:
#b[0]      # first 2 x 2 block
#b[0,1]    # second row of that block
b[0,1,1]

4

In [24]:
# Second row (index 1) of every 2 x 2 block, collected into a 2-D array
b[:,1,:]

array([[3, 4],
       [7, 8]])

In [25]:
# Replace the second row of each block; the assigned nested list is 2 x 2, matching the slice
b[:,1,:] = [[9,9],[8,8]]
print(b)

[[[1 2]
  [9 9]]

 [[5 6]
  [8 8]]]


In [26]:
# Replacing with rows of unequal length cannot work: the ragged nested list
# cannot be converted to a regular array, so NumPy raises a ValueError.
# The error is caught and printed so that this demonstration does not stop a
# "Restart Kernel -> Run All" of the notebook.
try:
    b[:,1,:] = [[9,9,9],[8,8]]
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

### Initializing different types of arrays

In [27]:
# Array filled with zeros; the argument is the shape (an int or a tuple of ints).
# Other shapes to try:
#np.zeros(5)
#np.zeros((2,3))
np.zeros((2,3,2,3))

array([[[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]],


       [[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]]])

In [29]:
# Array filled with ones, with an explicit integer element type
np.ones((4,2,2), dtype='int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]])

In [30]:
# Array filled with an arbitrary constant: np.full(shape, value)
#np.full((2,2), 99)
np.full((2,2), 99, dtype='float32')

array([[99., 99.],
       [99., 99.]], dtype=float32)

In [31]:
# full_like builds an array with the same shape as an existing one (here `a`), filled with a constant
np.full_like(a, 4)

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [32]:
# Random floats in [0, 1); rand takes the dimensions as separate arguments
#np.random.rand(4,2)
np.random.rand(4,2,3)

array([[[0.51734191, 0.76815278, 0.67399275],
        [0.57783785, 0.6363701 , 0.4654415 ]],

       [[0.95907862, 0.63302527, 0.38056865],
        [0.20168554, 0.38692007, 0.71225836]],

       [[0.8086219 , 0.16489161, 0.07583346],
        [0.80813649, 0.08376291, 0.62594092]],

       [[0.66400456, 0.6370739 , 0.11244635],
        [0.11563044, 0.2245997 , 0.0647684 ]]])

In [33]:
# Random floats with the same shape as `a`; random_sample takes the shape as one tuple
np.random.random_sample(a.shape)

array([[0.3072751 , 0.54405152, 0.80312699, 0.17728205, 0.78291037,
        0.22402403, 0.96978864],
       [0.89441756, 0.49776028, 0.97443932, 0.29618014, 0.78248625,
        0.37998367, 0.15366731]])

In [34]:
# Random integers: randint(low, high, size) draws from low (inclusive) up to high (exclusive)
#np.random.randint(100, size=(5,5))
np.random.randint(-4,20, size=(5,10))

array([[19, -4, 18,  3, -4,  8, -2,  6,  4,  8],
       [ 1, 12, -2, 10, 11,  6, 16,  5, 18, 18],
       [-1, 16, -4, -1, 15,  4,  7,  7,  8, 17],
       [19,  4, -3,  7, 17, 14, -2,  1, -3,  5],
       [-4, -3,  4, 17,  3, -3,  8,  5, -4,  2]])

In [35]:
# Identity matrix: 10 x 10, ones on the main diagonal and zeros elsewhere
np.identity(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [36]:
# Repeat every element three times along the column axis (axis=1)
arr = np.array([[1, 2, 3]])
r1 = arr.repeat(3, axis=1)
print(r1)

[[1 1 1 2 2 2 3 3 3]]


In [37]:
# Repeat the whole row three times along the row axis (axis=0)
arr = np.array([[1, 2, 3]])
r1 = arr.repeat(3, axis=0)
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


### Let's create this example matrix

```
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]
```

In [38]:
# Build the target matrix in two steps: a frame of ones, then a 3 x 3 centre block
output = np.ones((5, 5))

# Centre block: all zeros apart from a 9 in the middle
z = np.zeros((3, 3))
z[1, 1] = 9

# Drop the centre block into the interior (everything except the outer border)
output[1:-1, 1:-1] = z
print(output)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


### Be careful when copying arrays!

In [39]:
# Plain assignment does NOT copy an array.
# `b = a` only binds a second name to the same array object; NumPy was never
# asked to duplicate the contents of `a`. A change made through `b` is
# therefore also visible through `a`.
# To prevent that, use the built-in copy method.
a = np.array([1, 2, 3])
b = a
b[0] = 100
print(a)

[100   2   3]


In [40]:
# An explicit copy gives `b` its own data, so changing `b` leaves `a` untouched
a = np.array([1, 2, 3])
b = a.copy()
b[0] = 100

print(a)

[1 2 3]


### Mathematics
element-wise operations: addition, subtraction, multiplication, division

In [41]:
# Starting array for the element-wise arithmetic examples
a = np.array([1, 2, 3, 4])
print(a)

[1 2 3 4]


In [42]:
# Add a scalar to every element; this returns a new array and leaves `a` unchanged
a + 2

array([3, 4, 5, 6])

In [43]:
# In-place addition: this modifies `a` itself, so re-running the cell adds 2 again
a += 2
print(a)

[3 4 5 6]


In [44]:
# Subtract a scalar from every element
a - 2

array([1, 2, 3, 4])

In [45]:
# Multiply every element by a scalar
a * 2

array([ 6,  8, 10, 12])

In [46]:
# Divide every element by a scalar; true division produces floats
a / 2

array([1.5, 2. , 2.5, 3. ])

In [47]:
# Adding two arrays of the same shape works element by element
b = np.array([1,0,1,0])
a + b

array([4, 4, 6, 6])

In [48]:
# Raise every element to the second power (element-wise square)
a ** 2

array([ 9, 16, 25, 36], dtype=int32)

In [49]:
# Element-wise sine (the values are interpreted as radians)
np.sin(a)

array([ 0.14112001, -0.7568025 , -0.95892427, -0.2794155 ])

In [50]:
# Element-wise cosine (the values are interpreted as radians)
np.cos(a)

array([-0.9899925 , -0.65364362,  0.28366219,  0.96017029])

For more details, see the [documentation](https://numpy.org/doc/stable/reference/routines.math.html)

### Linear Algebra

In [51]:
# Two matrices whose inner dimensions agree: (2 x 3) times (3 x 2)
a = np.ones((2, 3))
b = np.full((3, 2), 2)
print(a)
print(b)

# a*b would be element-wise and fails here: shapes (2, 3) and (3, 2) cannot be broadcast together.
# The matrix product is computed with np.matmul instead.
np.matmul(a, b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


array([[6., 6.],
       [6., 6.]])

In [52]:
# Determinant of the 3 x 3 identity matrix
c = np.identity(3)
np.linalg.det(c)

1.0

For more details, see the [documentation](https://numpy.org/doc/stable/reference/routines.linalg.html)

Determinant
Trace
Singular Value Decomposition
Eigenvalues
Matrix Norm
Inverse
etc.

### Statistics

In [53]:
# Small 2 x 3 sample array for the aggregation examples
stats = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [54]:
# Smallest value in the whole array
np.min(stats)

1

In [55]:
# Minimum of each row (the reduction runs along axis=1, across the columns)
np.min(stats, axis=1) 

array([1, 4])

In [56]:
# Largest value in the whole array
np.max(stats)

6

In [57]:
# Maximum of each row (the reduction runs along axis=1, across the columns)
np.max(stats, axis=1)

array([3, 6])

In [58]:
# Sum of all elements
np.sum(stats)

21

In [59]:
# Column sums: axis=0 adds down the rows, giving one total per column
np.sum(stats, axis=0)

array([5, 7, 9])

### Reorganizing Arrays

In [62]:
# reshape rearranges the same data into a new shape; the new shape must hold
# exactly as many elements as the original (8 here)
before_array = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before_array)

after_array = before_array.reshape(8, 1)
# Other shapes to try:
#after_array = before_array.reshape((4,2))
#after_array = before_array.reshape((2,3))  # fails: 6 slots cannot hold 8 elements
print(after_array)

[[1 2 3 4]
 [5 6 7 8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]


In [61]:
# Vertical stacking: each vector becomes one row of the result
v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])
# Vectors of different lengths cannot be stacked; try this to see the error:
#v2 = np.array([5,6,7,8,9])
np.vstack((v1, v2, v1, v2))

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [66]:
# Horizontal stacking: arrays with the same number of rows are joined side by side
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))

# A list works just as well as a tuple here: np.hstack([h1, h2])
np.hstack((h1, h2))

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

### Load data from file

In [67]:
# np.genfromtxt parses the comma-separated text file into an array;
# astype then returns a copy of that array cast to 32-bit integers
data = np.genfromtxt('data.txt', delimiter=',').astype('int32')
print(data)

[[  1  13  21  11 196  75   4   3  34   6   7   8   0   1   2   3   4   5]
 [  3  42  12  33 766  75   4  55   6   4   3   4   5   6   7   0  11  12]
 [  1  22  33  11 999  11   2   1  78   0   1   2   9   8   7   1  76  88]]


### Boolean masking and advanced indexing

In [68]:
# Element-wise comparison: yields a boolean array with the same shape as `data`
data <= 100

array([[ True,  True,  True,  True, False,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True]])

In [69]:
# Boolean mask used as an index: keeps only the values greater than 50, as a 1-D array
data[data > 50]

array([196,  75, 766,  75,  55, 999,  78,  76,  88])

In [70]:
# An array can also be indexed with a list of positions
a = np.arange(1, 11)
a[[1, 2, 8]]

array([2, 3, 9])

In [71]:
# For each column (axis=0): is any value in that column greater than 50?
np.any(data > 50, axis=0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [74]:
# Combine conditions element-wise with & (each comparison in its own parentheses): 50 < value < 100
((data > 50) & (data < 100))

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [75]:
# ~ inverts a boolean mask: True wherever the value is NOT strictly between 50 and 100
(~((data > 50) & (data < 100)))

array([[ True,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

## NumPy vs List

In [76]:
import numpy as np
import time 
import sys

### Memory

### Advantage of NumPy over List in terms of memory

In [77]:
# Memory estimate for a Python list of integers
S = list(range(1000))  # materialise a real list; range() on its own is a lazy sequence, not a list
# Size of one small int object * number of elements in the list.
# This is a rough estimate: it ignores the list's own storage for the
# references to its elements (see sys.getsizeof(S)).
print(sys.getsizeof(5)*len(S))

28000


In [78]:
# Memory used by the equivalent NumPy array
D = np.arange(1000)  # NumPy counterpart of range()
# nbytes is the number of elements (D.size) times the bytes per element (D.itemsize)
print(D.nbytes)

4000


### Speed

### Advantage of NumPy over List in terms of speed

In [79]:
size = 100000

# Plain Python lists (materialised: range() on its own is lazy, not a list)
L1 = list(range(size))
L2 = list(range(size))

# NumPy arrays holding the same values
A1 = np.arange(size)
A2 = np.arange(size)

# Time the element-wise addition of two lists.
# The list version must do the same work as the NumPy version (add the
# elements), otherwise the two timings are not comparable.
# perf_counter is the clock intended for measuring short intervals.
start = time.perf_counter()
list_result = [x + y for x, y in zip(L1, L2)]
print((time.perf_counter()-start)*1000) # perf_counter is in seconds, convert to ms

# Time the same element-wise addition with NumPy
start = time.perf_counter()
result = A1 + A2
print((time.perf_counter()-start)*1000) # perf_counter is in seconds, convert to ms

25.03180503845215
3.0028820037841797
