## Numpy code along


In [1]:
# 'np' is the conventional alias for numpy
import numpy as np

#### Numpy arrays

In [2]:
a = np.array([5,3,2])

In [3]:
a

array([5, 3, 2])

#### Generate an array with random numbers

In [4]:
np.random.seed(123)
a = np.random.random((6,3))
a

array([[0.69646919, 0.28613933, 0.22685145],
       [0.55131477, 0.71946897, 0.42310646],
       [0.9807642 , 0.68482974, 0.4809319 ],
       [0.39211752, 0.34317802, 0.72904971],
       [0.43857224, 0.0596779 , 0.39804426],
       [0.73799541, 0.18249173, 0.17545176]])

### Other ways to create arrays

#### List to array

This works the same way whether you have a list of lists, a list of tuples, a tuple of lists, or a tuple of tuples.

In [5]:
lst_lst = [[1,2,3],[4,5,6],[7,8,9]]
d = np.array(lst_lst)
print(d)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


#### Constant arrays

In [6]:
a = np.zeros((2,2))  # Create an array of all zeros
print(a, "\n")

b = np.ones((1,2))   # Create an array of all ones
print(b, "\n")

c = np.full((2,2), 7) # Create a constant array
print(c)

[[0. 0.]
 [0. 0.]] 

[[1. 1.]] 

[[7 7]
 [7 7]]


#### Sequential arrays

In [7]:
# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 2
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [8]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [9]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3, 3))

array([[ 2.20593008,  2.18678609,  1.0040539 ],
       [ 0.3861864 ,  0.73736858,  1.49073203],
       [-0.93583387,  1.17582904, -1.25388067]])

In [10]:
# Create a 3x3 array of random integers in the interval [0, 10)
np.random.randint(0, 10, (3, 3))

array([[1, 8, 3],
       [5, 0, 2],
       [6, 2, 4]])

#### Array Attributes

In [11]:
np.random.seed(123)
a = np.random.random((6,3))
a

array([[0.69646919, 0.28613933, 0.22685145],
       [0.55131477, 0.71946897, 0.42310646],
       [0.9807642 , 0.68482974, 0.4809319 ],
       [0.39211752, 0.34317802, 0.72904971],
       [0.43857224, 0.0596779 , 0.39804426],
       [0.73799541, 0.18249173, 0.17545176]])

In [12]:
a.shape

(6, 3)

In [13]:
a.size

18

In [14]:
a.ndim

2

In [15]:
a.dtype

dtype('float64')

In [16]:
b = np.array([1,3,4])

In [17]:
b.dtype

dtype('int64')

In [18]:
b.itemsize

8

In [19]:
a.itemsize

8

In [20]:
c = np.array([1,3,4], dtype="int8") # smaller integers

In [21]:
c.itemsize

1

In [22]:
c.nbytes

3

In [23]:
b.nbytes

24

#### Array indexing

In [24]:
a[0]

array([0.69646919, 0.28613933, 0.22685145])

In [25]:
a[0,0]

0.6964691855978616

In [26]:
a[0,-1]

0.2268514535642031

In [27]:
a[0:2]

array([[0.69646919, 0.28613933, 0.22685145],
       [0.55131477, 0.71946897, 0.42310646]])

In [28]:
a[0,1:3]

array([0.28613933, 0.22685145])

#### Modifying a slice will modify the array

In [29]:
a[0,0]

0.6964691855978616

In [30]:
a[0,0] = 10

In [31]:
a

array([[10.        ,  0.28613933,  0.22685145],
       [ 0.55131477,  0.71946897,  0.42310646],
       [ 0.9807642 ,  0.68482974,  0.4809319 ],
       [ 0.39211752,  0.34317802,  0.72904971],
       [ 0.43857224,  0.0596779 ,  0.39804426],
       [ 0.73799541,  0.18249173,  0.17545176]])

#### Boolean indexing

In [32]:
bool_idx = (a > 0.7)
bool_idx

array([[ True, False, False],
       [False,  True, False],
       [ True, False, False],
       [False, False,  True],
       [False, False, False],
       [ True, False, False]])

In [33]:
print(a[bool_idx])
print(a[a > 0.7]) # in a single expression

[10.          0.71946897  0.9807642   0.72904971  0.73799541]
[10.          0.71946897  0.9807642   0.72904971  0.73799541]


#### Reshaping arrays

In [34]:
grid = np.arange(1, 10).reshape((3, 3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


#### Subarrays return views, not copies!

In [35]:
a

array([[10.        ,  0.28613933,  0.22685145],
       [ 0.55131477,  0.71946897,  0.42310646],
       [ 0.9807642 ,  0.68482974,  0.4809319 ],
       [ 0.39211752,  0.34317802,  0.72904971],
       [ 0.43857224,  0.0596779 ,  0.39804426],
       [ 0.73799541,  0.18249173,  0.17545176]])

In [36]:
# we take a slice of array "a" and store it in a new variable "a_chunk"
a_chunk = a[1:3, 0:2]
a_chunk

array([[0.55131477, 0.71946897],
       [0.9807642 , 0.68482974]])

In [37]:
# modifying "a_chung" also modifies "a"
a_chunk[0,0] = 0
print(a_chunk)
print("\n")
print(a)

[[0.         0.71946897]
 [0.9807642  0.68482974]]


[[10.          0.28613933  0.22685145]
 [ 0.          0.71946897  0.42310646]
 [ 0.9807642   0.68482974  0.4809319 ]
 [ 0.39211752  0.34317802  0.72904971]
 [ 0.43857224  0.0596779   0.39804426]
 [ 0.73799541  0.18249173  0.17545176]]


#### Creating copies

In [38]:
a_copy = a.copy()

In [39]:
# modifying a copy does not modify the original
a_copy[0,0] = 0
print(a_copy, "\n")
print(a)

[[0.         0.28613933 0.22685145]
 [0.         0.71946897 0.42310646]
 [0.9807642  0.68482974 0.4809319 ]
 [0.39211752 0.34317802 0.72904971]
 [0.43857224 0.0596779  0.39804426]
 [0.73799541 0.18249173 0.17545176]] 

[[10.          0.28613933  0.22685145]
 [ 0.          0.71946897  0.42310646]
 [ 0.9807642   0.68482974  0.4809319 ]
 [ 0.39211752  0.34317802  0.72904971]
 [ 0.43857224  0.0596779   0.39804426]
 [ 0.73799541  0.18249173  0.17545176]]


#### 3-D arrays

In [40]:
b = np.random.random((5,2,3))
print(b)

[[[0.53155137 0.53182759 0.63440096]
  [0.84943179 0.72445532 0.61102351]]

 [[0.72244338 0.32295891 0.36178866]
  [0.22826323 0.29371405 0.63097612]]

 [[0.09210494 0.43370117 0.43086276]
  [0.4936851  0.42583029 0.31226122]]

 [[0.42635131 0.89338916 0.94416002]
  [0.50183668 0.62395295 0.1156184 ]]

 [[0.31728548 0.41482621 0.86630916]
  [0.25045537 0.48303426 0.98555979]]]


### 4-D arrays

In [41]:
c = np.random.random((2,3,4,5))
print(c)

[[[[0.51948512 0.61289453 0.12062867 0.8263408  0.60306013]
   [0.54506801 0.34276383 0.30412079 0.41702221 0.68130077]
   [0.87545684 0.51042234 0.66931378 0.58593655 0.6249035 ]
   [0.67468905 0.84234244 0.08319499 0.76368284 0.24366637]]

  [[0.19422296 0.57245696 0.09571252 0.88532683 0.62724897]
   [0.72341636 0.01612921 0.59443188 0.55678519 0.15895964]
   [0.15307052 0.69552953 0.31876643 0.6919703  0.55438325]
   [0.38895057 0.92513249 0.84167    0.35739757 0.04359146]]

  [[0.30476807 0.39818568 0.70495883 0.99535848 0.35591487]
   [0.76254781 0.59317692 0.6917018  0.15112745 0.39887629]
   [0.2408559  0.34345601 0.51312815 0.66662455 0.10590849]
   [0.13089495 0.32198061 0.66156434 0.84650623 0.55325734]]]


 [[[0.85445249 0.38483781 0.3167879  0.35426468 0.17108183]
   [0.82911263 0.33867085 0.55237008 0.57855147 0.52153306]
   [0.00268806 0.98834542 0.90534158 0.20763586 0.29248941]
   [0.52001015 0.90191137 0.98363088 0.25754206 0.56435904]]

  [[0.80696868 0.39437005 0.73


### Operations:

- np.sum, np.multiply, np.power...

- np.mean, np.std...

In [42]:
x = np.array([1,2,3])
y = np.array([4,5,6])
x ** y  # works with *, /, **

array([  1,  32, 729])

Lists do not behave the same way: sum means concatenation

In [43]:
[1,2,3] + [4,5,6]

[1, 2, 3, 4, 5, 6]

In [44]:
x = np.array([[1,2],[3,4]])
y = np.array([[5,6],[7,8]])
print("x", "\n", x)
print("y", "\n", y)

x 
 [[1 2]
 [3 4]]
y 
 [[5 6]
 [7 8]]


In [45]:
print (x + y)

[[ 6  8]
 [10 12]]


In [46]:
# Mean of each column in matrix a
print(np.mean(a, axis=0))
  
# Mean of each row in matrix a
print(np.mean(a, axis=1))

# Mean of all the elements in the first two groups of array b
np.mean(b[:2])

[2.09157489 0.37929761 0.40557259]
[3.50433026 0.38085848 0.71550861 0.48811508 0.2987648  0.36531296]


0.536902908522835

In [47]:
# compute the standard deviation of this array, first using np.std() and then without using this function
np.random.seed(123)
rand = np.random.random(10)

In [48]:
squared_deviations = (rand - np.mean(rand))**2
squared_deviations

array([2.31861019e-02, 6.65949729e-02, 1.00709689e-01, 5.06291464e-05,
       3.07194386e-02, 1.46634887e-02, 1.90588864e-01, 1.97769054e-02,
       4.00277042e-03, 2.31288845e-02])

In [49]:
np.sqrt(squared_deviations.mean())

0.21758256938579879

#### Multiplying bones broken & favourite numbers

In [50]:
# we use np.Nan for the missing data
bones_broken = np.array([0,3,0,1,0,1,0,4,3,2,2,0,0,2,2,4,0,np.NaN,0,0,1,0,np.NaN,1,0,0,0])
bones_broken

array([ 0.,  3.,  0.,  1.,  0.,  1.,  0.,  4.,  3.,  2.,  2.,  0.,  0.,
        2.,  2.,  4.,  0., nan,  0.,  0.,  1.,  0., nan,  1.,  0.,  0.,
        0.])

In [51]:
# we will replace missing data with the rounded mean
nan_replacement = round(np.nanmean(bones_broken))

In [52]:
bones_broken = np.nan_to_num(bones_broken, nan=nan_replacement)
bones_broken

array([0., 3., 0., 1., 0., 1., 0., 4., 3., 2., 2., 0., 0., 2., 2., 4., 0.,
       1., 0., 0., 1., 0., 1., 1., 0., 0., 0.])

In [53]:
fav_num = np.array([7,4,24,6,8,7,7,13,3,5,13,0,np.NaN,14,7,7,7,np.NaN,23,9,4,24,19,8,10,0,8])

In [54]:
# we will replace missing data with the mode

from scipy.stats import mode # numpy doesn't have mode, we import it from scipy
most_freq_fav_num = mode(fav_num)
print(most_freq_fav_num) # this fives both the mode and its freq count
print(most_freq_fav_num[0]) # we just need the actual mode

ModeResult(mode=array([7.]), count=array([6]))
[7.]


In [55]:
fav_num = np.nan_to_num(fav_num, nan=most_freq_fav_num[0])
fav_num

array([ 7.,  4., 24.,  6.,  8.,  7.,  7., 13.,  3.,  5., 13.,  0.,  7.,
       14.,  7.,  7.,  7.,  7., 23.,  9.,  4., 24., 19.,  8., 10.,  0.,
        8.])

In [56]:
# now we can multiply both arrays
bones_broken * fav_num

array([ 0., 12.,  0.,  6.,  0.,  7.,  0., 52.,  9., 10., 26.,  0.,  0.,
       28., 14., 28.,  0.,  7.,  0.,  0.,  4.,  0., 19.,  8.,  0.,  0.,
        0.])

#### Performance of numpy operations vs lists

In [57]:
from time import time

n = 1000000

start_time = time()

big_slow_list = []

for i in range(1, n):
    big_slow_list.append(i**3)

end_time = time()
    
print(end_time - start_time)

0.6917178630828857


In [58]:
n = 1000000

start_time = time()

big_fast_array = np.arange(1,100000)**3

end_time = time()
    
print(end_time - start_time)

0.0014209747314453125


#### Concatenate

In [59]:
first = np.array([[1,2,3],[4,5,6]])
second = np.array([[0,0,0], [9,9,9]])

In [60]:
first

array([[1, 2, 3],
       [4, 5, 6]])

In [61]:
second

array([[0, 0, 0],
       [9, 9, 9]])

In [62]:
np.concatenate([first, second])

array([[1, 2, 3],
       [4, 5, 6],
       [0, 0, 0],
       [9, 9, 9]])

In [63]:
np.concatenate([first, second], axis=1)

array([[1, 2, 3, 0, 0, 0],
       [4, 5, 6, 9, 9, 9]])

In [64]:
one_dim = np.array([1,1,1])

In [65]:
np.vstack([first, one_dim])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 1, 1]])

### Transpose

In [66]:
print(x)

[[1 2]
 [3 4]]


In [67]:
print(x.T)

[[1 3]
 [2 4]]


#### Splitting arrays

In [68]:
hundred = np.array(range(1,101))

In [69]:
hundred

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [70]:
first_half, second_half = np.split(hundred, [50])

In [71]:
first_half

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])

In [72]:
second_half

array([ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
        64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,
        77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
        90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

### Broadcasting

In [73]:
# We will add the vector v to each row of the matrix x,
# storing the result in the matrix y
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = np.empty_like(x)   # Create an empty matrix with the same shape as x

# Add the vector v to each row of the matrix x with an explicit loop
for i in range(4):
    y[i, :] = x[i, :] + v

print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


This works; however when the matrix x is very large, computing an explicit loop in Python could be slow. Note that adding the vector v to each row of the matrix x is equivalent to forming a matrix vv by stacking multiple copies of v vertically, then performing elementwise summation of x and vv. We could implement this approach like this:

In [74]:
vv = np.tile(v, (4, 1))  # Stack 4 copies of v on top of each other
print(vv)                # Prints "[[1 0 1]
                         #          [1 0 1]
                         #          [1 0 1]
                         #          [1 0 1]]"

[[1 0 1]
 [1 0 1]
 [1 0 1]
 [1 0 1]]


In [75]:
y = x + vv  # Add x and vv elementwise
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [76]:
y = x + v  # Add v to each row of x using broadcasting
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


[how broadcasting works](https://numpy.org/doc/stable/user/basics.broadcasting.html)

Some applications of bradcasting:

In [77]:
# Compute outer product of vectors
v = np.array([1,2,3])  # v has shape (3,)
w = np.array([4,5])    # w has shape (2,)
# To compute an outer product, we first reshape v to be a column
# vector of shape (3, 1); we can then broadcast it against w to yield
# an output of shape (3, 2), which is the outer product of v and w:

print(np.reshape(v, (3, 1)) * w)

[[ 4  5]
 [ 8 10]
 [12 15]]


In [78]:
# Add a vector to each row of a matrix
x = np.array([[1,2,3], [4,5,6]])
# x has shape (2, 3) and v has shape (3,) so they broadcast to (2, 3),
# giving the following matrix:

print(x + v)

[[2 4 6]
 [5 7 9]]


In [79]:
# Add a vector to each column of a matrix
# x has shape (2, 3) and w has shape (2,).
# If we transpose x then it has shape (3, 2) and can be broadcast
# against w to yield a result of shape (3, 2); transposing this result
# yields the final result of shape (2, 3) which is the matrix x with
# the vector w added to each column. Gives the following matrix:

print((x.T + w).T)

[[ 5  6  7]
 [ 9 10 11]]


In [80]:
x = c[0,0]

print("x", "\n", x)

y = c[0,1]
print("y", "\n", y)

x 
 [[0.51948512 0.61289453 0.12062867 0.8263408  0.60306013]
 [0.54506801 0.34276383 0.30412079 0.41702221 0.68130077]
 [0.87545684 0.51042234 0.66931378 0.58593655 0.6249035 ]
 [0.67468905 0.84234244 0.08319499 0.76368284 0.24366637]]
y 
 [[0.19422296 0.57245696 0.09571252 0.88532683 0.62724897]
 [0.72341636 0.01612921 0.59443188 0.55678519 0.15895964]
 [0.15307052 0.69552953 0.31876643 0.6919703  0.55438325]
 [0.38895057 0.92513249 0.84167    0.35739757 0.04359146]]


In [81]:
# Add elements of x and y together
print(np.add(x, y))

[[0.71370808 1.18535148 0.21634118 1.71166763 1.2303091 ]
 [1.26848436 0.35889304 0.89855267 0.9738074  0.84026041]
 [1.02852736 1.20595187 0.98808021 1.27790685 1.17928675]
 [1.06363963 1.76747493 0.92486499 1.12108041 0.28725784]]


In [82]:
# Subtract elements of x from elements of y
print(np.subtract(y, x))

[[-0.32526216 -0.04043757 -0.02491615  0.05898603  0.02418884]
 [ 0.17834835 -0.32663463  0.29031109  0.13976298 -0.52234112]
 [-0.72238633  0.18510719 -0.35054736  0.10603374 -0.07052025]
 [-0.28573848  0.08279005  0.75847501 -0.40628527 -0.20007491]]


In [83]:
# Multiply elements of x and y together
print(np.multiply(x, y))

[[0.10089594 0.35085574 0.01154567 0.73158168 0.37826885]
 [0.39431111 0.00552851 0.18077909 0.23219179 0.10829933]
 [0.13400663 0.35501381 0.21335476 0.40545069 0.34643603]
 [0.26242069 0.77927836 0.07002273 0.27293839 0.01062177]]


In [84]:
# Divide elements of y by elements of x
print(np.divide(y, x))

[[ 0.37387589  0.93402198  0.79344753  1.0713822   1.04011017]
 [ 1.32720385  0.04705633  1.9545914   1.33514517  0.23331787]
 [ 0.17484644  1.36265496  0.47625857  1.18096455  0.88715017]
 [ 0.57648864  1.09828551 10.11683533  0.46799214  0.17889815]]
