# Intro to Numerical Computing with NumPy | SciPy 2018

In [1]:
a = [1, 2, 3, 4]

In [2]:
b = [10, 11, 12, 13]

In [3]:
a + b

[1, 2, 3, 4, 10, 11, 12, 13]

In [4]:
output = []

In [5]:
for item1, item2 in zip(a, b):
    output.append(item1 + item2)

In [6]:
output

[11, 13, 15, 17]

In [7]:
g = list(range(1000000))

In [8]:
%timeit sum(g)

50.3 ms ± 5.09 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
import numpy as np

In [10]:
g_array = np.array(g)

In [11]:
%timeit np.sum(g_array)

753 µs ± 27 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
a

[1, 2, 3, 4]

In [13]:
a = np.array([1, 2, 3, 4])

In [14]:
b

[10, 11, 12, 13]

In [15]:
b = np.array([10, 11, 12, 13])

In [16]:
a

array([1, 2, 3, 4])

In [17]:
b

array([10, 11, 12, 13])

In [18]:
a + b

array([11, 13, 15, 17])

In [19]:
a - b

array([-9, -9, -9, -9])

In [20]:
a * b

array([10, 22, 36, 52])

In [21]:
a / b

array([0.1       , 0.18181818, 0.25      , 0.30769231])

In [22]:
a % b

array([1, 2, 3, 4], dtype=int32)

In [23]:
a ** b

array([       1,     2048,   531441, 67108864], dtype=int32)

### Simple array creation

In [24]:
a = np.array([1, 2, 3, 4])

In [25]:
a

array([1, 2, 3, 4])

### Checking the type

In [26]:
type(a)

numpy.ndarray

### Numeric "type" of elements

In [27]:
a.dtype

dtype('int32')

### Number of dimensions

In [28]:
a.ndim

1

### Array shape

In [29]:
# Shape returns a tuple listing the length of the array along each dimension.
a.shape

(4,)

### Bytes per element

In [30]:
a.itemsize

4

### Bytes of memory used

In [31]:
# Return the number of bytes used by the data portion of the array.
a.nbytes

16

In [32]:
f = 5

In [33]:
type(f)

int

In [34]:
f = (1, 2)

In [35]:
type(f)

tuple

In [36]:
(10)

10

In [37]:
10,

(10,)

In [38]:
a.shape

(4,)

In [39]:
b.shape

(4,)

In [40]:
a

array([1, 2, 3, 4])

In [41]:
b

array([10, 11, 12, 13])

In [42]:
a * b

array([10, 22, 36, 52])

In [43]:
a * 10

array([10, 20, 30, 40])

In [44]:
a * 100

array([100, 200, 300, 400])

In [45]:
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [46]:
np.exp(a)

array([ 2.71828183,  7.3890561 , 20.08553692, 54.59815003])

In [47]:
np.log(a)

array([0.        , 0.69314718, 1.09861229, 1.38629436])

In [48]:
np.log

<ufunc 'log'>

In [49]:
np.sum

<function numpy.sum(a, axis=None, dtype=None, out=None, keepdims=<no value>, initial=<no value>)>

In [50]:
sum

<function sum(iterable, start=0, /)>

### Array indexing

In [51]:
a[0]

1

In [52]:
a[0] = 10

In [53]:
a

array([10,  2,  3,  4])

### Beware of type coercion

In [54]:
a.dtype

dtype('int32')

In [55]:
# Assigning a float into an int32 array truncates the decimal part
a[0] = 10.6

In [56]:
a

array([10,  2,  3,  4])

In [57]:
# fill has the same behavior
a.fill(-4.8)

In [58]:
a

array([-4, -4, -4, -4])

In [59]:
a1 = np.array([1, 2, 3, 4])

In [60]:
a.dtype

dtype('int32')

In [61]:
a2 = np.array([1, 2, 3, 4.0])

In [62]:
a2.dtype

dtype('float64')

In [63]:
a3 = np.array([1, 2, 3, 4.0 + 1j])

In [64]:
a3.dtype

dtype('complex128')

In [65]:
a4 = np.array([1, 2, 3, 4.0], dtype='int32')

In [66]:
a4.dtype

dtype('int32')

In [67]:
a4

array([1, 2, 3, 4])

In [68]:
c = np.array([[10, 11, 12], [20, 21, 22]])

In [69]:
c

array([[10, 11, 12],
       [20, 21, 22]])

In [70]:
c.dtype

dtype('int32')

In [71]:
c.ndim

2

In [72]:
c.shape

(2, 3)

In [73]:
a

array([-4, -4, -4, -4])

In [74]:
a4

array([1, 2, 3, 4])

In [75]:
a4.T

array([1, 2, 3, 4])

In [76]:
c

array([[10, 11, 12],
       [20, 21, 22]])

In [77]:
c.size

6

In [78]:
c.nbytes

24

In [79]:
a4

array([1, 2, 3, 4])

In [80]:
a4[0]

1

In [81]:
a4[0] = 10

In [82]:
a4

array([10,  2,  3,  4])

In [83]:
c

array([[10, 11, 12],
       [20, 21, 22]])

In [84]:
c[0]

array([10, 11, 12])

In [85]:
c[1]

array([20, 21, 22])

In [86]:
c[0, 0]

10

## Slicing

In [87]:
# var[lower:upper:step]
# Extracts a portion of a sequence by specifying a lower and upper bound.
# The lower-bound element is included, but the upper-bound element is not included.
# Mathematically: [lower, upper) -- a half-open interval.
# The step value specifies the stride between elements.

### Slicing arrays

In [88]:
#           -5 -4 -3 -2 -1
# indices:   0  1  2  3  4

a5 = np.array([10, 11, 12, 13, 14])

In [89]:
a5

array([10, 11, 12, 13, 14])

In [90]:
a5[1:3]

array([11, 12])

In [91]:
# negative indices work also
a5[-4:3]

array([11, 12])

### Omitting indices

In [92]:
# omitted boundaries are assumed to be the beginning (or end) of the list
# grab first three elements

a[:3]

array([-4, -4, -4])

In [93]:
a5[:3]

array([10, 11, 12])

In [94]:
# grab last two elements
a5[-2:]

array([13, 14])

In [95]:
# every other element
a5[::2]

array([10, 12, 14])

In [96]:
a6 = np.arange(25).reshape(5, 5)

In [97]:
a6

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [98]:
red = a6[:, 1::2]

In [99]:
red

array([[ 1,  3],
       [ 6,  8],
       [11, 13],
       [16, 18],
       [21, 23]])

In [100]:
yellow = a6[4]

In [101]:
yellow

array([20, 21, 22, 23, 24])

In [102]:
yellow2 = a6[4, :]

In [103]:
yellow2

array([20, 21, 22, 23, 24])

In [104]:
yellow3 = a6[-1]

In [105]:
yellow3

array([20, 21, 22, 23, 24])

In [106]:
yellow4 = a6[-1, :]

In [107]:
yellow4

array([20, 21, 22, 23, 24])

In [108]:
blue = a6[1::2, :3:2]

In [109]:
blue

array([[ 5,  7],
       [15, 17]])

In [110]:
blue2 = a6[1::2, :4:2]

In [111]:
blue2

array([[ 5,  7],
       [15, 17]])

In [112]:
blue3 = a6[1::2, :-1:2]

In [113]:
blue3

array([[ 5,  7],
       [15, 17]])

In [114]:
blue4 = a6[1::2, :-2:2]

In [115]:
blue4

array([[ 5,  7],
       [15, 17]])

In [116]:
red

array([[ 1,  3],
       [ 6,  8],
       [11, 13],
       [16, 18],
       [21, 23]])

In [117]:
red[-1, -1]

23

In [118]:
red[-1, -1] = 0

In [119]:
red

array([[ 1,  3],
       [ 6,  8],
       [11, 13],
       [16, 18],
       [21,  0]])

In [120]:
a6

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22,  0, 24]])

In [121]:
id(a6)

1926252832048

In [122]:
id(red)

1926251508640

In [123]:
red.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [124]:
a.data

<memory at 0x000001C07D9A9AC8>

In [125]:
red.data

<memory at 0x000001C07D843F28>

In [126]:
red.copy()

array([[ 1,  3],
       [ 6,  8],
       [11, 13],
       [16, 18],
       [21,  0]])

## Fancy Indexing

### Indexing by position

In [127]:
a7 = np.arange(0, 80, 10)

In [128]:
a7

array([ 0, 10, 20, 30, 40, 50, 60, 70])

In [129]:
# fancy indexing
indices = [1, 2, -3]
y = a7[indices]
y

array([10, 20, 50])

### Indexing with booleans

In [130]:
# manual creation of masks
mask = np.array([0, 1, 1, 0, 0, 1, 0, 0], dtype=bool)

In [131]:
# conditional creation of masks
mask2 = a7 < 30

In [132]:
# fancy indexing
y2 = a7[mask]
y2

array([10, 20, 50])

In [133]:
a8 = np.array([3, -1, -2, 4, -6, 8])

In [134]:
a8

array([ 3, -1, -2,  4, -6,  8])

In [135]:
a8 < 0

array([False,  True,  True, False,  True, False])

In [136]:
negatives = a8 < 0

In [137]:
a8[negatives]

array([-1, -2, -6])

In [138]:
a[a < 0]

array([-4, -4, -4, -4])

In [139]:
a8[a8 < 0]

array([-1, -2, -6])

In [140]:
a8[a8 < 0] = 0

In [141]:
a8

array([3, 0, 0, 4, 0, 8])

In [142]:
a8 < 8

array([ True,  True,  True,  True,  True, False])

In [143]:
a8 > 3

array([False, False, False,  True, False,  True])

In [144]:
a8 > 3 and a8 < 8

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [145]:
a8 < 8

array([ True,  True,  True,  True,  True, False])

In [146]:
(a8 < 8).any()

True

In [147]:
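# Boolean operators: and, or, not -- these need a single truth value, so they
# raise ValueError on arrays with more than one element.
# Bitwise operators: & (and), | (or), ~ (not), ^ (xor) -- these apply element by
# element; parenthesize each comparison, e.g. (a8 > 3) & (a8 < 8).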

In [148]:
(a8 > 3) & (a8 < 8)

array([False, False, False,  True, False, False])

In [149]:
a8

array([3, 0, 0, 4, 0, 8])

In [150]:
f2 = 6

In [151]:
g2 = 9

In [152]:
f2 + g2

15

In [153]:
f.__add__(g2)

TypeError: can only concatenate tuple (not "int") to tuple

In [154]:
f2

6

In [155]:
g2

9

In [156]:
negatives

array([False,  True,  True, False,  True, False])

In [157]:
np.nonzero(negatives)

(array([1, 2, 4], dtype=int64),)

In [158]:
a8

array([3, 0, 0, 4, 0, 8])

In [159]:
a8.sort()

In [160]:
a8

array([0, 0, 0, 3, 4, 8])

In [161]:
a9 = np.array([10, 1, 20])

In [162]:
b2 = np.array([2, 3, 20])

In [163]:
a9 > b2

array([ True, False, False])

In [164]:
a9

array([10,  1, 20])

In [165]:
subset = a9[[0, 2]]

In [166]:
subset

array([10, 20])

In [167]:
a9.flags.owndata

True

In [168]:
subset.flags.owndata

True

In [169]:
a9 is subset

False

In [170]:
a9

array([10,  1, 20])

In [171]:
subset

array([10, 20])

In [172]:
subset[0] = -1

In [173]:
subset

array([-1, 20])

In [174]:
a9

array([10,  1, 20])

In [175]:
a9[-1] = 100

In [176]:
a9

array([ 10,   1, 100])

In [177]:
a10 = np.arange(25).reshape(5, 5)

In [178]:
a10

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [179]:
0 % 2

0

In [180]:
1 % 2

1

In [181]:
2 % 2

0

In [182]:
3 % 2

1

In [183]:
4 % 2

0

In [184]:
a10 % 3

array([[0, 1, 2, 0, 1],
       [2, 0, 1, 2, 0],
       [1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1],
       [2, 0, 1, 2, 0]], dtype=int32)

In [185]:
a10[a10 % 3]

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14],
        [ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [ 0,  1,  2,  3,  4]],

       [[ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14],
        [ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [ 0,  1,  2,  3,  4]]])

In [186]:
a10 % 3 == 0

array([[ True, False, False,  True, False],
       [False,  True, False, False,  True],
       [False, False,  True, False, False],
       [ True, False, False,  True, False],
       [False,  True, False, False,  True]])

In [187]:
a10[a10 % 3 == 0]

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24])

In [188]:
np.nan

nan

In [189]:
output2 = np.empty_like(a10)

In [190]:
output2

array([[0, 1, 2, 0, 1],
       [2, 0, 1, 2, 0],
       [1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1],
       [2, 0, 1, 2, 0]])

In [191]:
output3 = np.empty_like(a10, dtype='float')

In [192]:
output3

array([[6.23042070e-307, 4.67296746e-307, 1.69121096e-306,
        9.34606395e-307, 1.33508845e-306],
       [1.33511969e-306, 6.23037996e-307, 6.23053954e-307,
        9.34609790e-307, 8.45593934e-307],
       [9.34600963e-307, 1.86921143e-306, 6.23061763e-307,
        6.23053954e-307, 2.22522597e-306],
       [1.06810268e-306, 6.23052935e-307, 1.11261434e-306,
        1.78018811e-306, 1.20160711e-306],
       [1.42418172e-306, 1.37961641e-306, 1.27946076e-307,
        8.45610231e-307, 3.33504588e-317]])

In [193]:
output3.fill(np.nan)

In [194]:
output3

array([[nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan]])

In [195]:
a10

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [196]:
mask3 = a10 % 3 == 0

In [197]:
a10[mask3]

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24])

In [198]:
output3[mask3]

array([nan, nan, nan, nan, nan, nan, nan, nan, nan])

In [199]:
output3[mask3] = a10[mask3]

In [200]:
output3

array([[ 0., nan, nan,  3., nan],
       [nan,  6., nan, nan,  9.],
       [nan, nan, 12., nan, nan],
       [15., nan, nan, 18., nan],
       [nan, 21., nan, nan, 24.]])

In [201]:
np.where(a10 % 3 == 0, a10, np.nan)

array([[ 0., nan, nan,  3., nan],
       [nan,  6., nan, nan,  9.],
       [nan, nan, 12., nan, nan],
       [15., nan, nan, 18., nan],
       [nan, 21., nan, nan, 24.]])

In [202]:
a10[mask3]

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24])

In [203]:
output3[mask3]

array([ 0.,  3.,  6.,  9., 12., 15., 18., 21., 24.])

## Multi-Dimensional Arrays

### Visualizing multi-dimensional arrays

### Computations with Arrays

In [204]:
# Rule 1: Operations between multiple array objects are first checked for proper shape match.
# Rule 2: Mathematical operators (+, -, *, /, %, exp, log, ...) apply element by element, on the values.
# Rule 3: Reduction operations (mean, std, skew, kurt, sum, prod, ...) apply to the whole array, unless an axis is specified.
# Rule 4: Missing values propagate unless explicitly ignored (nanmean, nansum, ...)

In [205]:
np.nan + 6

nan

In [206]:
np.sum([1, np.nan, 9])

nan

In [207]:
np.nansum([1, np.nan, 9])

10.0

In [208]:
a10

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [209]:
a10[mask3]

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24])

In [210]:
a10

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [211]:
a10[a10 % 3 == 0].sum()

108

## Array Calculation Methods

### SUM Method

In [212]:
# Methods act on data stored in the array
a11 = np.array([[1, 2, 3], [4, 5, 6]])

In [213]:
a11

array([[1, 2, 3],
       [4, 5, 6]])

In [214]:
# .sum() defaults to adding up all the values in an array.
a11.sum()

21

In [215]:
# supply the keyword axis to sum along the 0th axis
a11.sum(axis=0)

array([5, 7, 9])

In [216]:
# supply the keyword axis to sum along the last axis
a11.sum(axis=-1)

array([ 6, 15])

## Min / Max

### Min

In [217]:
a12 = np.array([2., 3., 0., 1.])

In [218]:
a12.min(axis=0)

0.0

In [219]:
# Use NumPy's min() instead of Python's built-in min()
# for speedy operations on multi-dimensional arrays.
np.min(a12, axis=0)

0.0

### Argmin

In [220]:
# Find index of minimum value.
a12.argmin(axis=0)

2

In [221]:
# as a function
np.argmin(a12, axis=0)

2

### Max

In [222]:
a12.max(axis=0)

3.0

In [223]:
# as a function
np.max(a12, axis=0)

3.0

### Argmax

In [224]:
# Find index of maximum value.
a12.argmax(axis=0)

1

In [225]:
# as a function
np.argmax(a12, axis=0)

1