In [1]:
import numpy as np

In [5]:
# Build a 2x3 array from a nested list: one inner list per row.
# Mixing ints and floats in the literal yields a float array.
data = np.array(
    [
        [1.5, -0.1, 3],
        [0, -3, 6.5],
    ]
)
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [6]:
# basic operations
# arithmetic is applied element by element: every value times 10,
# and every value added to itself
data * 10, data + data

(array([[ 15.,  -1.,  30.],
        [  0., -30.,  65.]]),
 array([[ 3. , -0.2,  6. ],
        [ 0. , -6. , 13. ]]))

In [7]:
# Inspect the array's metadata in one go:
# number of dimensions, shape tuple, and element dtype
d_dim, d_shape, d_type = data.ndim, data.shape, data.dtype

(d_dim, d_shape, d_type)

(2, (2, 3), dtype('float64'))

## Default Arrays

Creation functions: https://wesmckinney.com/book/numpy-basics#tbl-table_array_ctor

In [8]:
# zeros
# the shape is passed as a tuple (rows, columns); every cell is a float zero
np.zeros((2, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [9]:
# ones
# the shape is passed as a tuple (rows, columns); every cell is a float one
np.ones((3, 3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [11]:
# empty
# allocates the array without initializing it, so the contents are whatever
# was already in that memory (arbitrary leftovers, not random numbers or zeros)
# so only use if you are going to set the values later
np.empty((1, 5))

array([[ 3. ,  0.2,  6. ,  6. , 13. ]])

In [12]:
# eye
# 3x3 identity matrix: ones on the main diagonal, zeros everywhere else
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

## Data Types

Data Types: https://wesmckinney.com/book/numpy-basics#tbl-table_array_dtypes

In [15]:
# Integer array whose dtype is pinned explicitly (32-bit) rather than
# left to the platform default
int_arr = np.array([1, 2, 3], dtype="int32")
int_arr

array([1, 2, 3], dtype=int32)

In [16]:
# convert between data types
# the converted array is only displayed here; nothing is assigned back,
# so int_arr keeps its int32 dtype
# note: this can truncate data
# (values that do not fit the target type exactly lose precision)
int_arr.astype(np.float16)

array([1., 2., 3.], dtype=float16)

## Indexing and Slicing

Slicing with basic indexing returns a view of the original data rather than a new array, so any modifications made through the slice will be reflected in the original array.

In [21]:
# Sample arrays for the indexing examples: a 1-D vector and a 2x3 matrix
vect = np.array([1, 2, 3, 4, 5, 6, 7])
arr = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
(vect, arr)

(array([1, 2, 3, 4, 5, 6, 7]),
 array([[1, 2, 3],
        [4, 5, 6]]))

In [25]:
# Pick out single elements: position 3 of the vector,
# and row 1 / column 2 of the matrix (indices start at 0)
one_el, one_a_el = vect[3], arr[1, 2]

(one_el, one_a_el)

(4, 6)

In [26]:
# Select ranges instead of single cells: positions 2, 3 and 4 of the vector
# (the stop index 5 is excluded) and the entire first row of the matrix
mult_el, mult_a_el = vect[2:5], arr[0]

(mult_el, mult_a_el)

(array([3, 4, 5]), array([1, 2, 3]))

In [31]:
# modify original array from a slice
# the slice is a view onto vect's data, not a copy
# (use vect[2:5].copy() when an independent array is needed)
new_vect = vect[2:5]
# writing through the view also overwrites vect[2]
new_vect[0] = 10

new_vect, vect

(array([10,  4,  5]), array([ 1,  2, 10,  4,  5,  6,  7]))

## Boolean Indexing

In [35]:
# values in the array that are greater than 2
# the comparison runs element by element and yields a boolean array
# with the same shape as arr
arr > 2

array([[False, False,  True],
       [ True,  True,  True]])

In [36]:
# returning values that are greater than 2
# indexing with the boolean mask keeps only the cells where the mask is True,
# collected into a 1-D array
arr[arr > 2]

array([3, 4, 5, 6])

## Transposing and Reshaping

In [44]:
# what do those arrays look like again?
# vect still carries the 10 that was written through the slice earlier
vect, arr

(array([ 1,  2, 10,  4,  5,  6,  7]),
 array([[1, 2, 3],
        [4, 5, 6]]))

In [52]:
# transpose with the built-in T attribute
# or with the swapaxes method like arr.swapaxes(0,1)
# rows become columns, so the 2x3 matrix is shown as 3x2
arr.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [53]:
# Reshaping rearranges the same elements in the same order, so the new shape
# has to hold exactly as many values as the array contains (6 here):
# once as a flat vector, once as a 3x2 matrix
arr.reshape(6), arr.reshape(3, 2)

(array([1, 2, 3, 4, 5, 6]),
 array([[1, 2],
        [3, 4],
        [5, 6]]))

## Fast Element-Wise Functions

Single array functions:
https://wesmckinney.com/book/numpy-basics#tbl-table_unary_ufuncs

Multiple array functions:
https://wesmckinney.com/book/numpy-basics#tbl-table_binary_ufuncs

Random number generator functions:
https://wesmckinney.com/book/numpy-basics#tbl-table_numpy_random

In [57]:
# Seeded generator, so the draws are reproducible when this cell is re-run
rng = np.random.default_rng(12345)

# Two vectors of 8 standard-normal samples each; the right-hand tuple is
# evaluated left to right, so x is drawn before y
x, y = rng.standard_normal(8), rng.standard_normal(8)

(x, y)

(array([-1.42382504,  1.26372846, -0.87066174, -0.25917323, -0.07534331,
        -0.74088465, -1.3677927 ,  0.6488928 ]),
 array([ 0.36105811, -1.95286306,  2.34740965,  0.96849691, -0.75938718,
         0.90219827, -0.46695317, -0.06068952]))

In [59]:
# find the maximum values between the two
# compared element by element: position i of the result is the larger
# of x[i] and y[i]
np.maximum(x, y)

array([ 0.36105811,  1.26372846,  2.34740965,  0.96849691, -0.07534331,
        0.90219827, -0.46695317,  0.6488928 ])

In [67]:
# round each cell value
# rounds to the nearest whole number but the result stays a float array
# (which is why negative values near zero show up as -0.)
np.rint(x)

array([-1.,  1., -1., -0., -0., -1., -1.,  1.])

## Conditional Logic

In [69]:
# create a randomly filled array
# note: this rebinds arr (until now the 2x3 integer matrix) to a 4x4 float array,
# and the values depend on how many draws rng has already produced
arr = rng.standard_normal((4, 4))
arr

array([[-0.95898831, -1.20938829, -1.41229201,  0.54154683],
       [ 0.7519394 , -0.65876032, -1.22867499,  0.25755777],
       [ 0.31290292, -0.13081169,  1.26998312, -0.09296246],
       [-0.06615089, -1.10821447,  0.13595685,  1.34707776]])

In [71]:
# make all the negative cells 0
# where(condition, if true, if false)
# the result is only displayed; nothing is assigned back, so arr keeps its negatives
np.where(arr < 0, 0, arr)

array([[0.        , 0.        , 0.        , 0.54154683],
       [0.7519394 , 0.        , 0.        , 0.25755777],
       [0.31290292, 0.        , 1.26998312, 0.        ],
       [0.        , 0.        , 0.13595685, 1.34707776]])

## Statistical Methods

Stat functions: https://wesmckinney.com/book/numpy-basics#tbl-table_statistical_methods

In [81]:
# fresh 5x4 sample of standard-normal values for the statistics examples
# (rebinds arr again; the values depend on the current state of rng)
arr = rng.standard_normal((5, 4))
arr

array([[-1.99585661, -0.15524762,  1.06383087, -0.27517157],
       [-1.85333593, -0.12434193,  0.78497452,  0.2019986 ],
       [-0.42807444,  1.8482889 ,  1.89995289, -0.09842503],
       [ 0.81344544,  0.39249439,  0.7814429 ,  1.45327152],
       [ 0.82018605,  0.08770534, -0.65350565, -0.81188688]])

In [82]:
# find the mean of the entire array
# or along an axis (0 is within each column, 1 is across each row)
# axis=1 on this 5x4 array averages the 4 values of each row, giving 5 results
arr.mean(), arr.mean(axis=1)

(0.18758728798285568,
 array([-0.34061123, -0.24767618,  0.80543558,  0.86016356, -0.13937528]))

## Sorting

Calling sort on the array will perform the sort in place. Calling the np.sort() function will perform the sort on a copy of the array.

In [84]:
# sorting a vector
# np.sort hands back a sorted copy, so the first element of the displayed
# tuple still shows arr in its original order
arr = rng.standard_normal(6)
arr, np.sort(arr)

(array([ 0.28208603, -0.62361213,  1.12182226,  0.84122103, -0.7758961 ,
         0.41071644]),
 array([-0.7758961 , -0.62361213,  0.28208603,  0.41071644,  0.84122103,
         1.12182226]))

In [85]:
# sorting a matrix across an axis (0 is within each column, and 1 is across each row)
# axis=1 sorts the values inside each row independently; the rows themselves
# stay where they are
arr = rng.standard_normal((5, 3))
arr, np.sort(arr, axis=1)

(array([[-2.7224161 , -0.6733048 ,  1.24622153],
        [ 0.79020803,  0.17534089, -0.0292946 ],
        [-1.41951426, -1.35996632,  0.22341156],
        [ 1.76177943, -2.17088985,  0.62848817],
        [ 0.60119653,  0.95075786, -0.86924667]]),
 array([[-2.7224161 , -0.6733048 ,  1.24622153],
        [-0.0292946 ,  0.17534089,  0.79020803],
        [-1.41951426, -1.35996632,  0.22341156],
        [-2.17088985,  0.62848817,  1.76177943],
        [-0.86924667,  0.60119653,  0.95075786]]))

## Linear Algebra

Linear algebra functions: https://wesmckinney.com/book/numpy-basics#tbl-table_numpy_linalg

In [86]:
# Operands for the matrix product: x is 2x3 and y is 3x2,
# so the inner dimensions (3 and 3) line up
x = np.array(
    [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ]
)
y = np.array(
    [
        [6.0, 23.0],
        [-1.0, 7.0],
        [8.0, 9.0],
    ]
)
(x, y)

(array([[1., 2., 3.],
        [4., 5., 6.]]),
 array([[ 6., 23.],
        [-1.,  7.],
        [ 8.,  9.]]))

In [87]:
# Matrix product of x (2x3) and y (3x2), written with the @ operator;
# the result is a 2x2 array
x @ y

array([[ 28.,  64.],
       [ 67., 181.]])