In [2]:
import numpy as np

# 4 NumPy Basics: Arrays and Vectorized Computation

In [3]:
my_arr = np.arange(1_000_000)
my_list = list(range(1_000_000))

# Now let's multiply each* 2
%timeit my_arr2 = my_arr * 2 309 us +- 7.48 us per loop (mean +- std. dev. of 7 runs, 1000 loops each)
%timeit my_list2 = [x * 2 for x in my_list] 46.4 ms +- 526 us per loop (mean +- std. dev. of 7 runs, 10 loops each)

SyntaxError: invalid syntax (<unknown>, line 1)

# 4.1 The NumPy ndarray: A Multidimensional Array Object

In [4]:
# Arrays enable you to perform mathematical operations on whole blocks of data using similar syntax to the equivalent operations between scalar elements.
data = np.array([[1.5, -0.1, 3], [0, -3, 6.5]])
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [5]:
# Mathematical operations with data:
# All of the elements have been multiplied by 10.
data * 10

array([[ 15.,  -1.,  30.],
       [  0., -30.,  65.]])

In [6]:
# Values in each "cell" in the array have been added to each other.
data + data

array([[ 3. , -0.2,  6. ],
       [ 0. , -6. , 13. ]])

### An ndarray is a generic multidimensional container for homogeneous data; that is, all of the elements must be the same type. Every array has a shape, a tuple indicating the size of each dimension, and a dtype, an object describing the data type of the array:

In [7]:
data.shape

(2, 3)

In [8]:
data.dtype

dtype('float64')

# Creating ndarrays

### The easiest way to create an array is to use the array function. This accepts any sequence-like object (including other arrays) and produces a new NumPy array containing the passed data.

In [9]:
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [10]:
# Nested sequences, like a list of equal-length lists, will be converted into a multidimensional array:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [11]:
# you can confirm dimensions of lists and arrays by inspecting the ndim and shape attributes:
arr2.ndim
arr2.shape

(2, 4)

In [12]:
# numpy.array tries to infer a good data type for the array that it creates.
# The data type is stored in a special dtype metadata object

arr1.dtype

dtype('float64')

In [13]:
arr2.dtype

dtype('int64')

In [14]:
# To create a higher dimensional array with these methods, pass a tuple for the shape:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [15]:
np.zeros((3, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [16]:
np.empty((2, 3, 2))

array([[[-0.00000000e+000, -3.10503639e+231],
        [ 2.96439388e-323,  0.00000000e+000],
        [ 0.00000000e+000,  0.00000000e+000]],

       [[ 0.00000000e+000,  0.00000000e+000],
        [ 0.00000000e+000,  7.25192305e+169],
        [ 1.49166824e-154,  8.34402841e-309]]])

In [17]:
# numpy.arange is an array-valued version of the built-in Python range function:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
# Table 4.1: Some important NumPy array creation functions

Function	Description

array:	Convert input data (list, tuple, array, or other sequence type) to an ndarray either by inferring a data type or explicitly specifying a data type; copies the input data by default

asarray: Convert input to ndarray, but do not copy if the input is already an ndarray

arange:	Like the built-in range but returns an ndarray instead of a list

ones, ones_like:	Produce an array of all 1s with the given shape and data type; ones_like takes another array and produces a ones array of the same shape and data type

zeros, zeros_like:	Like ones and ones_like but producing arrays of 0s instead

empty, empty_like:	Create new arrays by allocating new memory, but do not populate with any values like ones and zeros

full, full_like:	Produce an array of the given shape and data type with all values set to the indicated "fill value"; full_like takes another array and produces a filled array of the same shape and data type

eye, identity: Create a square N × N identity matrix (1s on the diagonal and 0s elsewhere)


SyntaxError: invalid syntax (199366207.py, line 3)

## Data Types for ndarrays

#### The data type or dtype is a special object containing the information (or metadata, data about data) the ndarray needs to interpret a chunk of memory as a particular type of data:

In [19]:
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)

arr1.dtype
arr2.dtype

dtype('int32')

In [23]:
# Table 4.2: NumPy data types

Type	Type code	Description

int8, uint8:	i1, u1:	Signed and unsigned 8-bit (1 byte) integer types

int16, uint16:	i2, u2:	Signed and unsigned 16-bit integer types
i
nt32, uint32:	i4, u4:	Signed and unsigned 32-bit integer types

int64, uint64:	i8, u8:	Signed and unsigned 64-bit integer types

float16:	f2:	Half-precision floating point

float32:	f4 or f:	Standard single-precision floating point; compatible with C float

float64:	f8 or d:	Standard double-precision floating point; compatible with C double and Python float object

float128: f16 or g:	Extended-precision floating point

complex64, complex128, complex256:	c8, c16, c32:	Complex numbers represented by two 32, 64, or 128 floats, respectively

bool:	?:	Boolean type storing True and False values

object:	O:	Python object type; a value can be any Python object

string_:	S:	Fixed-length ASCII string type (1 byte per character); for example, to create a string data type with length 10, use 'S10'

unicode_:	U:	Fixed-length Unicode type (number of bytes platform specific); same specification semantics as string_ (e.g., 'U10')

SyntaxError: invalid syntax (4073800581.py, line 3)

In [24]:
# You can explicitly convert or cast an array from one data type to another using ndarray’s astype method:
arr = np.array([1, 2, 3, 4, 5])
arr.dtype

dtype('int64')

In [26]:
float_arr = arr.astype(np.float64)
float_arr

array([1., 2., 3., 4., 5.])

In [27]:
float_arr.dtype

dtype('float64')

In [28]:
# Casting some floating-point numbers to be of integer data type, the dicmal becomes truncated:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [29]:
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [30]:
# If you have an array of strings representing numbers, you can use astype to convert them to numeric form:
numeric_strings = np.array(["1.25", "-9.6", "42"], dtype=np.string_)
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

In [32]:
# You can use another array's dtype attribute:
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [33]:
# shorthand type code strings you can also use to refer to a dtype:
zeros_uint32 = np.zeros(8, dtype="u4")
zeros_uint32

array([0, 0, 0, 0, 0, 0, 0, 0], dtype=uint32)

#### Calling astype always creates a new array (a copy of the data), even if the new data type is the same as the old data type.

# Arithmetic with NumPy Arrays

### Arrays are important because they enable you to express batch operations on data without writing any for loops.

### NumPy users call this vectorization. 

In [35]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [36]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [37]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [38]:
# Arithmetic operations with scalars propagate the scalar argument to each element in the array:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [39]:
arr ** 2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [40]:
# Comparisons between arrays of the same size yield Boolean arrays:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [41]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

### Evaluating operations between differently sized arrays is called broadcasting

# Basic Indexing and Slicing

In [42]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [43]:
arr[5]

5

In [44]:
arr[5:8]

array([5, 6, 7])

In [45]:
# If you assign a scalar value to a slice, as in arr[5:8] = 12, the value is propagated (or broadcast henceforth) to the entire selection.
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [49]:
# Using slices
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [50]:
# When values are changes in arr_slice, the mutations are reflected in the original array arr:
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [51]:
# The "bare" slice [:] will assign to all values in an array:
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [52]:
# In a two-dimensional array, the elements at each index are no longer scalars but rather one-dimensional arrays:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]

array([7, 8, 9])

In [53]:
# pass a comma-separated list of indices to select individual elements. 
arr2d[0][2]

3

In [54]:
arr2d[0, 2]

3

In [55]:
# In multidimensional arrays, if you omit later indices, the returned object will be a lower dimensional ndarray consisting of all the data along the higher dimensions.
# 2 × 2 × 3 array arr3d:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [56]:
# arr3d[0] is a 2 × 3 array:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [57]:
# Both scalar values and arrays can be assigned to arr3d[0]:
old_values = arr3d[0].copy()
arr3d[0] = 42
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [58]:
arr3d[0] = old_values
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [59]:
# Similarly, arr3d[1, 0] gives you all of the values whose indices start with (1, 0), forming a one-dimensional array:
arr3d[1, 0]

array([7, 8, 9])

In [60]:
x = arr3d[1]
x

array([[ 7,  8,  9],
       [10, 11, 12]])

In [61]:
x[0]

array([7, 8, 9])

In [62]:
# This multidimensional indexing syntax for NumPy arrays will not work with regular Python objects, such as lists of lists.

#### Indexing with slices

In [63]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [64]:
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [65]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [66]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [67]:
# You can pass multiple slices just like you can pass multiple indexes:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [68]:
# When slicing like this, you always obtain array views of the same number of dimensions. 
# By mixing integer indexes and slices, you get lower dimensional slices.
lower_dim_slice = arr2d[1, :2]

In [69]:
# While arr2d is two-dimensional, lower_dim_slice is one-dimensional, and its shape is a tuple with one axis size:
lower_dim_slice.shape

(2,)

In [70]:
arr2d[:2, 2]

array([3, 6])

## Note that a colon by itself means to take the entire axis, so you can slice only higher dimensional axes by doing:

In [71]:
# Assigning to a slice expression assigns to the whole selection:
arr2d[:2, 1:] = 0
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

## Boolean Indexing

In [72]:
names = np.array(["Bob", "Joe", "Will", "Bob", "Will", "Joe", "Joe"])
data = np.array([[4, 7], [0, 2], [-5, 6], [0, 0], [1, 2], [-12, -4], [3, 4]])
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [73]:
data

array([[  4,   7],
       [  0,   2],
       [ -5,   6],
       [  0,   0],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [74]:
names == "Bob"

array([ True, False, False,  True, False, False, False])

In [75]:
data[names == "Bob"]

array([[4, 7],
       [0, 0]])

In [76]:
# Boolean array must be of the same length as the array axis it’s indexing:
data[names == "Bob", 1]

array([7, 0])

In [77]:
# To select everything but "Bob" you can either use != or negate the condition using ~:
names != "Bob"

array([False,  True,  True, False,  True,  True,  True])

In [78]:
~(names == "Bob")

array([False,  True,  True, False,  True,  True,  True])

In [79]:
data[~(names == "Bob")]

array([[  0,   2],
       [ -5,   6],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [80]:
# The ~ operator can be useful when you want to invert a Boolean array referenced by a variable:
cond = names == "Bob"
data[~cond]

array([[  0,   2],
       [ -5,   6],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [81]:
# To select two of the three names to combine multiple Boolean conditions, use Boolean arithmetic operators like & (and) and | (or):
mask = (names == "Bob") | (names == "Will")
mask

array([ True, False,  True,  True,  True, False, False])

In [82]:
data[mask]

array([[ 4,  7],
       [-5,  6],
       [ 0,  0],
       [ 1,  2]])

In [83]:
# Setting values with Boolean arrays works by substituting the value or values on the righthand side into the locations where the Boolean array's values are True.
# To set all of the negative values in data to 0, we need only do:
data[data < 0] = 0
data

array([[4, 7],
       [0, 2],
       [0, 6],
       [0, 0],
       [1, 2],
       [0, 0],
       [3, 4]])

In [84]:
# You can also set whole rows or columns using a one-dimensional Boolean array:
data[names != "Joe"] = 7
data

array([[7, 7],
       [0, 2],
       [7, 7],
       [7, 7],
       [7, 7],
       [0, 0],
       [3, 4]])

## Fancy Indexing

#### Fancy indexing is a term adopted by NumPy to describe indexing using integer arrays. Suppose we had an 8 × 4 array:

In [85]:
arr = np.zeros((8, 4))
for i in range(8):
    arr[i] = i
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])