# Numerical Python: numpy 

The NumPy package provides the "ndarray" object for efficient storage and manipulation of dense data arrays in Python. 

The ndarray is similar to the Python list, but it can have any number of dimensions.

The elements of an ndarray are all of the same type, and are indexed by a tuple of non-negative integers.

In NumPy, dimensions are called axes. The number of axes is called the rank of the array (available as the `ndim` attribute).

The ndarray class is also known by the alias array.

In [3]:
import numpy as np 
data = np.random.randn(2, 3) 

In [4]:
data

array([[ 0.11229047, -0.60959023, -0.41856589],
       [ 1.24049198, -1.14652732,  0.97703514]])

In [5]:
data.ndim 

2

In [6]:
data.shape

(2, 3)

In [7]:
# np.string_ was removed in NumPy 2.0; np.bytes_ is the same byte-string type
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
numeric_strings.astype(float) 

array([ 1.25, -9.6 , 42.  ])

In [14]:
int_array = np.arange(10) 

In [15]:
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64) 
calibers.dtype 

dtype('float64')

In [18]:
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

# Operations between Arrays and Scalars 
A vectorized operation between two arrays is performed element by element. 
Arrays are important because they enable you to express batch operations on data
without writing any for loops. This is usually called vectorization. Any arithmetic operation
between equal-size arrays applies the operation elementwise, and an arithmetic operation
between an array and a scalar applies the scalar to every element of the array:

In [19]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr 

array([[1., 2., 3.],
       [4., 5., 6.]])

In [20]:
arr * arr 

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [21]:
arr-arr 

array([[0., 0., 0.],
       [0., 0., 0.]])

In [22]:
1/arr 

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [23]:
arr ** 0.5 

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

# Basic Indexing and Slicing 

In [24]:
arr2=np.arange(10) 

In [25]:
arr2

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
arr2[5:8] 

array([5, 6, 7])

In [27]:
arr2[5:8] = 12
arr2 

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [28]:
arr_slice = arr2[5:8]    
arr_slice[1] = 12345 
arr2 

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [29]:
arr_slice[:] = 64 

In [30]:
arr2 

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [31]:
arr2d = np.array([[1,2,3], [4,5,6], [7,8,9]]) 

In [32]:
arr2d 

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [33]:
arr2d.shape

(3, 3)

In [36]:
# With higher dimensional arrays, you have many more options. In a two-dimensional
# array, the elements at each index are no longer scalars but rather one-dimensional
# arrays:
arr2d[2] 

array([7, 8, 9])

In [37]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [38]:
arr2d[0][2]

3

In [42]:
arr2d[2, 0] 

7

In [44]:
# In multidimensional arrays, if you omit later indices, the returned object will be a
# lower-dimensional ndarray consisting of all the data along the higher dimensions. So in the
# 2 × 2 × 3 array arr3d defined below, arr3d[0] is a 2 × 3 array:
arr3d = np.array([[[1,2,3], [4,5,6]], [[7,8,9], [10,11,12]]]) 

In [45]:
arr3d.shape

(2, 2, 3)

In [47]:
arr3d[0].shape

(2, 3)

In [48]:
old_values = arr3d[0].copy() 

In [49]:
arr3d[0] = 42 

In [50]:
arr3d 

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [51]:
arr3d[0]=old_values 
arr3d 

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

# Indexing with slices p/89

In [54]:
# Like one-dimensional objects such as Python lists, ndarrays can be sliced using the
# familiar syntax:
arr2d[:2, 1:] 

array([[2, 3],
       [5, 6]])

In [55]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [58]:
# Higher-dimensional objects give you more options, as you can slice one or more axes
# and also mix in integers. Consider the 2D array above, arr2d. Slicing this array is a bit
# different:
arr2d[:2] 

array([[1, 2, 3],
       [4, 5, 6]])

In [1]:
# As you can see, it has sliced along axis 0, the first axis. A slice, therefore, selects a range
# of elements along an axis. You can pass multiple slices just like you can pass multiple
# indexes, for example arr2d[:2, 1:] as shown earlier.

# Boolean Indexing

In [2]:
import numpy as np 
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe']) 


In [9]:
data = np.random.randn(7, 4)  

In [10]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [11]:
data 

array([[-0.87872917,  0.11531438, -0.02018257, -2.31829058],
       [ 0.96829477,  0.99381466, -0.53575173, -0.81961784],
       [ 0.01152395,  0.12598006, -1.87994786, -0.44954607],
       [-1.37007293, -0.14028989,  0.92443913, -0.09100199],
       [ 0.54503966,  0.29098994,  0.17148998,  0.18028985],
       [ 1.46143437,  0.91809166,  1.74108137,  0.51907012],
       [ 1.29667667, -0.49793133, -1.94684185, -1.78690058]])

![Boolean Indexing](scrn.png) 

In [14]:
# Suppose each name corresponds to a row in the data array, and we want to select all
# the rows with the corresponding name 'Bob'. Like arithmetic operations, comparisons
# (such as ==) with arrays are also vectorized. Thus, comparing names with the string
# 'Bob' yields a boolean array:
names == 'Bob' 

array([ True, False, False,  True, False, False, False])

In [15]:
# This boolean array can be passed when indexing the array: 
data[names == 'Bob'] 

array([[-0.87872917,  0.11531438, -0.02018257, -2.31829058],
       [-1.37007293, -0.14028989,  0.92443913, -0.09100199]])

In [16]:
# The boolean array must be of the same length as the axis it’s indexing. You can even
# mix and match boolean arrays with slices or integers (or sequences of integers, more
# on this later): 
data[names == 'Bob', 2:] 

array([[-0.02018257, -2.31829058],
       [ 0.92443913, -0.09100199]])

In [17]:
data[names == 'Bob', 3] 

array([-2.31829058, -0.09100199])

In [20]:
# To select everything but 'Bob', you can either use != or negate the condition using ~
# (older NumPy releases also accepted unary -, which now raises a TypeError on boolean arrays):
names != 'Bob' 

array([False,  True,  True, False,  True,  True,  True])

In [22]:
# data[~(names == 'Bob')] 

In [23]:
# To select two of the three names, combine multiple boolean conditions using boolean
# arithmetic operators like & (and) and | (or):
mask = (names =='Bob') | (names == 'Will') 
mask

array([ True, False,  True,  True,  True, False, False])

In [24]:
data[mask] 

array([[-0.87872917,  0.11531438, -0.02018257, -2.31829058],
       [ 0.01152395,  0.12598006, -1.87994786, -0.44954607],
       [-1.37007293, -0.14028989,  0.92443913, -0.09100199],
       [ 0.54503966,  0.29098994,  0.17148998,  0.18028985]])

In [25]:
# Setting values with boolean arrays works in a common-sense way. To set all of the
# negative values in data to 0 we need only do:
data[data < 0] = 0 

In [26]:
data

array([[0.        , 0.11531438, 0.        , 0.        ],
       [0.96829477, 0.99381466, 0.        , 0.        ],
       [0.01152395, 0.12598006, 0.        , 0.        ],
       [0.        , 0.        , 0.92443913, 0.        ],
       [0.54503966, 0.29098994, 0.17148998, 0.18028985],
       [1.46143437, 0.91809166, 1.74108137, 0.51907012],
       [1.29667667, 0.        , 0.        , 0.        ]])

In [33]:
# Setting whole rows or columns using a one-dimensional boolean array is also easy: 
data[names != 'Joe'] = 7 
data 

array([[7.        , 7.        , 7.        , 7.        ],
       [0.96829477, 0.99381466, 0.        , 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [1.46143437, 0.91809166, 1.74108137, 0.51907012],
       [1.29667667, 0.        , 0.        , 0.        ]])

# Fancy Indexing p/93