# Arithmetic with NumPy Arrays
Arrays are important because they enable you to express batch operations on data without writing any for loops.

In [2]:
# imports
import numpy as np

In [3]:
arr = np.array([[1., 2., 3. ],[4., 5., 6.]])
print(arr)

[[1. 2. 3.]
 [4. 5. 6.]]


In [4]:
# multiplication
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [5]:
# subtraction
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [6]:
# addition
arr + arr

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [7]:
# division
arr / arr

array([[1., 1., 1.],
       [1., 1., 1.]])

## with scalars
Arithmetic operations with scalars propagate the scalar argument to each element in the array.

In [8]:
# 1 over the array
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [9]:
# raising to a power
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [11]:
# let's define another array
arr2 = np.array([[0., 4., 1.],[7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [17]:
# comparing two arrays
res = arr > arr2
print("type of res = {}".format(res.dtype))
res

type of res = bool


array([[ True, False,  True],
       [False,  True, False]])

### This concludes arithmetic with NumPy arrays

# Basic Indexing and Slicing
There are many ways to select an item or a group of items from an array, and NumPy offers several of them.

In [20]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [21]:
# selecting item at index=5
arr[5]

5

In [22]:
# selecting the items at indices 5 through 7 (the stop index 8 is excluded)
arr[5:8]

array([5, 6, 7])

In [25]:
# assigning a scalar to a slice sets every selected element to that value
arr[5:8] = 13
arr

array([ 0,  1,  2,  3,  4, 13, 13, 13,  8,  9])

In [31]:
# assigning another list to a slice
## note that the list must have the same length as the slice (or be broadcastable to it)
arr[5:8] = list(range(14, 17))
arr

array([ 0,  1,  2,  3,  4, 14, 15, 16,  8,  9])

### with higher dimension arrays, we have many more options.
In 2D arrays, the elements at each index are no longer scalars but rather 1D arrays.

In [38]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [40]:
# selecting one index
arr2d[1]

array([4, 5, 6])

In [43]:
# individual elements can be accessed recursively
print(arr2d[1][2])

# but this is too much work.
## we could just separate the indexes with a comma
arr2d[1,2]

6


6

In [47]:
# let's look at a 3D array: two blocks, each a 2x3 matrix, so the shape is (2, 2, 3)
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [57]:
# arr3d[0] is a 2x3 array
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [66]:
# copy() returns an independent array, so writing to the copy does not modify arr3d
arr3d_copy = arr3d.copy()
# assigning a scalar to arr3d_copy[0] fills the whole first 2x3 block with that value
arr3d_copy[0] = 42
arr3d_copy

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [70]:
# selecting a 1D array from our 3D
arr3d[1,1]

array([10, 11, 12])

In [71]:
# selecting a particular item from our 3D
arr3d[1,1,2]

12

## Indexing with Slices
Like one-dimensional objects such as Python lists, ndarrays can be sliced with the familiar `start:stop` syntax.

In [74]:
# consider 1D (as the printed output shows, arr holds the values 0..9 again here)
print("arr = {}".format(arr))
arr[1:6]

arr = [0 1 2 3 4 5 6 7 8 9]


array([1, 2, 3, 4, 5])

In [78]:
# now 2D from arr2d
print("arr2d = {}".format(arr2d))
arr2d[:2]

arr2d = [[1 2 3]
 [4 5 6]
 [7 8 9]]


array([[1, 2, 3],
       [4, 5, 6]])

In [82]:
## you can pass multiple slices like this
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

## Boolean Indexing
Let’s consider an example where we have some data in an array and an array of names
with duplicates. Each name corresponds to one row of the data array.

In [99]:
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
data = np.random.randn(7,4)

In [100]:
print("names: \n{}\n".format(names))
print("data:\n{}\n".format(data))

names: 
['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']

data:
[[ 0.45492016 -1.01758399  0.95868632 -0.57930793]
 [ 0.4273157   0.63448765  0.98204208  0.70913764]
 [ 0.71981016 -0.44066946 -1.6481777   0.94001746]
 [ 0.10982096 -2.01443632  0.8234018   0.07794317]
 [ 0.7365184   1.27346899  0.97865155  1.23554474]
 [-0.90378461  0.47986585 -1.19720756  0.64452665]
 [ 0.38842262  0.27939724 -0.68919318 -1.81919661]]



In [101]:
# like in arithmetic, comparisons with arrays are vectorised.
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [102]:
data[names == 'Bob']

array([[ 0.45492016, -1.01758399,  0.95868632, -0.57930793],
       [ 0.10982096, -2.01443632,  0.8234018 ,  0.07794317]])

#### In the following examples, we select the rows where names == 'Bob' and slice the columns as well

In [105]:
data[names == 'Bob', 2:]

array([[ 0.95868632, -0.57930793],
       [ 0.8234018 ,  0.07794317]])

In [106]:
data[names == 'Bob', 3:]

array([[-0.57930793],
       [ 0.07794317]])

In [108]:
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [109]:
data[names != 'Bob']

array([[ 0.4273157 ,  0.63448765,  0.98204208,  0.70913764],
       [ 0.71981016, -0.44066946, -1.6481777 ,  0.94001746],
       [ 0.7365184 ,  1.27346899,  0.97865155,  1.23554474],
       [-0.90378461,  0.47986585, -1.19720756,  0.64452665],
       [ 0.38842262,  0.27939724, -0.68919318, -1.81919661]])

In [117]:
cond = names == 'Bob' # an np boolean array
data[~cond] # ~cond is the inverse of cond

array([[ 0.4273157 ,  0.63448765,  0.98204208,  0.70913764],
       [ 0.71981016, -0.44066946, -1.6481777 ,  0.94001746],
       [ 0.7365184 ,  1.27346899,  0.97865155,  1.23554474],
       [-0.90378461,  0.47986585, -1.19720756,  0.64452665],
       [ 0.38842262,  0.27939724, -0.68919318, -1.81919661]])

In [124]:
# selecting rows that match either of two names by combining the masks with | (element-wise OR)
mask = (names == 'Bob') | (names =='Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [125]:
data[mask]

array([[ 0.45492016, -1.01758399,  0.95868632, -0.57930793],
       [ 0.71981016, -0.44066946, -1.6481777 ,  0.94001746],
       [ 0.10982096, -2.01443632,  0.8234018 ,  0.07794317],
       [ 0.7365184 ,  1.27346899,  0.97865155,  1.23554474]])

##### The Python keywords 'and' and 'or' do not work with NumPy boolean arrays; use & and | instead

In [129]:
data[data < 0] = 0
data

array([[0.45492016, 0.        , 0.95868632, 0.        ],
       [0.4273157 , 0.63448765, 0.98204208, 0.70913764],
       [0.71981016, 0.        , 0.        , 0.94001746],
       [0.10982096, 0.        , 0.8234018 , 0.07794317],
       [0.7365184 , 1.27346899, 0.97865155, 1.23554474],
       [0.        , 0.47986585, 0.        , 0.64452665],
       [0.38842262, 0.27939724, 0.        , 0.        ]])

In [133]:
# setting whole rows or columns using a 1D boolean array is also easy (here: every row whose name is not 'Joe')
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.4273157 , 0.63448765, 0.98204208, 0.70913764],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.47986585, 0.        , 0.64452665],
       [0.38842262, 0.27939724, 0.        , 0.        ]])

## Fancy Indexing
This is a term adopted by NumPy to describe indexing using integer arrays.


In [137]:
# take an 8x4 array for example (np.empty does not initialize the values, hence the arbitrary numbers below)
arr = np.empty((8,4))
arr

array([[1.24557190e-316, 0.00000000e+000, 6.90141436e-310,
        6.90141436e-310],
       [6.90143046e-310, 6.90143049e-310, 6.90143037e-310,
        6.90143040e-310],
       [6.90143041e-310, 6.90141436e-310, 6.90141439e-310,
        6.90141436e-310],
       [6.90143042e-310, 6.90143049e-310, 6.90141436e-310,
        6.90141436e-310],
       [6.90141436e-310, 6.90141436e-310, 6.90143047e-310,
        6.90142997e-310],
       [6.90141436e-310, 6.90141436e-310, 6.90143037e-310,
        6.90143050e-310],
       [6.90143051e-310, 6.90143051e-310, 6.90143051e-310,
        6.90141436e-310],
       [6.90143051e-310, 6.90143034e-310, 6.90143048e-310,
        6.90143042e-310]])

In [139]:
# fill every row with its own row index (row 0 -> 0.0, row 1 -> 1.0, ...)
for row_index in range(8):
    arr[row_index] = row_index
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [140]:
# to select out a subset of rows in a particular order, you can simply pass a list or ndarray of integers specifying the order
arr[[4,6,0,3]]

array([[4., 4., 4., 4.],
       [6., 6., 6., 6.],
       [0., 0., 0., 0.],
       [3., 3., 3., 3.]])

In [148]:
# or you can use negative indices to select rows from the end
arr[[-4, -2, -8, -5]]

array([[4., 4., 4., 4.],
       [6., 6., 6., 6.],
       [0., 0., 0., 0.],
       [3., 3., 3., 3.]])

## Transposing Arrays and Swapping Axes


In [151]:
arr = np.arange(15).reshape(3,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [152]:
# transpose of arr
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])