# Arithmetic with NumPy Arrays
Arrays are important because they let you express batch operations on data without writing any for loops: an arithmetic operation between arrays is applied element by element.

In [1]:
# imports
import numpy as np

In [2]:
# a 2x3 array of floats, built from a nested list with one inner list per row
arr = np.array(
    [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ]
)
print(arr)

[[1. 2. 3.]
 [4. 5. 6.]]


In [3]:
# multiplication is applied element by element (this is not a matrix product)
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [4]:
# subtraction is element-wise too; an array minus itself is all zeros
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [5]:
# element-wise addition: every entry is doubled
arr + arr

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [6]:
# element-wise division: each entry divided by itself is 1 (arr holds no zeros)
arr / arr

array([[1., 1., 1.],
       [1., 1., 1.]])

## With scalars
Arithmetic operations between an array and a scalar propagate the scalar to each element of the array.

In [7]:
# the scalar 1 is divided by each element, giving the element-wise reciprocal
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [8]:
# raising every element to the power 0.5, i.e. the element-wise square root
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [9]:
# let's define a second 2x3 float array to compare against arr
arr2 = np.array(
    [
        [0.0, 4.0, 1.0],
        [7.0, 2.0, 12.0],
    ]
)
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [10]:
# comparing two arrays of the same shape is element-wise and yields a boolean array
res = arr > arr2
# note: the value printed here is the array's dtype (its element type), not type(res)
print("type of res = {}".format(res.dtype))
res

type of res = bool


array([[ True, False,  True],
       [False,  True, False]])

### This concludes arithmetic with NumPy arrays.

# Basic Indexing and Slicing
There are many ways to select an item or a group of items from an array, and NumPy supports several of them.

In [11]:
# rebind arr to a 1D integer array holding 0 through 9
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
# selecting the item at index 5 (indexing is zero-based)
arr[5]

5

In [13]:
# selecting the items at indices 5, 6 and 7 (the stop index 8 is excluded)
arr[5:8]

array([5, 6, 7])

In [14]:
# assigning a scalar to a slice sets every element of that slice, modifying arr in place
arr[5:8] = 13
arr

array([ 0,  1,  2,  3,  4, 13, 13, 13,  8,  9])

In [15]:
# reassigning a slice from another sequence
## note that the sequence must have as many items as the slice (3 here);
## only a scalar or a length-1 sequence is broadcast to fill the slice
arr[5:8] = list(range(14, 17))
arr

array([ 0,  1,  2,  3,  4, 14, 15, 16,  8,  9])

### With higher-dimensional arrays, we have many more options.
In a 2D array, the element at each index is no longer a scalar but a 1D array.

In [16]:
# a 3x3 integer array; each inner list is one row
arr2d = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [17]:
# a single index on a 2D array selects the whole row at that position
arr2d[1]

array([4, 5, 6])

In [18]:
# individual elements can be reached by chaining indexes: row 1 first, then item 2 of that row
print(arr2d[1][2])

# but chaining is more verbose than necessary.
## a comma-separated index (row, column) selects the same element in one step
arr2d[1,2]

6


6

In [19]:
# let's look at a 3D array: shapes with more than two axes are easier to read
# as a stack of 2D blocks, so this one is laid out as two 2x3 blocks
arr3d = np.array(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[7, 8, 9], [10, 11, 12]],
    ]
)
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [20]:
# indexing only the first axis of the 2x2x3 array returns a 2x3 array
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [21]:
# work on an independent copy so the assignment below leaves arr3d untouched;
# ndarray.copy() already duplicates the data, so no extra [:] slice is needed
arr3d_copy = arr3d.copy()
# a scalar assigned to arr3d_copy[0] fills that whole 2x3 block
arr3d_copy[0] = 42
arr3d_copy

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [22]:
# two indices select a 1D array: the second row of the second 2x3 block
arr3d[1,1]

array([10, 11, 12])

In [23]:
# three indices (one per axis) select a single scalar element
arr3d[1,1,2]

12

## Indexing with Slices
Like one-dimensional objects such as Python lists, ndarrays can be sliced with the familiar `start:stop` syntax.

In [24]:
# consider the 1D case first; printing arr shows the values the slice is taken from
print("arr = {}".format(arr))
# elements at indices 1 through 5 (the stop index 6 is excluded)
arr[1:6]

arr = [ 0  1  2  3  4 14 15 16  8  9]


array([ 1,  2,  3,  4, 14])

In [25]:
# now the 2D case, using arr2d: a single slice applies to the first axis (rows)
print("arr2d = {}".format(arr2d))
# the first two rows
arr2d[:2]

arr2d = [[1 2 3]
 [4 5 6]
 [7 8 9]]


array([[1, 2, 3],
       [4, 5, 6]])

In [26]:
## you can pass one slice per axis: here the first two rows, and the columns from index 1 onward
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

## Boolean Indexing
Let’s consider an example where we have some data in an array and an array of names
with duplicates. Each name corresponds to one row of the data array.

In [27]:
# seven names, some repeated
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
# a 7x4 array of standard-normal samples; no seed is set, so the values differ between runs
data = np.random.randn(7,4)

In [28]:
# show both arrays so the selections that follow can be checked against them
print("names: \n{}\n".format(names))
print("data:\n{}\n".format(data))

names: 
['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']

data:
[[ 1.51304513  1.01816049  0.8450459   0.73293198]
 [-0.37397703  0.81453046  0.93984053 -2.00924   ]
 [ 1.01817751  0.6427766  -0.76026008 -0.52487897]
 [-0.63305105 -0.30309812 -0.29662755  1.08315803]
 [-0.38781626  1.80604884  0.33419771  1.461282  ]
 [-0.70994812 -0.741191   -0.84906134 -0.15482992]
 [-1.85716565 -0.16193655 -1.66551575 -1.47362042]]



In [29]:
# like arithmetic, comparisons with arrays are vectorised:
# each name is compared with 'Bob', giving one boolean per name
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [30]:
# indexing with the boolean array keeps only the rows where it is True
data[names == 'Bob']

array([[ 1.51304513,  1.01816049,  0.8450459 ,  0.73293198],
       [-0.63305105, -0.30309812, -0.29662755,  1.08315803]])

#### In the following examples, we select the rows where names == 'Bob' and slice the columns as well.

In [31]:
# rows where names == 'Bob', columns from index 2 onward
data[names == 'Bob', 2:]

array([[ 0.8450459 ,  0.73293198],
       [-0.29662755,  1.08315803]])

In [32]:
# same rows, columns from index 3 onward; slicing keeps the result 2D (shape 2x1)
data[names == 'Bob', 3:]

array([[0.73293198],
       [1.08315803]])

In [33]:
# != is vectorised as well: True for every name other than 'Bob'
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [34]:
# every row whose name is not 'Bob'
data[names != 'Bob']

array([[-0.37397703,  0.81453046,  0.93984053, -2.00924   ],
       [ 1.01817751,  0.6427766 , -0.76026008, -0.52487897],
       [-0.38781626,  1.80604884,  0.33419771,  1.461282  ],
       [-0.70994812, -0.741191  , -0.84906134, -0.15482992],
       [-1.85716565, -0.16193655, -1.66551575, -1.47362042]])

In [35]:
# the same selection as names != 'Bob', written by negating a stored condition
cond = names == 'Bob' # a NumPy boolean array
data[~cond] # ~cond is the element-wise inverse of cond

array([[-0.37397703,  0.81453046,  0.93984053, -2.00924   ],
       [ 1.01817751,  0.6427766 , -0.76026008, -0.52487897],
       [-0.38781626,  1.80604884,  0.33419771,  1.461282  ],
       [-0.70994812, -0.741191  , -0.84906134, -0.15482992],
       [-1.85716565, -0.16193655, -1.66551575, -1.47362042]])

In [36]:
# selecting rows that match either of two names by combining the conditions with | (element-wise OR);
# the parentheses are required because | binds more tightly than ==
mask = (names == 'Bob') | (names =='Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [37]:
# the rows named 'Bob' or 'Will'
data[mask]

array([[ 1.51304513,  1.01816049,  0.8450459 ,  0.73293198],
       [ 1.01817751,  0.6427766 , -0.76026008, -0.52487897],
       [-0.63305105, -0.30309812, -0.29662755,  1.08315803],
       [-0.38781626,  1.80604884,  0.33419771,  1.461282  ]])

##### The Python keywords `and` and `or` do not work with NumPy boolean arrays; use `&` (and) and `|` (or) instead.

In [38]:
# a boolean array with data's own shape selects individual elements:
# every negative value is replaced by 0, in place
data[data < 0] = 0
data

array([[1.51304513, 1.01816049, 0.8450459 , 0.73293198],
       [0.        , 0.81453046, 0.93984053, 0.        ],
       [1.01817751, 0.6427766 , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 1.08315803],
       [0.        , 1.80604884, 0.33419771, 1.461282  ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ]])

In [39]:
# setting whole rows with a 1D boolean array is also easy:
# every row whose name is not 'Joe' is filled with 7
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.81453046, 0.93984053, 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ]])

## Fancy Indexing
This is a term adopted by NumPy to describe indexing using integer arrays.


In [40]:
# take an 8x4 array for example
# np.empty allocates the array without initialising it, so the values shown are arbitrary
arr = np.empty((8,4))
arr

array([[1.87797593e-316, 0.00000000e+000, 2.14321575e-312,
        2.46151512e-312],
       [2.33419537e-312, 6.79038654e-313, 2.20687562e-312,
        6.79038654e-313],
       [2.48273508e-312, 2.29175545e-312, 2.44029516e-312,
        6.79038654e-313],
       [2.16443571e-312, 2.22809558e-312, 2.14321575e-312,
        2.44029516e-312],
       [2.16443571e-312, 2.35541533e-312, 6.79038654e-313,
        6.79038653e-313],
       [2.35541533e-312, 1.18831764e-312, 2.14321575e-312,
        2.10077583e-312],
       [2.48273508e-312, 2.22809558e-312, 2.14321575e-312,
        2.05833592e-312],
       [2.41907520e-312, 1.12465777e-312, 1.18831764e-312,
        4.59481051e-322]])

In [41]:
# fill every row with its own row index without a Python-level loop:
# the (n_rows, 1) column of indices is broadcast across all columns of arr
arr[:] = np.arange(arr.shape[0]).reshape(-1, 1)
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [42]:
# to select a subset of rows in a particular order, pass a list or ndarray of integers;
# the rows come back in the order given (here rows 4, 6, 0 and 3)
arr[[4,6,0,3]]

array([[4., 4., 4., 4.],
       [6., 6., 6., 6.],
       [0., 0., 0., 0.],
       [3., 3., 3., 3.]])

In [43]:
# negative indices count from the end: with 8 rows, -4, -2, -8 and -5 are rows 4, 6, 0 and 3
arr[[-4, -2, -8, -5]]

array([[4., 4., 4., 4.],
       [6., 6., 6., 6.],
       [0., 0., 0., 0.],
       [3., 3., 3., 3.]])

## Transposing Arrays and Swapping Axes


In [44]:
# the integers 0..14 laid out row by row as a 3x5 array
arr = np.arange(15).reshape(3,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [45]:
# transpose of arr: rows become columns, so the 3x5 array becomes 5x3
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [47]:
# np.dot computes the matrix product; arr.T (3x6) times arr (6x3) gives a symmetric 3x3 matrix
# arr is random and unseeded, so the numbers differ between runs
arr = np.random.randn(6,3)
print("arr:\n{}\n".format(arr))
np.dot(arr.T, arr)

arr:
[[ 0.79992221 -0.97939526  2.00248236]
 [-0.30157945  0.20375107 -1.66609168]
 [ 1.34576001  1.25967461  0.02015686]
 [-1.42052327 -0.51247132 -0.64987809]
 [-0.275198    0.76359439 -0.09769875]
 [ 0.13709558  0.0588036   1.25336696]]



array([[ 4.65431121,  1.37623206,  3.25329996],
       [ 1.37623206,  3.43667082, -1.94315446],
       [ 3.25329996, -1.94315446,  8.7890187 ]])

In [53]:
# for higher-dimensional arrays, transpose accepts a tuple of axis numbers to permute the axes;
# start from the integers 0..15 arranged as a 2x2x4 array
arr = np.arange(16).reshape(2,2,4)
print("arr:\n{}\n".format(arr))

arr:
[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]



In [56]:
# reorder the axes as (1, 0, 2): the first two axes are exchanged, the last stays in place
arr.transpose((1,0,2))

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [57]:
# swapaxes takes a pair of axis numbers and exchanges just those two;
# swapping axes 1 and 2 turns the 2x2x4 array into a 2x4x2 one
arr.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])