# NumPy Data Science Essentials
## Create NumPy arrays
### Create arrays from Python structures

In [1]:
import numpy as np

In [2]:
print(np.__version__)

1.14.0


In [3]:
# Build an ordinary Python list, then convert it into a NumPy array with np.array().
a_list = [-17, 0, 4, 5, 9] # plain Python list of ints
array_from_list = np.array(a_list) # NumPy array holding the same values
array_from_list

array([-17,   0,   4,   5,   9])

In [4]:
array_from_list*10

array([-170,    0,   40,   50,   90])

In [5]:
# A tuple works as input too. Its elements mix an int, a float and a complex
# number; the resulting array stores every element as a complex value (see output).
a_tuple = (14, -4.54, 4+8j)
np.array(a_tuple)

array([14.  +0.j, -4.54+0.j,  4.  +8.j])

**Difference between Python and NumPy data structures**

In [6]:
a_tuple*3

(14, -4.54, (4+8j), 14, -4.54, (4+8j), 14, -4.54, (4+8j))

In [7]:
np.array(a_tuple)*3

array([ 42.   +0.j, -13.62 +0.j,  12.  +24.j])

### Intrinsic creation using NumPy methods

In [8]:
np.arange(7)

array([0, 1, 2, 3, 4, 5, 6])

In [9]:
np.arange(10, 23)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22])

In [10]:
len(np.arange(10, 23))

13

In [11]:
# Keep the range in a variable so its attributes can be inspected.
# .size is the total number of elements (13 here, the same value len() gave above).
a_range_array = np.arange(10, 23)
a_range_array.size

13

In [12]:
np.arange(10, 23).size

13

In [13]:
np.arange(10, 23, 5)

array([10, 15, 20])

In [14]:
np.arange(10, 26, 5)

array([10, 15, 20, 25])

### `linspace()`, `zeros()`, `ones()`, and NumPy data types

In [15]:
np.linspace(5, 15, 9)

array([ 5.  ,  6.25,  7.5 ,  8.75, 10.  , 11.25, 12.5 , 13.75, 15.  ])

In [16]:
# With retstep=True, linspace returns a 2-tuple: (samples, spacing between samples).
a_linspace = np.linspace(5, 15, num=9, retstep=True)
a_linspace

(array([ 5.  ,  6.25,  7.5 ,  8.75, 10.  , 11.25, 12.5 , 13.75, 15.  ]), 1.25)

In [17]:
a_linspace[1]

1.25

In [18]:
np.zeros([5, 3])

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [19]:
np.zeros([5, 3, 2])

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [20]:
np.ones([3, 3])

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [21]:
np.ones([3, 3], dtype=np.int64)

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

## Index, Slice, and Iterate
### Slice arrays

In [22]:
# One-dimensional array used by the element-indexing examples that follow.
a_vector = np.array([-17, 0, 4, 5, 9])
a_vector

array([-17,   0,   4,   5,   9])

In [23]:
a_vector[2]

4

In [24]:
# Assigning through an index changes the array in place: position 2 now holds -231.
a_vector[2] = -231
a_vector

array([ -17,    0, -231,    5,    9])

In [25]:
a_vector[-3]

-231

In [26]:
a_vector.size

5

In [27]:
# 35 consecutive integers laid out as a matrix with 7 rows and 5 columns.
a_array = np.arange(7 * 5).reshape(7, 5)
a_array

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34]])

In [28]:
a_array[2]

array([10, 11, 12, 13, 14])

In [29]:
a_array[-2]

array([25, 26, 27, 28, 29])

In [30]:
a_array[5, 2]

27

In [31]:
# Name the two indices, then pass both of them in a single subscript.
row, col = 5, 2
a_array[row, col]

27

In [32]:
a_array[row][col]

27

In [33]:
# 70 consecutive integers arranged as 2 stacked matrices of 7 rows by 5 columns.
a_3d_array = np.arange(2 * 7 * 5).reshape((2, 7, 5))
a_3d_array

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34]],

       [[35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59],
        [60, 61, 62, 63, 64],
        [65, 66, 67, 68, 69]]])

In [34]:
a_3d_array[1]

array([[35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44],
       [45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54],
       [55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64],
       [65, 66, 67, 68, 69]])

In [35]:
a_3d_array[1, 3]

array([50, 51, 52, 53, 54])

In [36]:
a_3d_array[1, 3, 2]

52

In [37]:
# In-place assignment into the 3-D array; the displayed array now shows 1111
# at index [1, 3, 2], where 52 used to be.
a_3d_array[1, 3, 2] = 1111
a_3d_array

array([[[   0,    1,    2,    3,    4],
        [   5,    6,    7,    8,    9],
        [  10,   11,   12,   13,   14],
        [  15,   16,   17,   18,   19],
        [  20,   21,   22,   23,   24],
        [  25,   26,   27,   28,   29],
        [  30,   31,   32,   33,   34]],

       [[  35,   36,   37,   38,   39],
        [  40,   41,   42,   43,   44],
        [  45,   46,   47,   48,   49],
        [  50,   51, 1111,   53,   54],
        [  55,   56,   57,   58,   59],
        [  60,   61,   62,   63,   64],
        [  65,   66,   67,   68,   69]]])

### Boolean mask arrays

In [38]:
# Rebind a_vector to a fresh set of values for the boolean-mask examples.
a_vector = np.array([-16, 2, 4, 5, 9])
a_vector

array([-16,   2,   4,   5,   9])

In [39]:
# Element-wise test for even values; the result is a boolean array, one flag per element.
zero_mod_2_mask = (a_vector % 2) == 0
zero_mod_2_mask

array([ True,  True,  True, False, False])

In [40]:
# Indexing with the boolean mask keeps only the elements where the mask is True.
sub_array = a_vector[zero_mod_2_mask]
sub_array

array([-16,   2,   4])

In [41]:
sub_array[sub_array > 0]

array([2, 4])

#### NumPy logical operators

In [42]:
# First condition: True where the element is even.
mod_test = (a_vector % 2) == 0
mod_test

array([ True,  True,  True, False, False])

In [43]:
# Second condition: True where the element is strictly greater than zero.
positive_test = a_vector > 0
positive_test

array([False,  True,  True,  True,  True])

In [44]:
# Element-wise AND of the two masks: True only where a value is both even and positive.
combined_mask = np.logical_and(mod_test, positive_test)
combined_mask

array([False,  True,  True, False, False])

In [45]:
a_vector[combined_mask]

array([2, 4])

### Broadcasting

In [46]:
# Rebuild the 2x7x5 array of 0..69 from scratch, so the earlier in-place edit
# (the value 1111) is no longer present.
a_3d_array = np.arange(2 * 7 * 5).reshape((2, 7, 5))
a_3d_array

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34]],

       [[35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59],
        [60, 61, 62, 63, 64],
        [65, 66, 67, 68, 69]]])

In [47]:
a_3d_array.shape

(2, 7, 5)

In [48]:
a_3d_array.ndim

3

In [49]:
a_3d_array.size

70

In [50]:
a_3d_array.dtype

dtype('int64')

In [51]:
5 * a_3d_array - 2

array([[[ -2,   3,   8,  13,  18],
        [ 23,  28,  33,  38,  43],
        [ 48,  53,  58,  63,  68],
        [ 73,  78,  83,  88,  93],
        [ 98, 103, 108, 113, 118],
        [123, 128, 133, 138, 143],
        [148, 153, 158, 163, 168]],

       [[173, 178, 183, 188, 193],
        [198, 203, 208, 213, 218],
        [223, 228, 233, 238, 243],
        [248, 253, 258, 263, 268],
        [273, 278, 283, 288, 293],
        [298, 303, 308, 313, 318],
        [323, 328, 333, 338, 343]]])

In [52]:
# Two small integer matrices: left_mat is 2x3 and right_mat is 3x5.
left_mat = np.arange(2 * 3).reshape(2, 3)
right_mat = np.arange(3 * 5).reshape(3, 5)

In [53]:
# np.inner() sums products over the LAST axis of each operand, so a (2, 3) and
# a (3, 5) matrix do not line up and NumPy raises ValueError. The failure is the
# point of this cell, so it is caught and printed rather than left to stop a
# "Restart & Run All"; the following cell shows np.dot(), which accepts these shapes.
try:
    np.inner(left_mat, right_mat)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (2,3) and (5,3) not aligned: 3 (dim 1) != 5 (dim 0)

In [54]:
np.dot(left_mat, right_mat)

array([[ 25,  28,  31,  34,  37],
       [ 70,  82,  94, 106, 118]])

#### Operating along axes

In [55]:
a_3d_array.shape

(2, 7, 5)

In [56]:
a_3d_array.sum()

2415

In [57]:
a_3d_array.sum(axis=0)

array([[ 35,  37,  39,  41,  43],
       [ 45,  47,  49,  51,  53],
       [ 55,  57,  59,  61,  63],
       [ 65,  67,  69,  71,  73],
       [ 75,  77,  79,  81,  83],
       [ 85,  87,  89,  91,  93],
       [ 95,  97,  99, 101, 103]])

In [58]:
a_3d_array.sum(axis=1)

array([[105, 112, 119, 126, 133],
       [350, 357, 364, 371, 378]])

In [59]:
a_3d_array.sum(axis=2)

array([[ 10,  35,  60,  85, 110, 135, 160],
       [185, 210, 235, 260, 285, 310, 335]])

#### Broadcasting rules

In [60]:
# A 7x5 matrix of 64-bit integers in which every entry is 3.
a_2d_array = np.full((7, 5), 3, dtype=np.int64)
a_2d_array

array([[3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3]])

In [61]:
# 7x5 array of random floats. No seed is set, so the values differ on every run
# and the numbers displayed below will not be reproduced exactly.
# NOTE(review): consider seeding the generator if reproducible output is wanted.
random_2d_array = np.random.random((7,5))
random_2d_array

array([[0.49944247, 0.61363306, 0.81829634, 0.8892556 , 0.18079046],
       [0.55235876, 0.59321753, 0.55047985, 0.97468833, 0.2288431 ],
       [0.89885505, 0.13401941, 0.91766917, 0.49700276, 0.73313629],
       [0.9054655 , 0.22244701, 0.3079314 , 0.25330085, 0.81877879],
       [0.12669831, 0.93186473, 0.57055926, 0.28839045, 0.35045627],
       [0.54603649, 0.04629013, 0.2929259 , 0.22494055, 0.01174959],
       [0.62989063, 0.41441934, 0.84304887, 0.18228388, 0.26483598]])

In [62]:
# Limit printed floats to 4 decimal places. This is a global print setting, so it
# also changes how arrays are displayed by the cells that come after this one.
np.set_printoptions(precision=4)
# The (7, 5) random array is multiplied against each of the two 7x5 layers of the
# (2, 7, 5) array; the result has shape (2, 7, 5).
a_3d_array * random_2d_array

array([[[ 0.    ,  0.6136,  1.6366,  2.6678,  0.7232],
        [ 2.7618,  3.5593,  3.8534,  7.7975,  2.0596],
        [ 8.9886,  1.4742, 11.012 ,  6.461 , 10.2639],
        [13.582 ,  3.5592,  5.2348,  4.5594, 15.5568],
        [ 2.534 , 19.5692, 12.5523,  6.633 ,  8.411 ],
        [13.6509,  1.2035,  7.909 ,  6.2983,  0.3407],
        [18.8967, 12.847 , 26.9776,  6.0154,  9.0044]],

       [[17.4805, 22.0908, 30.277 , 33.7917,  7.0508],
        [22.0944, 24.3219, 23.1202, 41.9116, 10.0691],
        [40.4485,  6.1649, 43.1305, 23.8561, 35.9237],
        [45.2733, 11.3448, 16.0124, 13.4249, 44.2141],
        [ 6.9684, 52.1844, 32.5219, 16.7266, 20.6769],
        [32.7622,  2.8237, 18.1614, 14.1713,  0.752 ],
        [40.9429, 27.3517, 56.4843, 12.3953, 18.2737]]])

In [63]:
# Multiples of 7 (0, 7, 14, 21, 28), then the leading 0 is overwritten with -1.
# Presumably this keeps the division and modulo cells that follow from dividing by zero.
a_vector = np.arange(0, 5 * 7, 7)
a_vector[0] = -1
a_vector

array([-1,  7, 14, 21, 28])

In [64]:
a_3d_array / a_vector

array([[[ -0.    ,   0.1429,   0.1429,   0.1429,   0.1429],
        [ -5.    ,   0.8571,   0.5   ,   0.381 ,   0.3214],
        [-10.    ,   1.5714,   0.8571,   0.619 ,   0.5   ],
        [-15.    ,   2.2857,   1.2143,   0.8571,   0.6786],
        [-20.    ,   3.    ,   1.5714,   1.0952,   0.8571],
        [-25.    ,   3.7143,   1.9286,   1.3333,   1.0357],
        [-30.    ,   4.4286,   2.2857,   1.5714,   1.2143]],

       [[-35.    ,   5.1429,   2.6429,   1.8095,   1.3929],
        [-40.    ,   5.8571,   3.    ,   2.0476,   1.5714],
        [-45.    ,   6.5714,   3.3571,   2.2857,   1.75  ],
        [-50.    ,   7.2857,   3.7143,   2.5238,   1.9286],
        [-55.    ,   8.    ,   4.0714,   2.7619,   2.1071],
        [-60.    ,   8.7143,   4.4286,   3.    ,   2.2857],
        [-65.    ,   9.4286,   4.7857,   3.2381,   2.4643]]])

In [65]:
a_3d_array % a_vector

array([[[ 0,  1,  2,  3,  4],
        [ 0,  6,  7,  8,  9],
        [ 0,  4, 12, 13, 14],
        [ 0,  2,  3, 18, 19],
        [ 0,  0,  8,  2, 24],
        [ 0,  5, 13,  7,  1],
        [ 0,  3,  4, 12,  6]],

       [[ 0,  1,  9, 17, 11],
        [ 0,  6,  0,  1, 16],
        [ 0,  4,  5,  6, 21],
        [ 0,  2, 10, 11, 26],
        [ 0,  0,  1, 16,  3],
        [ 0,  5,  6,  0,  8],
        [ 0,  3, 11,  5, 13]]])

### Creating structured arrays

In [66]:
# Field layout for one person record, written as (field name, type code) pairs.
person_data_def = [
    ('name', 'S6'),    # byte string, up to 6 bytes
    ('height', 'f8'),  # 8-byte float
    ('weight', 'f8'),  # 8-byte float
    ('age', 'i8'),     # 8-byte integer
]
person_data_def

[('name', 'S6'), ('height', 'f8'), ('weight', 'f8'), ('age', 'i8')]

In [67]:
# Four zero-initialised records: empty name, 0.0 height and weight, age 0.
people_array = np.zeros(4, dtype=person_data_def)
people_array

array([(b'', 0., 0., 0), (b'', 0., 0., 0), (b'', 0., 0., 0),
       (b'', 0., 0., 0)],
      dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [68]:
# Fill the last record (index 3); the tuple follows the field order name, height, weight, age.
people_array[3] = ('Delta', 73, 205, 34)

In [69]:
# Fill the first record (index 0); records 1 and 2 are left at their zero values.
people_array[0] = ('Alpha', 65, 112, 23)

In [70]:
people_array

array([(b'Alpha', 65., 112., 23), (b'',  0.,   0.,  0),
       (b'',  0.,   0.,  0), (b'Delta', 73., 205., 34)],
      dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [71]:
people_array[0:]

array([(b'Alpha', 65., 112., 23), (b'',  0.,   0.,  0),
       (b'',  0.,   0.,  0), (b'Delta', 73., 205., 34)],
      dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [72]:
# Selecting by field name gives an ordinary integer array holding that field for every record.
ages = people_array['age']
ages

array([23,  0,  0, 34])

In [73]:
ages / 2

array([11.5,  0. ,  0. , 17. ])

#### Multi-dimensional structured arrays 

In [74]:
# The same record layout can fill a multi-dimensional array: here a 4x3x2 grid
# in which every cell is one zero-initialised person record.
people_big_array = np.zeros((4, 3, 2), dtype=person_data_def)
people_big_array

array([[[(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)]],

       [[(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)]],

       [[(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)]],

       [[(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)],
        [(b'', 0., 0., 0), (b'', 0., 0., 0)]]],
      dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [75]:
# Three indices address a single record; [3, 2, 1] is the very last cell of the 4x3x2 grid.
people_big_array[3, 2, 1] = ('Echo', 68, 155, 46)

In [76]:
people_big_array

array([[[(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)]],

       [[(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)]],

       [[(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)]],

       [[(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'',  0.,   0.,  0)],
        [(b'',  0.,   0.,  0), (b'Echo', 68., 155., 46)]]],
      dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [77]:
people_big_array['height']

array([[[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.],
        [ 0., 68.]]])

In [78]:
people_big_array[['height', 'weight']]

array([[[( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), ( 0.,   0.)]],

       [[( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), ( 0.,   0.)]],

       [[( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), ( 0.,   0.)]],

       [[( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), ( 0.,   0.)],
        [( 0.,   0.), (68., 155.)]]],
      dtype={'names':['height','weight'], 'formats':['<f8','<f8'], 'offsets':[6,14], 'itemsize':30})

#### Creating record arrays

In [79]:
# Record array built from two person tuples with the same field layout; its
# fields can also be read as attributes (for example .age).
person_record_array = np.rec.array(
    [('Delta', 73, 205, 34), ('Alpha', 65, 112, 23)],
    dtype=person_data_def,
)
person_record_array

rec.array([(b'Delta', 73., 205., 34), (b'Alpha', 65., 112., 23)],
          dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [80]:
person_record_array[0].age

34