# Numpy Arrays

**Keywords:** sequence - iterable/indexed, mutable, nesting

Arrays act a lot like lists with many methods that make computing easier.

## One Dimensional

### Creating an Array

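Arrays are constructed by passing a list (or another sequence) into `np.array()`. Arrays can hold different data types (integers, floats, strings, ...), but all elements of a single array share one dtype.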

In [92]:
import numpy as np

In [94]:
arr1 = np.array([1, 2, 3])
arr1

array([1, 2, 3])

In [95]:
list1 = [1, 2, 3, 4]   # create a list first
arr2 = np.array(list1)
arr2

array([1, 2, 3, 4])

### Indexing and Slicing

In [96]:
# create array from 0 - 11 (inclusive - exclusive)
arr3 = np.arange(0, 11)
arr3

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [97]:
# call individual value
arr3[8]

8

In [98]:
# slicing is the same as lists
arr3[1:5]

array([1, 2, 3, 4])

In [99]:
arr3[0:5] = 100
arr3

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [100]:
arr3 = np.arange(0, 11)

slice_arr = arr3[0:6]
slice_arr

array([0, 1, 2, 3, 4, 5])

In [101]:
slice_arr[:] = 99
slice_arr

array([99, 99, 99, 99, 99, 99])

In [102]:
# slicing returns a view, not a copy: the slice still points to the original data,
# so changing slice_arr also changed arr3
arr3

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [103]:
# .copy() creates an independent copy of arr3 that doesn't point to the original,
# so later changes to arr_copy leave arr3 untouched
arr_copy = arr3.copy()
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

## Nesting Arrays / Two Dimensional

### Creating an Array

A great feature of Python data structures is that they support *nesting*. This means we can have data structures within data structures, for example an array inside an array.

In [104]:
list2 = [11, 22, 33, 44]

my_lists = [list1, list2]   # create a nested list
my_lists

[[1, 2, 3, 4], [11, 22, 33, 44]]

In [106]:
# create a multi-dimension numpy array/matrix from nested lists
np.array(my_lists)

array([[ 1,  2,  3,  4],
       [11, 22, 33, 44]])

In [107]:
np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [108]:
# create a 1-D array with arange, then reshape it to (rows, columns) = (5, 3)
np.arange(15).reshape((5,3))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

### Indexing and Slicing

In [109]:
arr4 = np.array(([5, 10, 15], [20, 25, 30], [35, 40, 45]))
arr4

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [110]:
# selects index 1 row
arr4[1]

array([20, 25, 30])

In [113]:
# selects index 1 row and index 0 column
arr4[1][0]

20

In [114]:
arr4[:2, 1:]

array([[10, 15],
       [25, 30]])

In [115]:
arr5 = np.zeros((5, 5))
arr5

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [119]:
# take the row count from the array itself so the cell does not rely on
# a separately defined length variable
for i in range(arr5.shape[0]):  # counts from 0 - 5 (inclusive - exclusive)
    arr5[i] = i  # assigning a scalar to a row fills every column of that row

arr5

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.,  4.]])

In [120]:
# returns rows 2 and 4
arr5[[2, 4]]

array([[ 2.,  2.,  2.,  2.,  2.],
       [ 4.,  4.,  4.,  4.,  4.]])

## Array Methods

In [121]:
# add two same dimensional arrays
np.add(arr5, np.eye(5))

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 1.,  2.,  1.,  1.,  1.],
       [ 2.,  2.,  3.,  2.,  2.],
       [ 3.,  3.,  3.,  4.,  3.],
       [ 4.,  4.,  4.,  4.,  5.]])

In [122]:
# length/number of columns
arr5.shape[1]

5

In [127]:
# length/number of rows
arr5.shape[0]

5

In [128]:
arr6 = np.array([[1, 2, 3, 4], [11, 22, 33, 44]])
arr6

array([[ 1,  2,  3,  4],
       [11, 22, 33, 44]])

In [129]:
# take square root of values
np.sqrt(arr6)

array([[ 1.        ,  1.41421356,  1.73205081,  2.        ],
       [ 3.31662479,  4.69041576,  5.74456265,  6.63324958]])

In [130]:
# e raised to the power of each value
np.exp(arr6)

array([[  2.71828183e+00,   7.38905610e+00,   2.00855369e+01,
          5.45981500e+01],
       [  5.98741417e+04,   3.58491285e+09,   2.14643580e+14,
          1.28516001e+19]])

In [131]:
# returns shape of array in a tuple
arr6.shape

(2, 4)

In [132]:
# returns data type of array's contents
arr6.dtype

dtype('int64')

In [133]:
# returns floating 0 values with shape you give it
np.zeros(5)

array([ 0.,  0.,  0.,  0.,  0.])

In [136]:
# returns an uninitialized float array with the shape you give it
# (contents are arbitrary; they may happen to show as zeros, but don't rely on it)
np.empty([2, 5])

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [137]:
# returns floating 1 values with shape you give it
np.ones([4, 2])

array([[ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])

In [138]:
# returns a square identity matrix
np.eye(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [139]:
# returns numpy array from 0 - 4
np.arange(5)

array([0, 1, 2, 3, 4])

In [140]:
# returns numpy array from 5 - 49 every 2 numbers
np.arange(5, 50, 2)

array([ 5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37,
       39, 41, 43, 45, 47, 49])

In [142]:
# create array from 0 - 39 and reshape to (rows, columns)
arr7 = np.arange(40).reshape((10,4))
arr7

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31],
       [32, 33, 34, 35],
       [36, 37, 38, 39]])

In [143]:
# transposition - first row becomes first column, etc
arr7.T

array([[ 0,  4,  8, 12, 16, 20, 24, 28, 32, 36],
       [ 1,  5,  9, 13, 17, 21, 25, 29, 33, 37],
       [ 2,  6, 10, 14, 18, 22, 26, 30, 34, 38],
       [ 3,  7, 11, 15, 19, 23, 27, 31, 35, 39]])

In [144]:
# alternative way to get transpose
arr7.swapaxes(0, 1)

array([[ 0,  4,  8, 12, 16, 20, 24, 28, 32, 36],
       [ 1,  5,  9, 13, 17, 21, 25, 29, 33, 37],
       [ 2,  6, 10, 14, 18, 22, 26, 30, 34, 38],
       [ 3,  7, 11, 15, 19, 23, 27, 31, 35, 39]])

In [145]:
# take dot product of arrays
np.dot(arr7.T, arr7)

array([[4560, 4740, 4920, 5100],
       [4740, 4930, 5120, 5310],
       [4920, 5120, 5320, 5520],
       [5100, 5310, 5520, 5730]])

In [146]:
# make 3D matrix
# 3 sections that have 4 x 2 arrays
arr8 = np.arange(24).reshape((3, 4, 2))
arr8

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15]],

       [[16, 17],
        [18, 19],
        [20, 21],
        [22, 23]]])

In [147]:
# transpose a 3d matrix
# takes first row of every section and groups in a new section, etc
arr8.transpose(1, 0, 2)

array([[[ 0,  1],
        [ 8,  9],
        [16, 17]],

       [[ 2,  3],
        [10, 11],
        [18, 19]],

       [[ 4,  5],
        [12, 13],
        [20, 21]],

       [[ 6,  7],
        [14, 15],
        [22, 23]]])

In [148]:
# random normal distribution
arr9 = np.random.randn(10)
arr9

array([-0.57830015, -0.4363635 ,  0.53254782,  0.47538752, -0.41632771,
       -0.80055596, -1.57035673,  1.10998647,  0.59079399, -0.19200901])

In [149]:
arr10 = np.random.randn(10)
arr10

array([ 1.26136417,  0.42497431,  1.22474215,  0.8215719 , -0.2481461 ,
       -1.57331411,  0.31496321,  1.05446282,  0.00844202, -1.4103893 ])

In [150]:
# add elements of same dimensional array
np.add(arr9, arr10)

array([ 0.68306402, -0.01138919,  1.75728998,  1.29695942, -0.66447381,
       -2.37387007, -1.25539353,  2.16444929,  0.59923601, -1.60239831])

In [151]:
# finds maximum value at each index
np.maximum(arr9, arr10)

array([ 1.26136417,  0.42497431,  1.22474215,  0.8215719 , -0.2481461 ,
       -0.80055596,  0.31496321,  1.10998647,  0.59079399, -0.19200901])

In [152]:
# finds minimum value at each index
np.minimum(arr9, arr10)

array([-0.57830015, -0.4363635 ,  0.53254782,  0.47538752, -0.41632771,
       -1.57331411, -1.57035673,  1.05446282,  0.00844202, -1.4103893 ])

In [153]:
arr11 = np.array([1, 2, 3, 4])
arr12 = np.array([100, 200, 300, 400])
condition = np.array([True, True, False, False])

# list comprehension + zip
[(A_val if cond else B_val) for A_val, B_val, cond in zip(arr11, arr12, condition)]

[1, 2, 300, 400]

In [154]:
# or could do that easily with where()
# where my condition is met, choose A index value and if not choose B
np.where(condition, arr11, arr12)

array([  1,   2, 300, 400])

In [155]:
arr13 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr13

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [156]:
# sum whole array
arr13.sum()

45

In [157]:
# sum down each column (axis 0) - one total per column
arr13.sum(0)

array([12, 15, 18])

In [158]:
# sum across each row (axis 1) - one total per row
arr13.sum(1)

array([ 6, 15, 24])

In [159]:
arr13.mean()

5.0

In [160]:
arr13.std()

2.5819888974716112

In [161]:
arr13.var()

6.666666666666667

In [163]:
from numpy.random import randn

arr14 = randn(3,3)
arr14

array([[-0.98713571, -0.56697492,  0.88499332],
       [-0.40175276,  0.88603174, -0.16682353],
       [-0.05972145,  0.73708286, -0.83130662]])

In [166]:
# sorts along each row
arr14.sort()
arr14

array([[-0.98713571, -0.56697492,  0.88499332],
       [-0.40175276, -0.16682353,  0.88603174],
       [-0.83130662, -0.05972145,  0.73708286]])

In [167]:
countries = np.array(['France', 'Germany', 'USA', 'Russia', 'USA', 'Mexico', 'Germany'])

# returns array of unique values
np.unique(countries)

array(['France', 'Germany', 'Mexico', 'Russia', 'USA'], 
      dtype='|S7')

In [168]:
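# checks whether each of 'France', 'USA' and 'Sweden' is in the one-dimensional
# countries array; returns one boolean per value tested
# (newer NumPy versions recommend np.isin for this)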
np.in1d(['France', 'USA', 'Sweden'], countries)

array([ True,  True, False], dtype=bool)

## Scalars

In [172]:
arr15 = np.array([[1, 2, 3, 4], [8, 9, 10, 11]])
arr15

array([[ 1,  2,  3,  4],
       [ 8,  9, 10, 11]])

In [173]:
# arithmetic on arrays is applied element-wise, with no explicit loop needed
# (a scalar operand is applied to every value)
arr15 * arr15

array([[  1,   4,   9,  16],
       [ 64,  81, 100, 121]])

In [174]:
arr15 - arr15

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [175]:
1.0 / arr15

array([[ 1.        ,  0.5       ,  0.33333333,  0.25      ],
       [ 0.125     ,  0.11111111,  0.1       ,  0.09090909]])

In [176]:
arr15 ** 3

array([[   1,    8,   27,   64],
       [ 512,  729, 1000, 1331]])

In [177]:
np.sqrt(arr15)

array([[ 1.        ,  1.41421356,  1.73205081,  2.        ],
       [ 2.82842712,  3.        ,  3.16227766,  3.31662479]])