# Numpy

## Creating Numpy Arrays

In [1]:
import numpy as np

In [2]:
# 1D array using python list, each variable has a single value
np.array(list('abcdefg'))

array(['a', 'b', 'c', 'd', 'e', 'f', 'g'], dtype='<U1')

In [3]:
# 1D array using tuple
np.array((1,2,3,4,5,6), dtype='str')

array(['1', '2', '3', '4', '5', '6'], dtype='<U1')

In [4]:
# any number 1 or greater is True
np.array((1,0,1,1,0,0,1), dtype='bool')

array([ True, False,  True,  True, False, False,  True])

In [5]:
# create numpy array from txt file
# columns ned to be of the same length and dtype
np.genfromtxt('./data/mixed.txt', delimiter=',', dtype='int')

array([[  1,   2,   3,   4,  -1,   6,   7,   8,  -1,  -1],
       [  0,   9,  87,  34,  12,  43,   5,   6,   7,   0],
       [123,  -1, 543,  63, 789,  56, 234,  54, 888, 147],
       [  1,   2,  -1,   4,   5,   6,   7,  -1,   9,  -1],
       [  0,   9,  87,  34,  12,  43,  -1,   6,   7,   0],
       [123,  23,  -1,  63, 789,  56, 234,  54, 888,  -1]])

In [6]:
np.genfromtxt('./data/mixed.txt', delimiter=',', dtype='float')

array([[  1.,   2.,   3.,   4.,  nan,   6.,   7.,   8.,  nan,  nan],
       [  0.,   9.,  87.,  34.,  12.,  43.,   5.,   6.,   7.,   0.],
       [123.,  nan, 543.,  63., 789.,  56., 234.,  54., 888., 147.],
       [  1.,   2.,  nan,   4.,   5.,   6.,   7.,  nan,   9.,  nan],
       [  0.,   9.,  87.,  34.,  12.,  43.,  nan,   6.,   7.,   0.],
       [123.,  23.,  nan,  63., 789.,  56., 234.,  54., 888.,  nan]])

## Selecting Elements

Select elements from a 1D array as you would from Python lists.

In [7]:
ndarr = np.array((1,23,5,34,67,34,34,45,78,45,7,3,6,345,5), dtype='int')
print(ndarr)

[  1  23   5  34  67  34  34  45  78  45   7   3   6 345   5]


In [8]:
ndarr[-2:-6:-1]

array([345,   6,   3,   7])

In [9]:
print(ndarr[::-1])

[  5 345   6   3   7  45  78  45  34  34  67  34   5  23   1]


Selecting elements from 2D arrays, specify the row and the column.

In [10]:
arr = np.array([
    [1,2,3,4,5,6,7,8,9,0],
    [0,9,8,7,6,5,4,3,2,1],
    [1,2,3,4,5,6,7,8,9,0],
    [0,9,8,7,6,5,4,3,2,1],
    [1,2,3,4,5,6,7,8,9,0],
    [0,9,8,7,6,5,4,3,2,1],
])
print(arr)

[[1 2 3 4 5 6 7 8 9 0]
 [0 9 8 7 6 5 4 3 2 1]
 [1 2 3 4 5 6 7 8 9 0]
 [0 9 8 7 6 5 4 3 2 1]
 [1 2 3 4 5 6 7 8 9 0]
 [0 9 8 7 6 5 4 3 2 1]]


In [11]:
print(arr[1:3, 4:9]) # row, col

[[6 5 4 3 2]
 [5 6 7 8 9]]


In [12]:
print(arr[:1, 4:9])

[[5 6 7 8 9]]


In [13]:
print(arr[:, 5]) # all rows, col index 5

[6 5 6 5 6 5]


## Numpy Array Operations

In [14]:
# numpy supports element-wise operations
arr = np.genfromtxt('./data/numbers.txt', delimiter=',', dtype='int')
arr

array([[1, 0, 1, 1, 1, 0, 0, 0, 0, 1],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 1, 1, 0, 1, 0, 1, 1, 1, 1],
       [1, 1, 0, 1, 0, 1, 1, 0, 0, 0]])

In [15]:
np.cos(arr)

array([[0.54030231, 1.        , 0.54030231, 0.54030231, 0.54030231,
        1.        , 1.        , 1.        , 1.        , 0.54030231],
       [1.        , 1.        , 0.54030231, 0.54030231, 0.54030231,
        0.54030231, 1.        , 1.        , 1.        , 1.        ],
       [1.        , 0.54030231, 0.54030231, 1.        , 0.54030231,
        1.        , 0.54030231, 0.54030231, 0.54030231, 0.54030231],
       [0.54030231, 0.54030231, 1.        , 0.54030231, 1.        ,
        0.54030231, 0.54030231, 1.        , 1.        , 1.        ]])

In [16]:
arr * 3

array([[3, 0, 3, 3, 3, 0, 0, 0, 0, 3],
       [0, 0, 3, 3, 3, 3, 0, 0, 0, 0],
       [0, 3, 3, 0, 3, 0, 3, 3, 3, 3],
       [3, 3, 0, 3, 0, 3, 3, 0, 0, 0]])

Operations can be performed on two or more arrays. They must be of the same length, shape and dtype. Operations are performed on the same elements in the same positions - all operations are performed element wise.

In [17]:
np.cos(arr) * (arr + 1) - arr

array([[0.08060461, 1.        , 0.08060461, 0.08060461, 0.08060461,
        1.        , 1.        , 1.        , 1.        , 0.08060461],
       [1.        , 1.        , 0.08060461, 0.08060461, 0.08060461,
        0.08060461, 1.        , 1.        , 1.        , 1.        ],
       [1.        , 0.08060461, 0.08060461, 1.        , 0.08060461,
        1.        , 0.08060461, 0.08060461, 0.08060461, 0.08060461],
       [0.08060461, 0.08060461, 1.        , 0.08060461, 1.        ,
        0.08060461, 0.08060461, 1.        , 1.        , 1.        ]])

You can perform element wise logical operations, on the whole array or a selection.

In [18]:
arr[:, 3:6] > 0 # numpy creates a new array with the result of each operation.

array([[ True,  True, False],
       [ True,  True,  True],
       [False,  True, False],
       [ True, False,  True]])

In [19]:
arr

array([[1, 0, 1, 1, 1, 0, 0, 0, 0, 1],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 1, 1, 0, 1, 0, 1, 1, 1, 1],
       [1, 1, 0, 1, 0, 1, 1, 0, 0, 0]])

In [20]:
num_arr = np.array([
    [1,2,3,4,5,6,7,8,9,0],
    [0,9,8,7,6,5,4,3,2,1],
    [1,2,3,4,5,6,7,8,9,0],
    [0,9,8,7,6,5,4,3,2,1],
    [1,2,3,4,5,6,7,8,9,0],
    [0,9,8,7,6,5,4,3,2,1],
])
print(arr)

[[1 0 1 1 1 0 0 0 0 1]
 [0 0 1 1 1 1 0 0 0 0]
 [0 1 1 0 1 0 1 1 1 1]
 [1 1 0 1 0 1 1 0 0 0]]


In [21]:
num_arr > 5

array([[False, False, False, False, False,  True,  True,  True,  True,
        False],
       [False,  True,  True,  True,  True, False, False, False, False,
        False],
       [False, False, False, False, False,  True,  True,  True,  True,
        False],
       [False,  True,  True,  True,  True, False, False, False, False,
        False],
       [False, False, False, False, False,  True,  True,  True,  True,
        False],
       [False,  True,  True,  True,  True, False, False, False, False,
        False]])

Return only those values that meet the condition, returned 'True'.

In [22]:
num_arr[num_arr > 5]

array([6, 7, 8, 9, 9, 8, 7, 6, 6, 7, 8, 9, 9, 8, 7, 6, 6, 7, 8, 9, 9, 8,
       7, 6])

We can combine logical 'and' and 'or' operations. Use '&' and '|', the conditions must be surronded by '()'. Use `~` to 'flip' the result - apply logical `not`.

In [23]:
(num_arr > 4) & (num_arr < 6)

array([[False, False, False, False,  True, False, False, False, False,
        False],
       [False, False, False, False, False,  True, False, False, False,
        False],
       [False, False, False, False,  True, False, False, False, False,
        False],
       [False, False, False, False, False,  True, False, False, False,
        False],
       [False, False, False, False,  True, False, False, False, False,
        False],
       [False, False, False, False, False,  True, False, False, False,
        False]])

In [24]:
num_arr[(num_arr > 4) & (num_arr < 6)]

array([5, 5, 5, 5, 5, 5])

We can also compare numpy arrays element wise, they must be the same **size and shape**.

In [25]:
arr1 = np.array([1,2,3,4,5,6,7,8,9,0])
arr2 = np.array([0,9,8,7,6,5,4,3,2,1])

arr1 > arr2

array([ True, False, False, False, False,  True,  True,  True,  True,
       False])

In [26]:
arr1[arr1 > arr2] # return those values in arr1 that are greater than arr2

array([1, 6, 7, 8, 9])

In [27]:
heights = np.array([123.34, 143.34, 156.34, 178.56, 165.21, 189.43, 153.54])
ages = np.array([23, 43, 34, 54, 46, 65, 25])
names = np.array(['tom', 'dick', 'harry', 'pete', 'mike', 'john', 'simon'])

print('Heights for people > 40', heights[ages > 40])
print('Ages of people taller than 160.0', ages[heights > 160.0])

Heights for people > 40 [143.34 178.56 165.21 189.43]
Ages of people taller than 160.0 [54 46 65]


You can't use logical `and`, `or` or `not`. Instead `numpy` provides `logical_and`, `logical_or`, and `logical_not` methods.

In [28]:
# find individuals who are taller than 170.0 and older than 25
np.logical_and(heights > 170.0, ages > 25)

array([False, False, False,  True, False,  True, False])

In [29]:
(heights > 170.0) & (ages > 25)

array([False, False, False,  True, False,  True, False])

In [30]:
names[np.logical_and(heights > 170, ages > 25)]

array(['pete', 'john'], dtype='<U5')

In [31]:
# find individuals who are neither taller than 170 or under 26 (opposite of above)
np.logical_not(heights > 170, ages > 25)

array([ True,  True,  True, False,  True, False,  True])

In [32]:
~((heights > 170) & (ages > 25)) # (heights < 170) | (ages < 26)

array([ True,  True,  True, False,  True, False,  True])

In [33]:
names[np.logical_not(heights > 170, ages > 25)] # (heights < 170) | (ages < 26)

array(['tom', 'dick', 'harry', 'mike', 'simon'], dtype='<U5')

In [34]:
# find individuals who are EITHER taller than 170.0 OR older than 25
np.logical_or(heights > 170.0, ages > 25)

array([False,  True,  True,  True,  True,  True, False])

In [35]:
(heights > 170) | (ages > 25)

array([False,  True,  True,  True,  True,  True, False])

In [36]:
names[(heights > 170) | (ages > 25)]

array(['dick', 'harry', 'pete', 'mike', 'john'], dtype='<U5')

In [37]:
num_arr

array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 0],
       [0, 9, 8, 7, 6, 5, 4, 3, 2, 1],
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 0],
       [0, 9, 8, 7, 6, 5, 4, 3, 2, 1],
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 0],
       [0, 9, 8, 7, 6, 5, 4, 3, 2, 1]])

In [38]:
num_arr[(num_arr > 4) & (num_arr < 8)]

array([5, 6, 7, 7, 6, 5, 5, 6, 7, 7, 6, 5, 5, 6, 7, 7, 6, 5])

In [39]:
num_arr[~((num_arr > 4) & (num_arr < 8))]

array([1, 2, 3, 4, 8, 9, 0, 0, 9, 8, 4, 3, 2, 1, 1, 2, 3, 4, 8, 9, 0, 0,
       9, 8, 4, 3, 2, 1, 1, 2, 3, 4, 8, 9, 0, 0, 9, 8, 4, 3, 2, 1])

## Iterating over Numpy Arrays

You can use a `for` loop to iterate over both 1D and 2D numpy arrays:

In [42]:
arr = np.array([
    list('abcd'),
    list('hijk'),
    list('wxyz')
])

for row in arr:
    for value in row:
        print('{}: {}'. format(row, value))

['a' 'b' 'c' 'd']: a
['a' 'b' 'c' 'd']: b
['a' 'b' 'c' 'd']: c
['a' 'b' 'c' 'd']: d
['h' 'i' 'j' 'k']: h
['h' 'i' 'j' 'k']: i
['h' 'i' 'j' 'k']: j
['h' 'i' 'j' 'k']: k
['w' 'x' 'y' 'z']: w
['w' 'x' 'y' 'z']: x
['w' 'x' 'y' 'z']: y
['w' 'x' 'y' 'z']: z


Alternatively, we can use numpy's `nditer` method to iterate over 1D and 2D arrays.

In [43]:
for char in np.nditer(arr):
    print(char)

a
b
c
d
h
i
j
k
w
x
y
z


In [44]:
for char in np.nditer(np.array(list('abcdef'))):
    print(char)

a
b
c
d
e
f
