# Week 3: NumPy and Arrays

NumPy is a Python package for numerical computing. The core NumPy data type is the homogeneous array.

### Built-in functions don't have to be imported before they are used

In [1]:
l = [1,'two',3.0]
len(l)                # len() is a built-in function

3

### Some functions are part of the Python Standard Library, but not built-in. These functions come with every Python installation, but must be imported before they are used

In [2]:
sin(30)    # sin() is not a built-in function

NameError: name 'sin' is not defined

In [3]:
from math import sin     # sin() must be imported from the math module before it can be used

# math.sin() expects its argument in radians, so this is the sine of 30 radians (not 30 degrees)
sin(30)

-0.9880316240928618

### Functions in external packages need to be installed and imported before they are used. NumPy is an external package that comes pre-installed in the Anaconda Distribution of Python, so on PCC computer lab computers NumPy does not need to be installed, just imported

In [4]:
import numpy as np

np.__version__

'1.15.1'

In [5]:
import numpy as rainbowunicorn

rainbowunicorn.__version__

'1.15.1'

In [6]:
import numpy as np

### The main NumPy data type is the homogeneous array. 

The function

```
a = np.array([element1, element2, element3])
```

creates a NumPy array from a Python list (or tuple).

In [7]:
a = np.array([1,2,3])
type(a)

numpy.ndarray

Homogeneous arrays can only contain one type of object

In [8]:
a = np.array([1, 2.0, 3+0j])
a

array([1.+0.j, 2.+0.j, 3.+0.j])

In [9]:
a = np.array([True,False,True])   # boolean array
a

array([ True, False,  True])

In [10]:
a = np.array(['my','best','string'])  # string array
a

array(['my', 'best', 'string'], dtype='<U6')

## Data Types in NumPy Arrays

most precise

 * np.bool_
 * np.int64
 * np.float64
 * np.str_
 * np.object_

least precise

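The elements in a NumPy array are all converted to one common type: the least precise type in the list above that can hold every element. NumPy calls this "upcasting" (type promotion)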

In [11]:
a = np.array([True,5 ,4.2 ,'me'])
a

array(['True', '5', '4.2', 'me'], dtype='<U32')

In [12]:
# an explicit dtype overrides type inference: each float is truncated to an integer (5.9 becomes 5, not 6)
a = np.array([2.0, 3, 5.9], dtype=np.int64)
a

array([2, 3, 5], dtype=int64)

## Array Attributes

In [13]:
a = np.array([[1,2,3],[4,5,6]])

In [14]:
a.dtype

dtype('int32')

In [15]:
a.ndim

2

In [16]:
a.size

6

In [17]:
a.shape

(2, 3)

### Array attributes can be assigned

In [18]:
a = np.array([[1,2,3],[4,5,6]])
print(a)
a.shape

[[1 2 3]
 [4 5 6]]


(2, 3)

In [19]:
a.shape=(3,2)
print(a)
a.shape

[[1 2]
 [3 4]
 [5 6]]


(3, 2)

## Array Creation Functions

In [20]:
a = np.arange(0,10,1)    #start, stop, step
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [21]:
a = np.arange(9)     # defaults are used if not provided
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [22]:
a = np.arange(0,10,2)
a

array([0, 2, 4, 6, 8])

In [23]:
a = np.linspace(0,360,6)  #start, stop, num
a

array([  0.,  72., 144., 216., 288., 360.])

In [24]:
a = np.logspace(0,2,3)   # logarithmically spaced values from 10**start to 10**stop: start, stop, num
a

array([  1.,  10., 100.])

In [25]:
a = np.ones(5)
print(a)
# Re-interpret the raw bytes of the float array as booleans. This does NOT convert
# the values: each float64 occupies 8 bytes, so the 5 floats become 40 one-byte
# booleans (a byte is True when it is non-zero). To convert the values themselves,
# use a.astype(bool) instead.
# The built-in bool is used because the np.bool alias was removed in NumPy 1.24, and
# .view() is used because assigning to the .dtype attribute is discouraged.
a = a.view(bool)
a

[1. 1. 1. 1. 1.]


array([False, False, False, False, False, False,  True,  True, False,
       False, False, False, False, False,  True,  True, False, False,
       False, False, False, False,  True,  True, False, False, False,
       False, False, False,  True,  True, False, False, False, False,
       False, False,  True,  True])

In [26]:
a = np.zeros(5)
a

array([0., 0., 0., 0., 0.])

In [27]:
a = np.arange(4)
print(a)
b = np.arange(3)
print(b)
X, Y = np.meshgrid(a,b)
print(X)
print(Y)

[0 1 2 3]
[0 1 2]
[[0 1 2 3]
 [0 1 2 3]
 [0 1 2 3]]
[[0 0 0 0]
 [1 1 1 1]
 [2 2 2 2]]


## Broadcasting functions and mathematical operations across arrays

In [28]:
l = [1,2,3]
l*2

[1, 2, 3, 1, 2, 3]

In [29]:
a = np.array([1,2,3])
a*2

array([2, 4, 6])

In [30]:
a**(1/2)

array([1.        , 1.41421356, 1.73205081])

In [31]:
a**2 + 3*a + 2

array([ 6, 12, 20])

In [32]:
from math import sin

print(sin(30))
sin([30,60,90])

-0.9880316240928618


TypeError: must be real number, not list

In [33]:
import numpy as np

# np.sin() accepts a single number or a whole array and is applied element by element;
# like math.sin(), it works in radians
print(np.sin(30))
np.sin(np.array([30,60,90]))

-0.9880316240928618


array([-0.98803162, -0.30481062,  0.89399666])

In [34]:
a = np.array([1,9,16])

In [35]:
np.mean(a)

8.666666666666666

In [36]:
np.median(a)

9.0

In [37]:
np.std(a)

6.128258770283412

In [38]:
np.exp(a)

array([2.71828183e+00, 8.10308393e+03, 8.88611052e+06])

In [39]:
np.log(a)

array([0.        , 2.19722458, 2.77258872])

In [40]:
np.log10(a)

array([0.        , 0.95424251, 1.20411998])

In [41]:
a = np.arange(1)
b = np.arange(9)
a*b

array([0, 0, 0, 0, 0, 0, 0, 0, 0])

In [42]:
a = np.array([1,2,3])
a

array([1, 2, 3])

In [43]:
b = np.array([2,4])
print(b)
b.shape = (2,1)
b

[2 4]


array([[2],
       [4]])

In [44]:
a * b

array([[ 2,  4,  6],
       [ 4,  8, 12]])

In [45]:
a = np.arange(11)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [46]:
b = np.arange(11)
b.shape = (11,1)
b

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10]])

In [47]:
a * b

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
       [  0,   2,   4,   6,   8,  10,  12,  14,  16,  18,  20],
       [  0,   3,   6,   9,  12,  15,  18,  21,  24,  27,  30],
       [  0,   4,   8,  12,  16,  20,  24,  28,  32,  36,  40],
       [  0,   5,  10,  15,  20,  25,  30,  35,  40,  45,  50],
       [  0,   6,  12,  18,  24,  30,  36,  42,  48,  54,  60],
       [  0,   7,  14,  21,  28,  35,  42,  49,  56,  63,  70],
       [  0,   8,  16,  24,  32,  40,  48,  56,  64,  72,  80],
       [  0,   9,  18,  27,  36,  45,  54,  63,  72,  81,  90],
       [  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100]])

## Array Indexing and Slicing

NumPy arrays can be indexed and sliced just like Python lists and strings

### Quick review of string and list indexing and slicing

In [48]:
l = [2,4,6]
s = 'video'

In [49]:
print(l[1])
print(s[3])

4
e


In [50]:
# all three slices are equivalent: a step of 1 and a stop at the end of the list are the defaults
# (only the value of the last line in a cell is displayed)
l[1:3:1]
l[1:3]
l[1:]

[4, 6]

In [51]:
s[1:3:1]
s[1:3]

'id'

In [52]:
l[:]

[2, 4, 6]

In [53]:
s[:]

'video'

### Index and Slice 1D arrays

In [54]:
a = np.arange(10,60,10)
a

array([10, 20, 30, 40, 50])

In [55]:
a[2]

30

In [56]:
a[2:5:1]
a[2:5]
a[2:]

array([30, 40, 50])

In [57]:
a[:]

array([10, 20, 30, 40, 50])

### Slice and Index 2D Arrays

In [58]:
# the multiples of 10 from 10 through 120, arranged as 3 rows by 4 columns
a = np.arange(10,130,10).reshape(3,4)
a

array([[ 10,  20,  30,  40],
       [ 50,  60,  70,  80],
       [ 90, 100, 110, 120]])

In [59]:
a[1,2]    #[row,col]

70

In [60]:
a[-1,0]

90

In [61]:
a[0,1:4:1]
a[0,1::1]

array([20, 30, 40])

In [62]:
a[0:3,2]
a[:,2]
a[:,-2]

array([ 30,  70, 110])

In [63]:
a[2,1] = -100    # assign a value to an index
a

array([[  10,   20,   30,   40],
       [  50,   60,   70,   80],
       [  90, -100,  110,  120]])

In [64]:
a = np.arange(-3,5,1)
a

array([-3, -2, -1,  0,  1,  2,  3,  4])

In [65]:
# get -2, 3, 4
indexes=np.array([1,6,7])
a[indexes]

array([-2,  3,  4])

### np.where() returns the locations (indices) of the values in an array that satisfy a condition

In [66]:
a = np.arange(-3,5,1)
a

array([-3, -2, -1,  0,  1,  2,  3,  4])

In [67]:
# where is the value 3 stored in the array?
loc = np.where(a==3)
loc

(array([6], dtype=int64),)

In [68]:
a[loc]

array([3])

In [69]:
locs = np.where(a<0)  # return the location of the negative numbers
locs

(array([0, 1, 2], dtype=int64),)

In [70]:
a[locs]

array([-3, -2, -1])

In [71]:
locs = np.where(a>0) # return the locations of the positive numbers
locs

(array([4, 5, 6, 7], dtype=int64),)

In [72]:
a[locs]

array([1, 2, 3, 4])

In [73]:
a = np.arange(0,100,1)
a.shape=(10,10)
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [74]:
a = -2*a**2 + 3*a - 10
a

array([[   -10,     -9,    -12,    -19,    -30,    -45,    -64,    -87,
          -114,   -145],
       [  -180,   -219,   -262,   -309,   -360,   -415,   -474,   -537,
          -604,   -675],
       [  -750,   -829,   -912,   -999,  -1090,  -1185,  -1284,  -1387,
         -1494,  -1605],
       [ -1720,  -1839,  -1962,  -2089,  -2220,  -2355,  -2494,  -2637,
         -2784,  -2935],
       [ -3090,  -3249,  -3412,  -3579,  -3750,  -3925,  -4104,  -4287,
         -4474,  -4665],
       [ -4860,  -5059,  -5262,  -5469,  -5680,  -5895,  -6114,  -6337,
         -6564,  -6795],
       [ -7030,  -7269,  -7512,  -7759,  -8010,  -8265,  -8524,  -8787,
         -9054,  -9325],
       [ -9600,  -9879, -10162, -10449, -10740, -11035, -11334, -11637,
        -11944, -12255],
       [-12570, -12889, -13212, -13539, -13870, -14205, -14544, -14887,
        -15234, -15585],
       [-15940, -16299, -16662, -17029, -17400, -17775, -18154, -18537,
        -18924, -19315]])

In [75]:
# pull out all values less than -2000
locs = np.where(a<-2000)
a[locs]

array([ -2089,  -2220,  -2355,  -2494,  -2637,  -2784,  -2935,  -3090,
        -3249,  -3412,  -3579,  -3750,  -3925,  -4104,  -4287,  -4474,
        -4665,  -4860,  -5059,  -5262,  -5469,  -5680,  -5895,  -6114,
        -6337,  -6564,  -6795,  -7030,  -7269,  -7512,  -7759,  -8010,
        -8265,  -8524,  -8787,  -9054,  -9325,  -9600,  -9879, -10162,
       -10449, -10740, -11035, -11334, -11637, -11944, -12255, -12570,
       -12889, -13212, -13539, -13870, -14205, -14544, -14887, -15234,
       -15585, -15940, -16299, -16662, -17029, -17400, -17775, -18154,
       -18537, -18924, -19315])

## Boolean Masks

A boolean mask is an array that contains only True and False values. When an array is indexed with a boolean mask of the same shape, each element that lines up with a True value in the mask is returned, and each element that lines up with a False value is left out.

In [76]:
a = np.array([1, 3, 8])
a

array([1, 3, 8])

In [77]:
mask = np.array([False, True, False])
mask

array([False,  True, False])

In [78]:
a[mask]

array([3])

In [79]:
# use a logical operation to create a boolean mask
# all the numbers less than 5
mask = a<5
mask

array([ True,  True, False])

In [80]:
a[mask]

array([1, 3])

In [81]:
# all the numbers greater than 4
mask = a>4
mask

array([False, False,  True])

In [82]:
a[mask]

array([8])

In [83]:
a = np.linspace(-2,100,64)
a

array([ -2.        ,  -0.38095238,   1.23809524,   2.85714286,
         4.47619048,   6.0952381 ,   7.71428571,   9.33333333,
        10.95238095,  12.57142857,  14.19047619,  15.80952381,
        17.42857143,  19.04761905,  20.66666667,  22.28571429,
        23.9047619 ,  25.52380952,  27.14285714,  28.76190476,
        30.38095238,  32.        ,  33.61904762,  35.23809524,
        36.85714286,  38.47619048,  40.0952381 ,  41.71428571,
        43.33333333,  44.95238095,  46.57142857,  48.19047619,
        49.80952381,  51.42857143,  53.04761905,  54.66666667,
        56.28571429,  57.9047619 ,  59.52380952,  61.14285714,
        62.76190476,  64.38095238,  66.        ,  67.61904762,
        69.23809524,  70.85714286,  72.47619048,  74.0952381 ,
        75.71428571,  77.33333333,  78.95238095,  80.57142857,
        82.19047619,  83.80952381,  85.42857143,  87.04761905,
        88.66666667,  90.28571429,  91.9047619 ,  93.52380952,
        95.14285714,  96.76190476,  98.38095238, 100.        ])

In [84]:
# only the positive values
mask = a>0
a[mask]

array([  1.23809524,   2.85714286,   4.47619048,   6.0952381 ,
         7.71428571,   9.33333333,  10.95238095,  12.57142857,
        14.19047619,  15.80952381,  17.42857143,  19.04761905,
        20.66666667,  22.28571429,  23.9047619 ,  25.52380952,
        27.14285714,  28.76190476,  30.38095238,  32.        ,
        33.61904762,  35.23809524,  36.85714286,  38.47619048,
        40.0952381 ,  41.71428571,  43.33333333,  44.95238095,
        46.57142857,  48.19047619,  49.80952381,  51.42857143,
        53.04761905,  54.66666667,  56.28571429,  57.9047619 ,
        59.52380952,  61.14285714,  62.76190476,  64.38095238,
        66.        ,  67.61904762,  69.23809524,  70.85714286,
        72.47619048,  74.0952381 ,  75.71428571,  77.33333333,
        78.95238095,  80.57142857,  82.19047619,  83.80952381,
        85.42857143,  87.04761905,  88.66666667,  90.28571429,
        91.9047619 ,  93.52380952,  95.14285714,  96.76190476,
        98.38095238, 100.        ])

In [85]:
# Compare floats with a tolerance instead of ==. The values produced by np.linspace()
# are computed in floating point, so an exact equality test only matches when the
# rounding happens to land exactly on 66.0.
mask = np.isclose(a, 66)
a[mask]

array([66.])

In [86]:
# np.isclose() builds the boolean condition with a tolerance, which is safer than ==
# for floating point values; np.where() then returns the indices where it is True
locs = np.where(np.isclose(a, 66.))
print(locs)
a[locs]

(array([42], dtype=int64),)


array([66.])

In [87]:
# numbers greater than 90 and the negative numbers
mask1 = a>90
mask2 = a<0
print(a[mask1])
print(a[mask2])

[ 90.28571429  91.9047619   93.52380952  95.14285714  96.76190476
  98.38095238 100.        ]
[-2.         -0.38095238]


In [88]:
mask1 or mask2   # Python's "and" and "or" keywords cannot be used with arrays

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [89]:
mask = np.logical_or(mask1,mask2)   # use NumPy's np.logical_or() function instead (mask1 | mask2 is equivalent for boolean arrays)
mask

array([ True,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True,  True,  True,  True,  True,  True,
        True])

In [90]:
a[mask]

array([ -2.        ,  -0.38095238,  90.28571429,  91.9047619 ,
        93.52380952,  95.14285714,  96.76190476,  98.38095238,
       100.        ])