# [Numpy](http://www.numpy.org)

## 1. Understanding Data Types in Python 

### 1.1 Creating Arrays from Python Lists 

In [1]:
import numpy as np

In [2]:
np.__version__

'1.16.2'

In [3]:
# Creating arrays using python lists

In [4]:
?np.array

In [5]:
np.array([1, 2, 3, 4, 5]) # integer array

array([1, 2, 3, 4, 5])

In [6]:
np.array([3.14, 4, 5, 3 ], dtype = 'float32' )

array([3.14, 4.  , 5.  , 3.  ], dtype=float32)

In [7]:
# nested lists result in multidimensional arrays

In [8]:
np.array([range(i, i+3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### 1.2 Creating Arrays from Scratch

In [9]:
# Create a length-10 integer array filled with zeros
np.zeros(10, dtype = int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:
# Create a 3x5 floating-point array filled with 1s
np.ones((3, 5), dtype = float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [11]:
#Create a 3x5 array filled with 3.14
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [12]:
# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 2
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [13]:
#Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [14]:
# Create a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [15]:
# Create an uninitialized array of three floating-point values (np.empty
# defaults to a float dtype, as the output below shows).
# The values will be whatever happens to already exist at that
# memory location
np.empty(3)

array([1., 1., 1.])

### 1.3 Random Numbers

In [16]:
# Uniformly distributed 
np.random.random((3, 3))

array([[0.67109417, 0.14894365, 0.53859379],
       [0.60948371, 0.65426063, 0.28822071],
       [0.42598124, 0.80733957, 0.46416308]])

In [17]:
# Normally distributed values with mean 0 and standard deviation 1, as a 3x3 array
np.random.normal(0, 1, (3, 3))

array([[-0.33993301,  0.25822781, -1.05238366],
       [-0.65808545,  0.58616778,  1.29495311],
       [-0.76795104,  2.56231919, -1.6992404 ]])

In [18]:
#Create a 3x3 array of random integers in the interval [0, 10)
np.random.randint(0, 10, (3, 3))

array([[3, 5, 7],
       [2, 6, 8],
       [3, 0, 2]])

## 2. The Basics of Numpy Arrays

We’ll cover a few categories of basic array manipulations here:
* Attributes of arrays
> Determining the size, shape, memory consumption, and data types of arrays
* Indexing of arrays
> Getting and setting the value of individual array elements
* Slicing of arrays
> Getting and setting smaller subarrays within a larger array
* Reshaping of arrays
> Changing the shape of a given array
* Joining and splitting of arrays
> Combining multiple arrays into one, and splitting one array into many

### 2.1 Attributes of array 

In [19]:
np.random.seed(0) # seed for reproducibility

In [20]:
# One-, two- and three-dimensional arrays of random integers drawn from [0, 10).
# The draw order (x1, then x2, then x3) is kept: it determines which values each
# array receives from NumPy's global random generator.
x1 = np.random.randint(0, 10, size=6)
x2 = np.random.randint(0, 10, size=(3, 4))
x3 = np.random.randint(0, 10, size=(3, 4, 5))

In [21]:
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [22]:
# Each array has the attributes ndim (the number of dimensions), shape (the size
# of each dimension), and size (the total number of elements in the array).
x3.ndim, x3.shape, x3.size

(3, (3, 4, 5), 60)

In [23]:
# Another useful attribute is the dtype, the data type of the array
x3.dtype

dtype('int32')

In [24]:
# Other attributes include itemsize (the size in bytes of each array element)
# and nbytes (the total size in bytes of the array).
x3.itemsize, x3.nbytes

(4, 240)

### 2.2 Indexing of array

In [25]:
# Accessing Single elements 
x1[0], x1[4]

(5, 7)

In [26]:
#To index from the end of the array, you can use negative indices
x1[-1]

9

In [27]:
# Multidimensional array
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [28]:
# array[rownumber, colnumber]
x2[0, 0]

3

In [29]:
x2[2, -2]

7

In [30]:
#You can also modify values using any of the above index notation
x2[0, 0] = 12; x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

### 2.3 Slicing of arrays

In [31]:
# Accessing subarrays 
x1[:5]

array([5, 0, 3, 3, 7])

In [32]:
x1[5:]

array([9])

In [33]:
x1[3:5] # with the upper and lower limit

array([3, 7])

In [34]:
x1[::2] # every other element

array([5, 3, 7])

In [35]:
x1[::-1] # reversed

array([9, 7, 3, 3, 0, 5])

In [36]:
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [37]:
x2[:2 , :3] # two rows, three columns

array([[12,  5,  2],
       [ 7,  6,  8]])

In [38]:
x2[:,::2] # all rows and every other column

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [39]:
x2[::-1,::-1] # reversed

array([[ 7,  7,  6,  1],
       [ 8,  8,  6,  7],
       [ 4,  2,  5, 12]])

#### subarrays as no-copy views

In [40]:
x2_sub = x2[:2, :2]; x2_sub

array([[12,  5],
       [ 7,  6]])

In [41]:
x2_sub[0, 0] = 99; x2_sub

array([[99,  5],
       [ 7,  6]])

In [42]:
# The change made through x2_sub is visible in x2 as well, because the slice is a view
x2

array([[99,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [43]:
x2_sub_copy = x2[:2, :2].copy(); x2_sub_copy

array([[99,  5],
       [ 7,  6]])

In [44]:
x2_sub_copy[0,0] = 42; x2_sub_copy

array([[42,  5],
       [ 7,  6]])

In [45]:
#This time the modification is not done to the original array
x2

array([[99,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

### 2.4 Reshaping the arrays  

In [46]:
grid = np.arange(1, 10).reshape((3, 3)); grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

Another common reshaping pattern is the conversion of a one-dimensional array
into a two-dimensional row or column matrix. You can do this with the reshape
method, or more easily by making use of the newaxis keyword within a slice operation.

In [47]:
# x is already a 2x3 array, so reshape((2, 3)) returns an array of the same shape.
# Note that x.ndim and x.shape describe x itself, not the reshaped result.
x = np.array([[1, 2, 3], [4, 5, 6]])
x.reshape((2, 3)), x.ndim, x.shape

(array([[1, 2, 3],
        [4, 5, 6]]), 2, (2, 3))

In [48]:
# Indexing with np.newaxis returns a new array with an extra leading axis of
# length 1. x itself is unchanged, so x.ndim and x.shape still report 2 and (2, 3).
x[np.newaxis,:], x.ndim, x.shape

(array([[[1, 2, 3],
         [4, 5, 6]]]), 2, (2, 3))

### 2.5 Joining and splitting the arrays

In [49]:
# Join two one-dimensional arrays end to end.
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate((x, y))

array([1, 2, 3, 3, 2, 1])

In [50]:
z = [99, 99, 99]
np.concatenate([x, y, z])

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [51]:
grid = np.array([[1, 2, 3], [3, 2, 1]])

In [52]:
np.concatenate([grid, grid])

array([[1, 2, 3],
       [3, 2, 1],
       [1, 2, 3],
       [3, 2, 1]])

In [53]:
np.concatenate([grid, grid], axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [3, 2, 1, 3, 2, 1]])

In [54]:
# Vertically stack a one-dimensional array on top of a two-dimensional grid;
# x becomes the first row of the result.
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7],
                 [6, 5, 4]])
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [55]:
# horizontally stack the arrays
y = np.array([[99], [99]])

np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

In [56]:
# Split a sequence at the given indices: N split points produce N + 1 subarrays.
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])  # pieces are x[:3], x[3:5] and x[5:]
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [57]:
grid = np.arange(16).reshape((4, 4))

In [58]:
upper, lower = np.vsplit(grid, [2])

In [59]:
# Print each half with its own statement. Joining the two calls with a comma
# builds the tuple (None, None), which the notebook then echoes as the cell result.
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


(None, None)

In [60]:
left, right = np.hsplit(grid, [2])

In [61]:
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## 3. Computation on Numpy Arrays: Universal functions 

### 3.1 The slowness of loops

Vectorized operations in NumPy are implemented via ufuncs, whose main purpose is
to quickly execute repeated operations on values in NumPy arrays. Ufuncs are
extremely flexible: they can operate between a scalar and an array, and they
can also operate between two arrays.

### 3.2 Exploring UFuncs

![Image](UFuncs.png)

In [62]:
x = np.arange(4)

NumPy’s ufuncs feel very natural to use because they make use of Python’s native
arithmetic operators. The standard addition, subtraction, multiplication, and division
can all be used

In [63]:
print("x =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("x // 2 =", x // 2) # floor division

x = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]


There is also a unary ufunc for negation, a ** operator for exponentiation, and a % operator for modulus.

In [64]:
print("-x = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2 = ", x % 2)

-x =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2 =  [0 1 0 1]


In [65]:
# abs value 
np.abs([-2, -1, 0, 1, 2])

array([2, 1, 0, 1, 2])

#### Trigonometric Ufuncs

In [66]:
theta = np.linspace(0, np.pi, 3)
#Now we can compute some trigonometric functions on these values:
print("theta = ", theta)
print("sin(theta) = ", np.sin(theta))
print("cos(theta) = ", np.cos(theta))
print("tan(theta) = ", np.tan(theta))

theta =  [0.         1.57079633 3.14159265]
sin(theta) =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [67]:
x = [-1, 0, 1]
print("x = ", x)
print("arcsin(x) = ", np.arcsin(x))
print("arccos(x) = ", np.arccos(x))
print("arctan(x) = ", np.arctan(x))

x =  [-1, 0, 1]
arcsin(x) =  [-1.57079633  0.          1.57079633]
arccos(x) =  [3.14159265 1.57079633 0.        ]
arctan(x) =  [-0.78539816  0.          0.78539816]


#### Exponents and logarithms

In [68]:
x = [1, 2, 3]
print("x =", x)
print("e^x =", np.exp(x))
print("2^x =", np.exp2(x))
print("3^x =", np.power(3, x))

x = [1, 2, 3]
e^x = [ 2.71828183  7.3890561  20.08553692]
2^x = [2. 4. 8.]
3^x = [ 3  9 27]


The inverse of the exponentials, the logarithms, are also available. The basic np.log
gives the natural logarithm; if you prefer to compute the base-2 logarithm or the
base-10 logarithm, these are available as well

In [69]:
x = [1, 2, 4, 10]
print("x =", x)
print("ln(x) =", np.log(x))
print("log2(x) =", np.log2(x))
print("log10(x) =", np.log10(x))

x = [1, 2, 4, 10]
ln(x) = [0.         0.69314718 1.38629436 2.30258509]
log2(x) = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


There are also some specialized versions that are useful for maintaining precision
with very small input

In [70]:
x = [0, 0.001, 0.01, 0.1]
print("exp(x) - 1 =", np.expm1(x))
print("log(1 + x) =", np.log1p(x))

exp(x) - 1 = [0.         0.0010005  0.01005017 0.10517092]
log(1 + x) = [0.         0.0009995  0.00995033 0.09531018]


#### Aggregates
For binary ufuncs, there are some interesting aggregates that can be computed
directly from the object. For example, if we’d like to reduce an array with a particular
operation, we can use the reduce method of any ufunc. A reduce repeatedly applies a
given operation to the elements of an array until only a single result remains.
For example, calling reduce on the add ufunc returns the sum of all elements in the
array

In [71]:
x = np.arange(1, 6)

In [72]:
np.add.reduce(x)

15

In [73]:
np.multiply.reduce(x)

120

In [74]:
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15], dtype=int32)

In [75]:
# Big array
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)

228 ms ± 32.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.24 ms ± 156 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [76]:
np.min(big_array), np.max(big_array)

(1.4057692298008462e-06, 0.9999994392723005)

In [77]:
%timeit min(big_array)
%timeit np.min(big_array)

150 ms ± 1.97 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
793 µs ± 18.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


![Aggregates](Aggregrates.PNG)

## 4. Computation on Arrays - Broadcasting

### Rules of Broadcasting
Broadcasting in NumPy follows a strict set of rules to determine the interaction
between the two arrays:
* Rule 1: If the two arrays differ in their number of dimensions, the shape of the
one with fewer dimensions is padded with ones on its leading (left) side.
* Rule 2: If the shape of the two arrays does not match in any dimension, the array
with shape equal to 1 in that dimension is stretched to match the other shape.
* Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is
raised.

In [78]:
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])

In [79]:
a + 5 

array([5, 6, 7])

In [80]:
M = np.ones((3, 3))

In [81]:
M + a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [82]:
# A length-3 row vector and a 3x1 column vector: a has shape (3,), b has shape (3, 1).
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]
print(a, b, sep="\n")

[0 1 2]
[[0]
 [1]
 [2]]


In [83]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [84]:
# Broadcasting example 1: adding a two-dimensional array to a one-dimensional array

M = np.ones((2, 3))
M.shape

(2, 3)

In [85]:
a = np.arange(3)
a.shape

(3,)

Let’s consider an operation on these two arrays. The shapes of the arrays are:
___________________________________________

M.shape = (2, 3)
a.shape = (3,)
____________________________________________

We see by rule 1 that the array a has fewer dimensions, so we pad it on the left with
ones:
______________________________________________

M.shape -> (2, 3)
a.shape -> (1, 3)
______________________________________________

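By rule 2, we now see that the first dimension disagrees, so we stretch this dimension
to match:
______________________________________________

M.shape -> (2, 3)
a.shape -> (2, 3)
______________________________________________

The shapes now match, so the result of the operation has shape (2, 3).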

In [86]:
M + a

array([[1., 2., 3.],
       [1., 2., 3.]])

## 5. Comparisons, Masks and Boolean Logic

#### Comparisons

In [87]:
x = np.array([1, 2, 3, 4, 5])

In [88]:
x < 3 

array([ True,  True, False, False, False])

In [89]:
x > 3

array([False, False, False,  True,  True])

In [90]:
x != 3

array([ True,  True, False,  True,  True])

In [91]:
x == 3

array([False, False,  True, False, False])

It is also possible to do an element-by-element comparison of two arrays, and to
include compound expressions

In [92]:
(2 * x) == (x ** 2)

array([False,  True, False, False, False])

#### Boolean Arrays

In [93]:
# Draw a reproducible 3x4 grid of integers from [0, 10) using a dedicated
# generator seeded with 0, then count how many entries are below 6.
rng = np.random.RandomState(seed=0)
x = rng.randint(0, 10, size=(3, 4))
np.count_nonzero(x < 6)

8

In [94]:
# how many values less than 6 in each row?
np.sum(x < 6, axis=1)

array([4, 2, 2])

In [95]:
# are there any values greater than 8?
np.any(x > 8)

True

In [96]:
#are all values less than 10?
np.all(x < 10)

True

The following table summarizes the bitwise Boolean operators and their equivalent
ufuncs:

| Operator | Equivalent ufunc |
|:--------:|:----------------:|
| `&`      | `np.bitwise_and` |
| `\|`     | `np.bitwise_or`  |
| `^`      | `np.bitwise_xor` |
| `~`      | `np.bitwise_not` |

#### Boolean Arrays as Masks

In [97]:
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [98]:
x < 5

array([[False,  True,  True,  True],
       [False, False,  True, False],
       [ True,  True, False, False]])

In [99]:
x[x < 5]

array([0, 3, 3, 3, 2, 4])

#### Fancy Indexing

In [100]:
rand = np.random.RandomState(0)

In [101]:
x = rand.randint(100, size = 10)
print(x)

[44 47 64 67 67  9 83 21 36 87]


In [102]:
ind = [3, 7, 4]

In [103]:
x[ind]

array([67, 21, 67])

In [104]:
ind = np.array([[3, 7], [4, 5]])

In [105]:
x[ind]

array([[67, 21],
       [67,  9]])

#### Combined Indexing

In [106]:
X = np.arange(12).reshape((3, 4)); X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [107]:
X[2, [2, 0, 1]]

array([10,  8,  9])

In [108]:
X.shape

(3, 4)

In [109]:
X[:,np.newaxis].shape

(3, 1, 4)

#### Modifying Values with Fancy Indexing

In [110]:
# The integers 0..9, plus an index array selecting positions 2, 1, 8 and 4.
x = np.arange(0, 10)
i = np.array([2, 1, 8, 4])
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [111]:
x[i] = 99; x

array([ 0, 99, 99,  3, 99,  5,  6,  7, 99,  9])

#### Sorting 

In [112]:
x = np.array([2, 1, 4, 3, 5])

In [113]:
np.sort(x)

array([1, 2, 3, 4, 5])

In [114]:
i = np.argsort(x); i

array([1, 0, 3, 2, 4], dtype=int64)

#### Partial Sorts: Partitioning


Sometimes we’re not interested in sorting the entire array, but simply want to find the
K smallest values in the array. NumPy provides this in the np.partition function.
np.partition takes an array and a number K; the result is a new array with the smallest
K values to the left of the partition, and the remaining values to the right, in arbitrary
order

In [115]:
x = np.array([7, 2, 3, 1, 6, 5, 4])
np.partition(x, 3)

array([2, 1, 3, 4, 6, 5, 7])

## 6. Structured Data

In [116]:
name = ['Alice', 'Bob', 'Cathy', 'Doug']
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

In [117]:
#Recall that previously we created a simple array using an expression like this:
x = np.zeros(4, dtype=int); x

array([0, 0, 0, 0])

In [118]:
# Build a structured array with a compound dtype: each of the four records has
# a 'name' field (Unicode string, up to 10 characters), an 'age' field (4-byte
# integer) and a 'weight' field (8-byte float). All fields start zero-filled.
data = np.zeros(
    4,
    dtype=[("name", "U10"), ("age", "i4"), ("weight", "f8")],
)
print(data.dtype)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


In [119]:
data['name'] = name 
data['age'] = age
data['weight'] = weight

In [120]:
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]


In [121]:
data['name']

array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')

![DataTypes](DataTypes.PNG)