In [3]:
import numpy as np
np.__version__

'1.12.1'

# Understanding Data Types in Python

## Creating Arrays from Python lists

In [5]:
# integer array:
np.array([1, 4, 2, 5, 3])

array([1, 4, 2, 5, 3])

If types do not match, NumPy will upcast if possible (here, integers are up-cast to floating point)

In [6]:
np.array([3.14, 4, 2, 3])

array([ 3.14,  4.  ,  2.  ,  3.  ])

In [7]:
np.array([1, 2, 3, 4], dtype='float32')

array([ 1.,  2.,  3.,  4.], dtype=float32)

In [14]:
# nested lists result in multi-dimensional arrays
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

## Creating arrays from scratch

Especially for larger arrays, it is more efficient to create arrays from scratch using routines built into NumPy

In [15]:
# Create a length-10 integer array filled with zeros
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [16]:
# Create a 3X5 floating-point array filled with ones
np.ones((3, 5), dtype=float)

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [17]:
# Create a 3X5 array filled with 3.14
np.full((3, 5), 3.14)

array([[ 3.14,  3.14,  3.14,  3.14,  3.14],
       [ 3.14,  3.14,  3.14,  3.14,  3.14],
       [ 3.14,  3.14,  3.14,  3.14,  3.14]])

In [18]:
# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 2
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [19]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

In [31]:
# Create a 3x3 array of uniformly distributed
# random values between 0 and 1
np.random.random((3, 3))

array([[ 0.8769533 ,  0.16557556,  0.19877327],
       [ 0.66273142,  0.06731937,  0.03940781],
       [ 0.90452415,  0.00923252,  0.64297571]])

In [32]:
# Create a 3X3 array of normally distributed random values 
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3,3))

array([[ 1.33828236, -1.98813338, -0.29657471],
       [-0.57633738,  0.67956652,  0.35101804],
       [-1.22072145, -0.8487272 ,  1.17184729]])

In [33]:
# Create a 3X3 array of random integers in the interval [0, 10]
np.random.randint(0, 10, (3, 3))

array([[8, 0, 6],
       [0, 2, 6],
       [9, 1, 0]])

In [34]:
# Create a 3X3 identity matrix
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [35]:
# Create an uninitialized array of three integers
# The values will be whatever happend to already exist at that memory location
np.empty(3)

array([ 1.,  1.,  1.])

## NumPy standard data types

when constructing an array, they can be specified using a string:

In [36]:
np.zeros(10, dtype='int16')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

Or using the associated NumPy object:

In [37]:
np.zeros(10, dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

**Data type** | **Description**
--- | ---
`bool_` | Boolean (True or False) stored as a byte
`int_` | Default integer type (same as C `long`; normally either `int64` or `int32`)
`intc` | Identical to C `int` (normally `int32` or `int64`)
`intp` | Integer used for indexing (same as C `ssize_t`; normally either `int32` or `int64`)
`int8` | Byte (-128 to 127)
`int16` | Integer (-32768 to 32767)
`int32` | Integer (-2147483648 to 2147483647)
`int64`	| Integer (-9223372036854775808 to 9223372036854775807)
`uint8`	| Unsigned integer (0 to 255)
`uint16` | Unsigned integer (0 to 65535)
`uint32` | Unsigned integer (0 to 4294967295)
`uint64` | Unsigned integer (0 to 18446744073709551615)
`float_` | Shorthand for `float64`.
`float16` | Half precision float: sign bit, 5 bits exponent, 10 bits mantissa
`float32` |	Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
`float64` | Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
`complex_` | Shorthand for complex128.
`complex64`	| Complex number, represented by two 32-bit floats
`complex128` | Complex number, represented by two 64-bit floats

# Basics of NumPy Arrays

## Attributes of arrays: 

Determining the size, shape, memory consumption, and data types of arrays

In [38]:
np.random.seed(0) # seed for reproducibility

x1 = np.random.randint(10, size=6)
x2 = np.random.randint(10, size=(3, 4))
x3 = np.random.randint(10, size=(3, 4, 5))

In [40]:
print("x3 ndim:", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size:", x3.size)
print("x3 dtype:", x3.dtype)
print("x3 itemsize:", x3.itemsize, "bytes")
print("x3 nbytes:", x3.nbytes, "bytes")

x3 ndim: 3
x3 shape: (3, 4, 5)
x3 size: 60
x3 dtype: int64
x3 itemsize: 8 bytes
x3 nbytes: 480 bytes


## Indexing of arrays: 

Getting and setting the value of individual array elements

### Accessing single element

In [41]:
x1

array([5, 0, 3, 3, 7, 9])

In [42]:
x1[0]

5

In [43]:
x1[4]

7

In [44]:
x1[-1]

9

In [45]:
x1[-2]

7

In [46]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [47]:
x2[0, 0]

3

In [48]:
x2[2, 0]

1

In [49]:
x2[2, -1]

7

In [50]:
x2[0, 0] = 12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [51]:
x1[0] = 3.14159  # this will be truncated!
x1

array([3, 0, 3, 3, 7, 9])

## Slicing of arrays: 

Getting and setting smaller subarrays within a larger array

### One-dimensional subarrays

In [52]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [53]:
x[:5]

array([0, 1, 2, 3, 4])

In [54]:
x[5:]

array([5, 6, 7, 8, 9])

In [55]:
x[4:7]

array([4, 5, 6])

In [56]:
x[::2]

array([0, 2, 4, 6, 8])

In [57]:
x[1::2]

array([1, 3, 5, 7, 9])

In [58]:
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [59]:
x[5::-2]

array([5, 3, 1])

### Multi-dimensional subarrays

In [60]:
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [61]:
x2[:2, :3]

array([[12,  5,  2],
       [ 7,  6,  8]])

In [62]:
x2[:3, ::2]

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [64]:
x2[::-1, ::-1]

array([[ 7,  7,  6,  1],
       [ 8,  8,  6,  7],
       [ 4,  2,  5, 12]])

**Accessing array rows and columns**

In [65]:
print(x2[:, 0])

[12  7  1]


In [66]:
print(x2[0, :])

[12  5  2  4]


In [67]:
print(x2[0]) # equivalent to x2[0, :]

[12  5  2  4]


### Subarrays as no-copy views

In [68]:
print(x2)

[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [69]:
x2_sub = x2[:2, :2]
print(x2_sub)

[[12  5]
 [ 7  6]]


Now if we modify this subarray, we'll see that the original array is changed! 

In [70]:
x2_sub[0, 0] = 99
print(x2_sub)

[[99  5]
 [ 7  6]]


In [71]:
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


### Creating copies of arrays

In [72]:
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[99  5]
 [ 7  6]]


In [73]:
x2_sub_copy[0, 0] = 42
print(x2_sub_copy)

[[42  5]
 [ 7  6]]


In [74]:
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


## Reshaping of arrays: 

Changing the shape of a given array

In [75]:
grid = np.arange(1, 10).reshape((3,3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [76]:
x = np.array([1, 2, 3])

# row vector via reshape
x.reshape((1,3))

array([[1, 2, 3]])

In [77]:
# row vector via newaxis
x[np.newaxis, :]

array([[1, 2, 3]])

In [78]:
# column vectior via reshape
x.reshape((3,1))

array([[1],
       [2],
       [3]])

In [79]:
# column vector via newaxis
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

## Array Concatenation and Splitting: 

Combining multiple arrays into one, and splitting one array into many

### Concatenation of arrays

In [80]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [81]:
z = [99, 99, 99]
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [83]:
grid = np.array([[1, 2, 3],
               [4, 5, 6]])

In [85]:
# concatenate along the first axis
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [86]:
# concatenate along the second axis (zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

For working with arrays of mixed dimensions, it can be clearer to use the `np.vstack` (vertical stack) and `np.hstack` (horizontal stack) functions:

In [87]:
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7],
                [6, 5, 4]])

# vertically stack the arrays
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [88]:
# horizontally stack the arrays
y = np.array([[99],
            [99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

### Splitting of arrays

For each of these, we can pass a list of indices giving the split points:

In [90]:
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3,5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [91]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [92]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [93]:
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


# Computation on NumPy Arrays: Universal Functions

## The slowness of loops

In [94]:
import numpy as np
np.random.seed(0)

def compute_reciprocals(values):
    output = np.empty(len(values))
    for i in range(len(values)):
        output[i] = 1.0 / values[i]
    return output

values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([ 0.16666667,  1.        ,  0.25      ,  0.25      ,  0.125     ])

In [96]:
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array)

3.01 s ± 134 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Introducting UFuncs

*vectorized operations*

In [97]:
print(compute_reciprocals(values))
print(1.0 / values)

[ 0.16666667  1.          0.25        0.25        0.125     ]
[ 0.16666667  1.          0.25        0.25        0.125     ]


In [98]:
%timeit 1.0 / big_array

4.16 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Ufuncs are extremely flexible – before we saw an operation between a scalar and an array, but we can also operate between two arrays:

In [99]:
np.arange(5) / np.arange(1, 6)

array([ 0.        ,  0.5       ,  0.66666667,  0.75      ,  0.8       ])

And ufunc operations are not limited to one-dimensional arrays–they can also act on multi-dimensional arrays as well:

In [100]:
x = np.arange(9).reshape((3, 3))
2 ** x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

## Exploring NumPy's UFuncs

*unary ufuncs*, which operate on a single input, and *binary ufuncs*, which operate on two inputs

### Array arithmetic

In [103]:
x = np.arange(4)
print("x     =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("x // 2 =", x // 5)    # floor division

x     = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [ 0.   0.5  1.   1.5]
x // 2 = [0 0 0 0]


In [104]:
print("-x     = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2  = ", x % 2)

-x     =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2  =  [0 1 0 1]


In [105]:
-(0.5*x + 1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

In [106]:
np.add(x, 2)

array([2, 3, 4, 5])

The following table lists the arithmetic operators implemented in NumPy:

Operator |	Equivalent ufunc |	Description
--- | --- | ---
+ |	`np.add`	| Addition (e.g., `1 + 1 = 2`)
- | `np.subtract` |	Subtraction (e.g., `3 - 2 = 1`)
- |	`np.negative` |	Unary negation (e.g., `-2`)
* |	`np.multiply` |	Multiplication (e.g., `2 * 3 = 6`)
/ |	`np.divide` |	Division (e.g., `3 / 2 = 1.5`)
// |	`np.floor_divide` |	Floor division (e.g., `3 // 2 = 1`)
** |	`np.power` |	Exponentiation (e.g., `2 ** 3 = 8`)
% |	`np.mod` |	Modulus/remainder (e.g., `9 % 4 = 1`)

In [107]:
### Absolute value

In [108]:
x = np.array([-2, -1, 0, 1, 2])
abs(x)

array([2, 1, 0, 1, 2])

In [109]:
np.absolute(x)

array([2, 1, 0, 1, 2])

In [110]:
np.abs(x)

array([2, 1, 0, 1, 2])

In [111]:
x = np.array([3 - 4j, 4 - 3j, 2 + 0j, 0 + 1j])
np.abs(x)

array([ 5.,  5.,  2.,  1.])

### Trignometric functions

In [112]:
theta = np.linspace(0, np.pi, 3)

In [113]:
print("theta      = ", theta)
print("sin(theta) = ", np.sin(theta))
print("cos(theta) = ", np.cos(theta))
print("tan(theta) = ", np.tan(theta))

theta      =  [ 0.          1.57079633  3.14159265]
sin(theta) =  [  0.00000000e+00   1.00000000e+00   1.22464680e-16]
cos(theta) =  [  1.00000000e+00   6.12323400e-17  -1.00000000e+00]
tan(theta) =  [  0.00000000e+00   1.63312394e+16  -1.22464680e-16]


In [114]:
x = [-1, 0, 1]
print("x         = ", x)
print("arcsin(x) = ", np.arcsin(x))
print("arccos(x) = ", np.arccos(x))
print("arctan(x) = ", np.arctan(x))

x         =  [-1, 0, 1]
arcsin(x) =  [-1.57079633  0.          1.57079633]
arccos(x) =  [ 3.14159265  1.57079633  0.        ]
arctan(x) =  [-0.78539816  0.          0.78539816]


In [115]:
### Exponents and logarithms

In [116]:
x = [1, 2, 3]
print("x     =", x)
print("e^x   =", np.exp(x))
print("2^x   =", np.exp2(x))
print("3^x   =", np.power(3, x))

x     = [1, 2, 3]
e^x   = [  2.71828183   7.3890561   20.08553692]
2^x   = [ 2.  4.  8.]
3^x   = [ 3  9 27]


In [117]:
x = [1, 2, 4, 10]
print("x        =", x)
print("ln(x)    =", np.log(x))
print("log2(x)  =", np.log2(x))
print("log10(x) =", np.log10(x))

x        = [1, 2, 4, 10]
ln(x)    = [ 0.          0.69314718  1.38629436  2.30258509]
log2(x)  = [ 0.          1.          2.          3.32192809]
log10(x) = [ 0.          0.30103     0.60205999  1.        ]


There are also some specialized versions that are useful for maintaining precision with very small input:

In [118]:
x = [0, 0.001, 0.01, 0.1]
print("exp(x) - 1 =", np.expm1(x))
print("log(1 + x) =", np.log1p(x))

exp(x) - 1 = [ 0.          0.0010005   0.01005017  0.10517092]
log(1 + x) = [ 0.          0.0009995   0.00995033  0.09531018]


When `x` is very small, these functions give more precise values than if the raw `np.log` or `np.exp` were to be used.

### Specialized ufuncs

Another excellent source for more specialized and obscure ufuncs is the submodule `scipy.special`

In [119]:
from scipy import specialcial

In [120]:
# Gamma functions (generalized factorials) and related functions
x = [1, 5, 10]
print("gamma(x)     =", special.gamma(x))
print("ln|gamma(x)| =", special.gammaln(x))
print("beta(x, 2)   =", special.beta(x, 2))

gamma(x)     = [  1.00000000e+00   2.40000000e+01   3.62880000e+05]
ln|gamma(x)| = [  0.           3.17805383  12.80182748]
beta(x, 2)   = [ 0.5         0.03333333  0.00909091]


In [121]:
# Error function (integral of Gaussian)
# its complement, and its inverse
x = np.array([0, 0.3, 0.7, 1.0])
print("erf(x)  =", special.erf(x))
print("erfc(x) =", special.erfc(x))
print("erfinv(x) =", special.erfinv(x))

erf(x)  = [ 0.          0.32862676  0.67780119  0.84270079]
erfc(x) = [ 1.          0.67137324  0.32219881  0.15729921]
erfinv(x) = [ 0.          0.27246271  0.73286908         inf]


## Advanced Ufunc Features

### Specifying output

For large calculations, it is sometimes useful to be able to specify the array where the result of the calculation will be stored. Rather than creating a temporary array, this can be used to write computation results directly to the memory location where you'd like them to be

In [122]:
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 10, out=y)
print(y)

[  0.  10.  20.  30.  40.]


This can even be used with array views. For example, we can write the results of a computation to every other element of a specified array:

In [124]:
y = np.zeros(10)
np.power(2, x, out=y[::2])
print(y)

[  1.   0.   2.   0.   4.   0.   8.   0.  16.   0.]


If we had instead written `y[::2] = 2 ** x`, this would have resulted in the creation of a temporary array to hold the results of `2 ** x`, followed by a second operation copying those values into the `y` array

### Aggregates

A reduce repeatedly applies a given operation to the elements of an array until only a single result remains.

In [126]:
x = np.arange(1, 6)
np.add.reduce(x)

15

In [127]:
np.multiply.reduce(x)

120

If we'd like to store all the intermediate results of the computation, we can instead use `accumulate`:

In [128]:
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

In [129]:
np.multiply.accumulate(x)

array([  1,   2,   6,  24, 120])

### Outer products

This allows you, in one line, to do things like create a multiplication table:

In [130]:
x = np.arange(1, 6)
np.multiply.outer(x, x)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

# Aggregations

## Summing the Values in an array