# Introduction to NumPy

In [1]:
import numpy as np

In [2]:
np.__version__

'1.15.4'

In [3]:
## Plain-Python example: add up the integers 0..99 with an explicit loop
result = 0 
for i in range(100):    
    result += i 

## A Python List Is More Than Just a List

In [4]:
L = list(range(10))

In [5]:
L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [6]:
type(L[0])

int

In [7]:
L2 = [str(c) for c in L]
L2

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [8]:
type(L2[0])

str

In [9]:
## Creating a heterogeneous list: each element can have a different type
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3]

[bool, str, float, int]

## Fixed Type Arrays in Python

In [10]:
import array

In [11]:
L = list(range(10))
A = array.array('i', L)
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### Creating Arrays from Python Lists

In [12]:
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [13]:
np.array([3.14,5,6,3]) ## a NumPy array has a single dtype, so the integers are upcast to float

array([3.14, 5.  , 6.  , 3.  ])

In [14]:
np.array([1,2,3,4], dtype = "float32")

array([1., 2., 3., 4.], dtype=float32)

In [15]:
np.array([range(i, i + 3) for i in [2,4,6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### Creating Arrays from Scratch

In [16]:
np.zeros(10, dtype = int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [17]:
np.ones((3,5), dtype = float) ## (3,5) specifies the number of rows and the number of columns.

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [18]:
np.full((3,5), 3.14) ## np.full fills up the array with that number

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [18]:
 np.arange(0, 20, 2) ## evenly spaced values starting at 0, in steps of 2, stopping before 20

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [20]:
np.linspace(0, 1, 5) ## five evenly spaced values from 0 to 1 (both endpoints included)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [21]:
np.random.random((3,3)) ## a 3x3 array of random values drawn uniformly from [0, 1)

array([[0.77842165, 0.62948225, 0.00691378],
       [0.08384796, 0.9005218 , 0.76296815],
       [0.0272459 , 0.84079478, 0.07875959]])

In [22]:
np.random.normal(0,1,(3,3)) ## This creates a three-by-three randomly generated array that is normally distributed with a mean of 0 and stdev of 1

array([[ 0.85583826,  0.76488567, -0.63674161],
       [-0.06877905, -2.15774848, -0.30973492],
       [-1.02329494, -0.28162672, -0.50904452]])

In [23]:
np.random.randint(0,10,(3,3)) ## a 3x3 array of random integers from 0 (inclusive) to 10 (exclusive)

array([[0, 0, 2],
       [3, 2, 2],
       [2, 1, 5]])

In [24]:
np.eye(3) ## creates an identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [25]:
np.empty(4) ## creates an uninitialized array of 4 floats; the values are whatever was already in memory

array([0.25, 0.5 , 0.75, 1.  ])

## NumPy Standard Data Types

In [26]:
np.zeros(10, dtype = 'int16')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [27]:
np.zeros(10, dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

## The Basics of NumPy arrays

### NumPy Array Attributes

In [26]:
np.random.seed(0)
x1 = np.random.randint(10, size=6)  # One-dimensional array     
x2 = np.random.randint(10, size=(3, 4))  # Two-dimensional array       
x3 = np.random.randint(10, size=(3, 4, 5))  # Three-dimensional array 

In [27]:
x1

array([5, 0, 3, 3, 7, 9])

In [28]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [29]:
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [32]:
print("x3 ndim: ", x3.ndim)       
print("x3 shape:", x3.shape)       
print("x3 size: ", x3.size)

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60


In [30]:
print("dtype:", x3.dtype)

dtype: int64


In [31]:
print("itemsize:", x3.itemsize, "bytes") ## size in bytes of each array element
print("nbytes:", x3.nbytes, "bytes") ## total size in bytes of all the array elements

itemsize: 8 bytes
nbytes: 480 bytes


### Array Indexing: Accessing Single Elements

In [32]:
x1[0] ## for accessing a one dimensional array

5

In [33]:
x2[2,3] ## for accessing a two dimensional array

7

In [34]:
x3[1,2,3] ## for accessing a three dimensional array

4

In [35]:
x2[2,3] = 3 ## values can be modified
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 3]])

In [36]:
x3[1,2,3] = 5
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 5, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

### Array Slicing: Accessing Subarrays

#### One dimensional subarrays

In [37]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [38]:
x[:5]

array([0, 1, 2, 3, 4])

In [39]:
x[5:]

array([5, 6, 7, 8, 9])

In [40]:
x[4:7]

array([4, 5, 6])

In [41]:
x[::2] ## every other element, starting at index 0

array([0, 2, 4, 6, 8])

In [42]:
x[1::2] ## every other element, starting at index 1

array([1, 3, 5, 7, 9])

In [43]:
x[::-1] ## all elements reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [44]:
x[5::-2] ## every other element in reverse order, starting at index 5

array([5, 3, 1])

#### Multidimensional arrays

In [45]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 3]])

In [46]:
x2[:2,:3]

array([[3, 5, 2],
       [7, 6, 8]])

##### Accessing array rows and columns

In [47]:
print(x2[:, 0]) ## first column of x2

[3 7 1]


In [48]:
print(x2[0, :]) ## first row of x2

[3 5 2 4]


#### Subarrays as no-copy views

In [49]:
print(x2)

[[3 5 2 4]
 [7 6 8 8]
 [1 6 7 3]]


In [50]:
x2_sub = x2[:2,:2] ## slicing returns a view onto x2's data, not a copy
print(x2_sub)

[[3 5]
 [7 6]]


In [51]:
x2_sub[0,0] = 69 ## writing through the view also modifies x2 (shown in the next cell)
x2_sub

array([[69,  5],
       [ 7,  6]])

In [52]:
x2

array([[69,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  3]])

#### Creating copies of arrays

In [53]:
x2_sub_copy = x2[:2, :2].copy()        
print(x2_sub_copy)

[[69  5]
 [ 7  6]]


In [54]:
x2_sub_copy[0,0] = 420 ## The original array is not modified
x2_sub_copy

array([[420,   5],
       [  7,   6]])

### Reshaping of Arrays

In [55]:
grid = np.arange(1, 10).reshape((3, 3))       
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [56]:
x = np.array([1,2,3])
x.reshape(1,3) ## row vector

array([[1, 2, 3]])

In [57]:
x[np.newaxis,:]

array([[1, 2, 3]])

In [58]:
x.reshape((3, 1)) ## column vector

array([[1],
       [2],
       [3]])

In [59]:
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

### Array Concatenation and Splitting

#### Concatenation of arrays

In [60]:
x = np.array([1, 2, 3])        
y = np.array([3, 2, 1])        
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [61]:
z = [99, 99, 99]        
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [62]:
grid = np.array([[1, 2, 3],[4, 5, 6]])

In [63]:
np.concatenate([grid,grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [64]:
np.concatenate([grid, grid], axis=1) ## concatenate along the second axis (zero-indexed) 

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [65]:
x = np.array([1, 2, 3])        
grid = np.array([[9, 8, 7],[6, 5, 4]])
np.vstack([x, grid]) ## vertically stacks the array

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [66]:
y = np.array([[99],[99]])  
np.hstack([grid,y]) ## horizontally stacks the array

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

#### Splitting of arrays

In [67]:
x = [1, 2, 3, 99, 99, 3, 2, 1]        
## split at indices 3 and 5 -> three pieces; note this rebinds x1, x2, x3,
## replacing the arrays created in the "NumPy Array Attributes" section
x1, x2, x3 = np.split(x, [3, 5])        
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [68]:
grid = np.arange(16).reshape((4, 4))        
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [69]:
upper, lower = np.vsplit(grid, [2]) ## Vertical splitting    
print(upper)        
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [70]:
left, right = np.hsplit(grid, [2]) ## Horizontal splitting
print(left)        
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## Computation of NumPy Arrays: Universal Functions

### The Slowness of Loops

In [72]:
np.random.seed(0)
def compute_reciprocals(values):
    """Return an array holding 1.0 / v for every v in `values`.

    The reciprocals are computed one element at a time in an explicit
    Python-level loop; this is the slow baseline that the notebook times
    against NumPy's vectorized division.

    Parameters
    ----------
    values : sequence supporting len() and integer indexing
        The numbers to invert.

    Returns
    -------
    numpy.ndarray
        Array of length len(values) containing the reciprocals.
    """
    size = len(values)
    # Uninitialized buffer; every slot is assigned in the loop below.
    reciprocals = np.empty(size)
    for position in range(size):
        element = values[position]
        reciprocals[position] = 1.0 / element
    return reciprocals
values = np.random.randint(1, 10, size=5)       
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [73]:
big_array = np.random.randint(1, 100, size=1000000) ## The operation turns out to be very slow
%timeit compute_reciprocals(big_array)

2.34 s ± 54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Introducing Ufuncs

In [74]:
np.random.seed(0)
values = np.random.randint(1, 10, size=5)    
print(compute_reciprocals(values))
print(1/values)

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]


In [75]:
%timeit (1.0/big_array) ## computes faster compared to the function

3.45 ms ± 232 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [76]:
np.arange(5)/np.arange(1, 6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [77]:
x = np.arange(9).reshape((3, 3))       
2 ** x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

### Exploring NumPy's Ufuncs

#### Array arithmetic

In [78]:
x = np.arange(4)       
print("x     =", x)       
print("x + 5 =", x + 5)       
print("x - 5 =", x - 5)       
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)       
print("x // 2 =", x // 2) 

x     = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]


In [79]:
print("-x     = ", -x)       
print("x ** 2 = ", x ** 2)       
print("x % 2  = ", x % 2)

-x     =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2  =  [0 1 0 1]


In [80]:
-(0.5*x + 1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

In [81]:
np.add(x, 2)

array([2, 3, 4, 5])

#### Absolute value

In [82]:
x = np.array([-2, -1, 0, 1, 2])        
abs(x)

array([2, 1, 0, 1, 2])

In [83]:
np.absolute(x)

array([2, 1, 0, 1, 2])

In [84]:
np.abs(x)

array([2, 1, 0, 1, 2])

In [85]:
x = np.array([3 - 4j, 4 - 3j, 2 + 0j, 0 + 1j])      
np.abs(x)

array([5., 5., 2., 1.])

#### Trigonometric functions

In [86]:
theta = np.linspace(0, np.pi, 3)

In [87]:
print("theta      = ", theta)        
print("sin(theta) = ", np.sin(theta))        
print("cos(theta) = ", np.cos(theta))        
print("tan(theta) = ", np.tan(theta))

theta      =  [0.         1.57079633 3.14159265]
sin(theta) =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [88]:
x = [-1, 0, 1]        
print("x         = ", x)        
print("arcsin(x) = ", np.arcsin(x))        
print("arccos(x) = ", np.arccos(x))        
print("arctan(x) = ", np.arctan(x))

x         =  [-1, 0, 1]
arcsin(x) =  [-1.57079633  0.          1.57079633]
arccos(x) =  [3.14159265 1.57079633 0.        ]
arctan(x) =  [-0.78539816  0.          0.78539816]


#### Exponents and logarithms

In [89]:
x = [1, 2, 3]        
print("x     =", x)        
print("e^x   =", np.exp(x))        
print("2^x   =", np.exp2(x))        
print("3^x   =", np.power(3, x))

x     = [1, 2, 3]
e^x   = [ 2.71828183  7.3890561  20.08553692]
2^x   = [2. 4. 8.]
3^x   = [ 3  9 27]


In [90]:
x = [1, 2, 4, 10]        
print("x        =", x)        
print("ln(x)    =", np.log(x))        
print("log2(x)  =", np.log2(x))        
print("log10(x) =", np.log10(x))

x        = [1, 2, 4, 10]
ln(x)    = [0.         0.69314718 1.38629436 2.30258509]
log2(x)  = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


In [91]:
x = [0, 0.001, 0.01, 0.1]        
print("exp(x) - 1 =", np.expm1(x))        
print("log(1 + x) =", np.log1p(x))

exp(x) - 1 = [0.         0.0010005  0.01005017 0.10517092]
log(1 + x) = [0.         0.0009995  0.00995033 0.09531018]


#### Specialized ufuncs

In [92]:
from scipy import special

In [93]:
x = [1, 5, 10]
print("gamma(x)     =", special.gamma(x))        
print("ln|gamma(x)| =", special.gammaln(x))        
print("beta(x, 2)   =", special.beta(x, 2))

gamma(x)     = [1.0000e+00 2.4000e+01 3.6288e+05]
ln|gamma(x)| = [ 0.          3.17805383 12.80182748]
beta(x, 2)   = [0.5        0.03333333 0.00909091]


In [94]:
x = np.array([0, 0.3, 0.7, 1.0])        
print("erf(x)  =", special.erf(x))        
print("erfc(x) =", special.erfc(x))        
print("erfinv(x) =", special.erfinv(x))

erf(x)  = [0.         0.32862676 0.67780119 0.84270079]
erfc(x) = [1.         0.67137324 0.32219881 0.15729921]
erfinv(x) = [0.         0.27246271 0.73286908        inf]


### Advanced Ufunc Features

#### Specifying output

In [95]:
x = np.arange(5)        
y = np.empty(5)        
np.multiply(x, 10, out=y) ## store the result directly in the existing array y
print(y)

[ 0. 10. 20. 30. 40.]


In [97]:
y = np.zeros(10)        
np.power(2, x, out=y[::2]) ## write 2**x into every other element of y
print(y)

[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]


#### Aggregates

In [98]:
x = np.arange(1, 6)        
np.add.reduce(x)

15

In [99]:
np.multiply.reduce(x)

120

In [100]:
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

In [101]:
np.multiply.accumulate(x)

array([  1,   2,   6,  24, 120])

#### Outer Products

In [102]:
x = np.arange(1, 6)        
np.multiply.outer(x, x)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

## Aggregations: Min, Max, and Everything in Between 

### Summing the values in an array

In [103]:
L = np.random.random(100)
sum(L)

47.51294159911191

In [104]:
np.sum(L)

47.5129415991119

In [105]:
big_array = np.random.rand(1000000)       
%timeit sum(big_array)       
%timeit np.sum(big_array)

104 ms ± 3.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
484 µs ± 15.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Minimum and Maximum

In [106]:
min(big_array), max(big_array)

(7.071203171893359e-07, 0.9999997207656334)

In [107]:
np.min(big_array), np.max(big_array)

(7.071203171893359e-07, 0.9999997207656334)

In [108]:
%timeit min(big_array)       
%timeit np.min(big_array)

80.5 ms ± 2.86 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
465 µs ± 20.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [109]:
print(big_array.min(), big_array.max(), big_array.sum())

7.071203171893359e-07 0.9999997207656334 500384.6391052089


#### Multidimensional aggregates

In [110]:
M = np.random.random((3,4))
print(M)

[[0.78005386 0.29509276 0.23792159 0.36427966]
 [0.28696762 0.13911998 0.81564486 0.33446842]
 [0.08974411 0.44701982 0.34255858 0.82397382]]


In [111]:
M.sum()

4.95684508810505

In [112]:
M.min(axis = 0) ## minimum across each column

array([0.08974411, 0.13911998, 0.23792159, 0.33446842])

In [113]:
M.min(axis = 1) ## minimum across each row

array([0.23792159, 0.13911998, 0.08974411])

#### Other aggregate functions

In [114]:
big_array.mean()

0.5003846391052089

In [116]:
big_array.std()

0.2886949454856943

In [117]:
np.percentile(big_array,25)

0.2503204663344487

In [118]:
np.median(big_array)

0.5008045892041464

In [119]:
np.percentile(big_array,75)

0.7508103400791966

In [120]:
np.mean(big_array)

0.5003846391052089