# Numpy Introduction

## Numpy Array
* closer to hardware (efficiency)
* designed for scientific computation (convenience)


In [1]:
# Import convention as np
import numpy as np

In [2]:
# A comparison between list and numpy array
L = range(1000)
# Use magic function, %timeit, to evaluate the time
# List
%timeit [i**2 for i in L]

1000 loops, best of 3: 554 µs per loop


In [3]:
a = np.arange(1000)
# Numpy array
%timeit a ** 2

The slowest run took 27.89 times longer than the fastest. This could mean that an intermediate result is being cached 
1000000 loops, best of 3: 1.47 µs per loop


#### 1-D array

In [4]:
a = np.array([0, 1, 2, 3])
a

array([0, 1, 2, 3])

In [5]:
# Data type
type(a)

numpy.ndarray

In [6]:
a.ndim

1

In [7]:
a.shape

(4,)

In [8]:
len(a)

4

#### 2-D, 3-D... array

In [9]:
a2 = np.array([[0, 1, 2], [3, 4, 5]])
a2

array([[0, 1, 2],
       [3, 4, 5]])

In [10]:
a2.ndim

2

In [11]:
# Shape: the size along each dimension (here 2 rows, 3 columns)
a2.shape

(2, 3)

In [12]:
# Three dimension
a3 = np.array([[[0], [1]], [[2], [3]]])
a3

array([[[0],
        [1]],

       [[2],
        [3]]])

In [13]:
a3.ndim

3

In [14]:
a3.shape

(2, 2, 1)

### Useful methods to create arrays: arange, linspace, zeros, ones, eye, diag, empty

In [15]:
# Works like seq in R
# 0 to 9, length is 10
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [16]:
# arguments: start, end(not included), by
np.arange(1, 10, 3)

array([1, 4, 7])

In [17]:
# arguments: start, end(included by default), length or number of points
np.linspace(1, 5, 9)

array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ])

In [18]:
# endpoint=False excludes the stop value (5) from the result.
# The length is still 9, so the step shrinks from 0.5 to 4/9 -- easy to overlook!
np.linspace(1, 5, 9, endpoint=False)

array([ 1.        ,  1.44444444,  1.88888889,  2.33333333,  2.77777778,
        3.22222222,  3.66666667,  4.11111111,  4.55555556])

In [19]:
np.ones(5)

array([ 1.,  1.,  1.,  1.,  1.])

In [20]:
np.ones((2, 3))

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [21]:
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [22]:
# Identity matrix
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [23]:
# Create diagonal matrix
np.diag(np.array([3, 4, 5]))

array([[3, 0, 0],
       [0, 4, 0],
       [0, 0, 5]])

In [24]:
# Allocate an array WITHOUT initializing its entries.
# The contents are whatever happened to be in memory, so the zeros shown
# below are incidental -- use np.zeros when zeros are actually required.
np.empty(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

### Random Number

In [25]:
# Five random numbers drawn uniformly from [0, 1)
np.random.rand(5)

array([ 0.2394682 ,  0.88953936,  0.05925515,  0.80926857,  0.10220036])

In [26]:
# Set seed
np.random.seed(0)
np.random.rand(5)

array([ 0.5488135 ,  0.71518937,  0.60276338,  0.54488318,  0.4236548 ])

In [27]:
# Same result
np.random.seed(0)
np.random.rand(5)

array([ 0.5488135 ,  0.71518937,  0.60276338,  0.54488318,  0.4236548 ])

### Data type

In [28]:
aint = np.array([1, 2, 3])
aint.dtype

dtype('int32')

In [29]:
afloat = np.array([1., 2., 3.])
afloat.dtype

dtype('float64')

In [30]:
# Default is float
np.ones(5).dtype

dtype('float64')

In [31]:
acomplex = np.array([1+2j, 3+4j, 5+6*1j])
acomplex.dtype

dtype('complex128')

In [32]:
abool = np.array([True, False])
abool.dtype

dtype('bool')

In [33]:
astr = np.array(['Hello', 'World', 'Yes'])
# The longest string has 5 characters, so the dtype is a
# Unicode string of width 5 ('<U5')
astr.dtype

dtype('<U5')

### Indexing and slicing

In [34]:
# It is similar to list indexing
a = np.arange(10)
a[0], a[4], a[-1]

(0, 4, 9)

In [35]:
# Start:end(not included):by
a[0:3:1]

array([0, 1, 2])

In [36]:
# Same index will be repeated as in R
a[[5, 5, 6, 2, 3, 3]]

array([5, 5, 6, 2, 3, 3])

In [37]:
# Reverse
a[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [38]:
a = np.diag(np.arange(3))
a

array([[0, 0, 0],
       [0, 1, 0],
       [0, 0, 2]])

In [39]:
# For multidimensional arrays, index with a tuple: (row, column)
a[1, 1]

1

In [40]:
# Second row(line)
a[1]

array([0, 1, 0])

In [41]:
# Third line, second column 
a[2, 1] = 10
a

array([[ 0,  0,  0],
       [ 0,  1,  0],
       [ 0, 10,  2]])

In [42]:
# Second column
a[:,1]

array([ 0,  1, 10])

In [43]:
# Third row, second to third column (the end index 3 is not included)
a[2, 1:3]

array([10,  2])

In [44]:
# Build a 2-D array by broadcasting: a row of shape (6,) plus a
# column of shape (6, 1) gives a result of shape (6, 6)
np.arange(6) + np.arange(0, 51, 10)[:, np.newaxis]

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [45]:
np.arange(0, 51, 10)[:, np.newaxis]

array([[ 0],
       [10],
       [20],
       [30],
       [40],
       [50]])

In [46]:
## Exercise 1
## <<Create this array with 3 lines of code>>
## [[1, 1, 1, 1],
##  [1, 1, 1, 1],
##  [1, 1, 1, 2],
##  [1, 6, 1, 1]]

# np.ones defaults to float64, so the result holds floats (1., 2., 6.)
a = np.ones([4, 4])
# Third row, fourth column
a[2, 3] = 2
# Fourth row, second column
a[3, 1] = 6

a

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  2.],
       [ 1.,  6.,  1.,  1.]])

In [47]:
## Exercise 2
## <<Create this array by 1 line of code>>
## [[0., 0., 0., 0., 0.],
##  [2., 0., 0., 0., 0.],
##  [0., 3., 0., 0., 0.],
##  [0., 0., 4., 0., 0.],
##  [0., 0., 0., 5., 0.],
##  [0., 0., 0., 0., 6.]]

np.diag(np.arange(2., 7.), -1)[:,0:5]

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 2.,  0.,  0.,  0.,  0.],
       [ 0.,  3.,  0.,  0.,  0.],
       [ 0.,  0.,  4.,  0.,  0.],
       [ 0.,  0.,  0.,  5.,  0.],
       [ 0.,  0.,  0.,  0.,  6.]])

In [48]:
## Exercise 3
## Check np.tile
## <<Create this array by 1 line of code>>
## [[4, 3, 4, 3, 4, 3],
##  [2, 1, 2, 1, 2, 1],
##  [4, 3, 4, 3, 4, 3],
##  [2, 1, 2, 1, 2, 1]]

np.tile(np.array([[4, 3], [2, 1]]), (2, 3))

array([[4, 3, 4, 3, 4, 3],
       [2, 1, 2, 1, 2, 1],
       [4, 3, 4, 3, 4, 3],
       [2, 1, 2, 1, 2, 1]])

### Slicing: view (shared memory)
This is important: a slice is a view of the original array, so modifying the slice silently modifies the original as well. It is an easy mistake to make without noticing!

In [49]:
a = np.arange(10)
b = a[::2]

In [50]:
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [51]:
b

array([0, 2, 4, 6, 8])

In [52]:
# Check whether the two objects might share memory. This is a heuristic:
# True means the arrays may overlap, not that they are guaranteed to.
np.may_share_memory(a, b)

True

In [53]:
# Change the element in b
b[0] = 10
b

array([10,  2,  4,  6,  8])

In [54]:
# It changes the first element in a as well
a

array([10,  1,  2,  3,  4,  5,  6,  7,  8,  9])

In [55]:
# USE COPY to avoid the problem
a = np.arange(10)
c = a[::2].copy()

In [56]:
np.may_share_memory(a, c)

False

In [57]:
c[0] = 10
c

array([10,  2,  4,  6,  8])

In [58]:
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### Interesting example: sieve of Eratosthenes
1. Skip j which are already known to not be primes
2. The first number to cross out is $j^2$

In [59]:
def sieve_of_eratosthenes(n):
    """Return a boolean array of length ``n`` where entry ``k`` is True iff ``k`` is prime.

    Parameters
    ----------
    n : int
        Number of candidates to test, i.e. the integers ``0 .. n-1``.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape ``(n,)``.
    """
    mask = np.ones(n, dtype=bool)
    # 0 and 1 are not prime numbers
    mask[:2] = False
    for j in range(2, n):
        # Every composite below n has a prime factor j with j*j < n, so we can
        # stop here. The integer comparison avoids the off-by-one of
        # range(2, int(sqrt(n))), which never sieves with j == int(sqrt(n)).
        if j * j >= n:
            break
        # Skip j already known to be composite: its multiples are already crossed out
        if mask[j]:
            # Smaller multiples of j were removed by smaller primes, so start at j**2
            mask[j * j::j] = False
    return mask


is_prime = sieve_of_eratosthenes(100)
is_prime

array([False, False,  True,  True, False,  True, False,  True, False,
       False, False,  True, False,  True, False, False, False,  True,
       False,  True, False, False, False,  True, False, False, False,
       False, False,  True, False,  True, False, False, False, False,
       False,  True, False, False, False,  True, False,  True, False,
       False, False,  True, False, False, False, False, False,  True,
       False, False, False, False, False,  True, False,  True, False,
       False, False, False, False,  True, False, False, False,  True,
       False,  True, False, False, False, False, False,  True, False,
       False, False,  True, False, False, False, False, False,  True,
       False, False, False, False, False, False, False,  True, False, False], dtype=bool)

In [60]:
# Get the index(i.e. prime number here) of non-zero
np.nonzero(is_prime)

(array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
        61, 67, 71, 73, 79, 83, 89, 97], dtype=int64),)

In [61]:
# You can also use boolean as index
np.arange(100)[is_prime]

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
       61, 67, 71, 73, 79, 83, 89, 97])

In [62]:
# Build the 5x5 grid with reshape rather than assigning to `a.shape`:
# NumPy's documentation discourages setting the shape attribute in place.
a = np.arange(25).reshape(5, 5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [63]:
# Upper triangle index, k=1 means above diagonal
a[np.triu_indices(n=5, k=1)] = 0
a

array([[ 0,  0,  0,  0,  0],
       [ 5,  6,  0,  0,  0],
       [10, 11, 12,  0,  0],
       [15, 16, 17, 18,  0],
       [20, 21, 22, 23, 24]])

### Useful methods for array object: